Sync to pluginize

John Schember 2009-04-11 07:39:12 -04:00
commit 1edbd88e73
76 changed files with 24321 additions and 11312 deletions

View File

@@ -14,7 +14,20 @@ IMAGEMAGICK_DIR = 'C:\\ImageMagick'
FONTCONFIG_DIR = 'C:\\fontconfig'
VC90 = r'C:\VC90.CRT'
-import sys, os, py2exe, shutil, zipfile, glob, re
+# ModuleFinder can't handle runtime changes to __path__, but win32com uses them
+import sys
+import py2exe.mf as modulefinder
+import win32com
+for p in win32com.__path__[1:]:
+    modulefinder.AddPackagePath("win32com", p)
+for extra in ["win32com.shell"]: #,"win32com.mapi"
+    __import__(extra)
+    m = sys.modules[extra]
+    for p in m.__path__[1:]:
+        modulefinder.AddPackagePath(extra, p)
+import os, py2exe, shutil, zipfile, glob, re
from distutils.core import setup
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
sys.path.insert(0, BASE_DIR)
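The new import block above is the standard py2exe workaround for win32com: win32com appends extra directories to its __path__ at import time, and py2exe's static ModuleFinder cannot see those runtime changes. A minimal standalone sketch of the same recipe (assumes Python 2 on Windows with pywin32 and py2exe installed):

import sys
import py2exe.mf as modulefinder
import win32com

# Tell ModuleFinder about every directory win32com added to its __path__
# at import time, then do the same for each subpackage that also extends
# its own __path__ (here just win32com.shell).
for p in win32com.__path__[1:]:
    modulefinder.AddPackagePath("win32com", p)
for extra in ["win32com.shell"]:
    __import__(extra)
    for p in sys.modules[extra].__path__[1:]:
        modulefinder.AddPackagePath(extra, p)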

View File

@@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
-__version__ = '0.5.5'
+__version__ = '0.5.6'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
'''
Various run time constants.

View File

@@ -18,7 +18,7 @@ every time you add an HTML file to the library.\
    file_types = set(['html', 'htm', 'xhtml', 'xhtm'])
    supported_platforms = ['windows', 'osx', 'linux']
    on_import = True

    def run(self, htmlfile):
        of = self.temporary_file('_plugin_html2zip.zip')
        from calibre.ebooks.html import gui_main as html2oeb
@@ -26,172 +26,173 @@ every time you add an HTML file to the library.\
        return of.name

class OPFMetadataReader(MetadataReaderPlugin):
    name = 'Read OPF metadata'
    file_types = set(['opf'])
    description = _('Read metadata from %s files')%'OPF'

    def get_metadata(self, stream, ftype):
        from calibre.ebooks.metadata.opf2 import OPF
        from calibre.ebooks.metadata import MetaInformation
        return MetaInformation(OPF(stream, os.getcwd()))

class RTFMetadataReader(MetadataReaderPlugin):
    name = 'Read RTF metadata'
    file_types = set(['rtf'])
    description = _('Read metadata from %s files')%'RTF'

    def get_metadata(self, stream, ftype):
        from calibre.ebooks.metadata.rtf import get_metadata
        return get_metadata(stream)

class FB2MetadataReader(MetadataReaderPlugin):
    name = 'Read FB2 metadata'
    file_types = set(['fb2'])
    description = _('Read metadata from %s files')%'FB2'

    def get_metadata(self, stream, ftype):
        from calibre.ebooks.metadata.fb2 import get_metadata
        return get_metadata(stream)

class LRFMetadataReader(MetadataReaderPlugin):
    name = 'Read LRF metadata'
    file_types = set(['lrf'])
    description = _('Read metadata from %s files')%'LRF'

    def get_metadata(self, stream, ftype):
        from calibre.ebooks.lrf.meta import get_metadata
        return get_metadata(stream)

class PDFMetadataReader(MetadataReaderPlugin):
    name = 'Read PDF metadata'
    file_types = set(['pdf'])
    description = _('Read metadata from %s files')%'PDF'

    def get_metadata(self, stream, ftype):
        from calibre.ebooks.metadata.pdf import get_metadata
        return get_metadata(stream)

class LITMetadataReader(MetadataReaderPlugin):
    name = 'Read LIT metadata'
    file_types = set(['lit'])
    description = _('Read metadata from %s files')%'LIT'

    def get_metadata(self, stream, ftype):
        from calibre.ebooks.metadata.lit import get_metadata
        return get_metadata(stream)

class IMPMetadataReader(MetadataReaderPlugin):
    name = 'Read IMP metadata'
    file_types = set(['imp'])
    description = _('Read metadata from %s files')%'IMP'
    author = 'Ashish Kulkarni'

    def get_metadata(self, stream, ftype):
        from calibre.ebooks.metadata.imp import get_metadata
        return get_metadata(stream)

class RBMetadataReader(MetadataReaderPlugin):
    name = 'Read RB metadata'
    file_types = set(['rb'])
    description = _('Read metadata from %s files')%'RB'
    author = 'Ashish Kulkarni'

    def get_metadata(self, stream, ftype):
        from calibre.ebooks.metadata.rb import get_metadata
        return get_metadata(stream)

class EPUBMetadataReader(MetadataReaderPlugin):
    name = 'Read EPUB metadata'
    file_types = set(['epub'])
    description = _('Read metadata from %s files')%'EPUB'

    def get_metadata(self, stream, ftype):
        from calibre.ebooks.metadata.epub import get_metadata
        return get_metadata(stream)

class HTMLMetadataReader(MetadataReaderPlugin):
    name = 'Read HTML metadata'
    file_types = set(['html'])
    description = _('Read metadata from %s files')%'HTML'

    def get_metadata(self, stream, ftype):
        from calibre.ebooks.metadata.html import get_metadata
        return get_metadata(stream)

class MOBIMetadataReader(MetadataReaderPlugin):
    name = 'Read MOBI metadata'
    file_types = set(['mobi', 'prc', 'azw'])
    description = _('Read metadata from %s files')%'MOBI'

    def get_metadata(self, stream, ftype):
        from calibre.ebooks.mobi.reader import get_metadata
        return get_metadata(stream)

class TOPAZMetadataReader(MetadataReaderPlugin):
    name = 'Read Topaz metadata'
    file_types = set(['tpz', 'azw1'])
    description = _('Read metadata from %s files')%'MOBI'

    def get_metadata(self, stream, ftype):
        from calibre.ebooks.metadata.topaz import get_metadata
        return get_metadata(stream)

class ODTMetadataReader(MetadataReaderPlugin):
    name = 'Read ODT metadata'
    file_types = set(['odt'])
    description = _('Read metadata from %s files')%'ODT'

    def get_metadata(self, stream, ftype):
        from calibre.ebooks.metadata.odt import get_metadata
        return get_metadata(stream)

class TXTMetadataReader(MetadataReaderPlugin):
    name = 'Read TXT metadata'
    file_types = set(['txt'])
    description = _('Read metadata from %s files') % 'TXT'

    def get_metadata(self, stream, ftype):
        from calibre.ebooks.metadata.txt import get_metadata
        return get_metadata(stream)

class LRXMetadataReader(MetadataReaderPlugin):
    name = 'Read LRX metadata'
    file_types = set(['lrx'])
    description = _('Read metadata from %s files')%'LRX'

    def get_metadata(self, stream, ftype):
        from calibre.ebooks.metadata.lrx import get_metadata
        return get_metadata(stream)

class ComicMetadataReader(MetadataReaderPlugin):
    name = 'Read comic metadata'
    file_types = set(['cbr', 'cbz'])
    description = _('Extract cover from comic files')

    def get_metadata(self, stream, ftype):
        if ftype == 'cbr':
            from calibre.libunrar import extract_member as extract_first
+            extract_first
        else:
            from calibre.libunzip import extract_member as extract_first
        from calibre.ebooks.metadata import MetaInformation
        ret = extract_first(stream)
        mi = MetaInformation(None, None)
        if ret is not None:
@@ -199,65 +200,65 @@ class ComicMetadataReader(MetadataReaderPlugin):
            ext = os.path.splitext(path)[1][1:]
            mi.cover_data = (ext.lower(), data)
        return mi

class ZipMetadataReader(MetadataReaderPlugin):
    name = 'Read ZIP metadata'
    file_types = set(['zip', 'oebzip'])
    description = _('Read metadata from ebooks in ZIP archives')

    def get_metadata(self, stream, ftype):
        from calibre.ebooks.metadata.zip import get_metadata
        return get_metadata(stream)

class RARMetadataReader(MetadataReaderPlugin):
    name = 'Read RAR metadata'
    file_types = set(['rar'])
    description = _('Read metadata from ebooks in RAR archives')

    def get_metadata(self, stream, ftype):
        from calibre.ebooks.metadata.rar import get_metadata
        return get_metadata(stream)

class EPUBMetadataWriter(MetadataWriterPlugin):
    name = 'Set EPUB metadata'
    file_types = set(['epub'])
    description = _('Set metadata in %s files')%'EPUB'

    def set_metadata(self, stream, mi, type):
        from calibre.ebooks.metadata.epub import set_metadata
        set_metadata(stream, mi)

class LRFMetadataWriter(MetadataWriterPlugin):
    name = 'Set LRF metadata'
    file_types = set(['lrf'])
    description = _('Set metadata in %s files')%'LRF'

    def set_metadata(self, stream, mi, type):
        from calibre.ebooks.lrf.meta import set_metadata
        set_metadata(stream, mi)

class RTFMetadataWriter(MetadataWriterPlugin):
    name = 'Set RTF metadata'
    file_types = set(['rtf'])
    description = _('Set metadata in %s files')%'RTF'

    def set_metadata(self, stream, mi, type):
        from calibre.ebooks.metadata.rtf import set_metadata
        set_metadata(stream, mi)

class MOBIMetadataWriter(MetadataWriterPlugin):
    name = 'Set MOBI metadata'
    file_types = set(['mobi', 'prc', 'azw'])
    description = _('Set metadata in %s files')%'MOBI'
    author = 'Marshall T. Vandegrift'

    def set_metadata(self, stream, mi, type):
        from calibre.ebooks.metadata.mobi import set_metadata
        set_metadata(stream, mi)
@@ -267,14 +268,16 @@ from calibre.ebooks.epub.input import EPUBInput
from calibre.ebooks.mobi.input import MOBIInput
from calibre.ebooks.pdf.input import PDFInput
from calibre.ebooks.txt.input import TXTInput
+from calibre.ebooks.html.input import HTMLInput
from calibre.ebooks.oeb.output import OEBOutput
from calibre.ebooks.txt.output import TXTOutput
from calibre.ebooks.pdf.output import PDFOutput
from calibre.customize.profiles import input_profiles, output_profiles

-plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, TXTInput, OEBOutput, TXTOutput, PDFOutput]
+plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, HTMLInput,
+           TXTInput, OEBOutput, TXTOutput, PDFOutput]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
            x.__name__.endswith('MetadataReader')]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
            x.__name__.endswith('MetadataWriter')]
plugins += input_profiles + output_profiles
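The two list comprehensions above register every MetadataReader and MetadataWriter class defined in the module without naming each one. A small self-contained sketch of that collection idiom (class names hypothetical, Python 2 as in the module):

class FooMetadataReader(object):
    pass

class BarMetadataReader(object):
    pass

# At module level, locals() is the module namespace, so this picks up
# every class whose name ends with the suffix.
plugins = [x for x in list(locals().values()) if isinstance(x, type) and
           x.__name__.endswith('MetadataReader')]
print sorted(p.__name__ for p in plugins)
# ['BarMetadataReader', 'FooMetadataReader']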

View File

@@ -163,9 +163,9 @@ class InputFormatPlugin(Plugin):
            for x in os.listdir('.'):
                shutil.rmtree(x) if os.path.isdir(x) else os.remove(x)
        ret = self.convert(stream, options, file_ext,
                           log, accelerators)
        if options.debug_input is not None:
            options.debug_input = os.path.abspath(options.debug_input)
            if not os.path.exists(options.debug_input):

View File

@@ -13,12 +13,14 @@ def devices():
    from calibre.devices.kindle.driver import KINDLE
    from calibre.devices.kindle.driver import KINDLE2
    from calibre.devices.blackberry.driver import BLACKBERRY
-    return (PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY)
+    from calibre.devices.eb600.driver import EB600
+    return (PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2,
+            BLACKBERRY, EB600)

import time

DAY_MAP = dict(Sun=0, Mon=1, Tue=2, Wed=3, Thu=4, Fri=5, Sat=6)
MONTH_MAP = dict(Jan=1, Feb=2, Mar=3, Apr=4, May=5, Jun=6, Jul=7, Aug=8, Sep=9, Oct=10, Nov=11, Dec=12)
INVERSE_DAY_MAP = dict(zip(DAY_MAP.values(), DAY_MAP.keys()))
INVERSE_MONTH_MAP = dict(zip(MONTH_MAP.values(), MONTH_MAP.keys()))

View File

@@ -11,37 +11,36 @@ from calibre.ebooks.metadata import authors_to_string
from calibre.devices.errors import FreeSpaceError
from calibre.devices.usbms.driver import USBMS
import calibre.devices.cybookg3.t2b as t2b
-from calibre.devices.errors import FreeSpaceError

class CYBOOKG3(USBMS):
    # Ordered list of supported formats
    # Be sure these have an entry in calibre.devices.mime
    FORMATS = ['mobi', 'prc', 'html', 'pdf', 'rtf', 'txt']

    VENDOR_ID = [0x0bda, 0x3034]
    PRODUCT_ID = [0x0703, 0x1795]
    BCD = [0x110, 0x132]

    VENDOR_NAME = 'BOOKEEN'
    WINDOWS_MAIN_MEM = 'CYBOOK_GEN3__-FD'
    WINDOWS_CARD_MEM = 'CYBOOK_GEN3__-SD'

    OSX_MAIN_MEM = 'Bookeen Cybook Gen3 -FD Media'
    OSX_CARD_MEM = 'Bookeen Cybook Gen3 -SD Media'

    MAIN_MEMORY_VOLUME_LABEL = 'Cybook Gen 3 Main Memory'
    STORAGE_CARD_VOLUME_LABEL = 'Cybook Gen 3 Storage Card'

    EBOOK_DIR_MAIN = "eBooks"
    EBOOK_DIR_CARD = "eBooks"
    THUMBNAIL_HEIGHT = 144
    SUPPORTS_SUB_DIRS = True

    def upload_books(self, files, names, on_card=False, end_session=True,
                     metadata=None):
        if on_card and not self._card_prefix:
            raise ValueError(_('The reader has no storage card connected.'))
        if not on_card:
            path = os.path.join(self._main_prefix, self.EBOOK_DIR_MAIN)
        else:
@@ -66,7 +65,7 @@ class CYBOOKG3(USBMS):
        paths = []
        names = iter(names)
        metadata = iter(metadata)

        for infile in files:
            newpath = path
            mdata = metadata.next()
@@ -83,20 +82,20 @@ class CYBOOKG3(USBMS):
                        newpath += tag
                        newpath = os.path.normpath(newpath)
                        break

            if newpath == path:
                newpath = os.path.join(newpath, authors_to_string(mdata.get('authors', '')))
                newpath = os.path.join(newpath, mdata.get('title', ''))

            if not os.path.exists(newpath):
                os.makedirs(newpath)

            filepath = os.path.join(newpath, names.next())
            paths.append(filepath)

            if hasattr(infile, 'read'):
                infile.seek(0)

                dest = open(filepath, 'wb')
                shutil.copyfileobj(infile, dest, 10*1024*1024)
@@ -104,35 +103,35 @@ class CYBOOKG3(USBMS):
                dest.close()
            else:
                shutil.copy2(infile, filepath)

            coverdata = None
            if 'cover' in mdata.keys():
                if mdata['cover'] != None:
                    coverdata = mdata['cover'][2]

            t2bfile = open('%s_6090.t2b' % (os.path.splitext(filepath)[0]), 'wb')
            t2b.write_t2b(t2bfile, coverdata)
            t2bfile.close()

        return zip(paths, cycle([on_card]))

    def delete_books(self, paths, end_session=True):
        for path in paths:
            if os.path.exists(path):
                os.unlink(path)

                filepath, ext = os.path.splitext(path)

-                # Delete the ebook auxiliary files
+                # Delete the ebook auxiliary file
                if os.path.exists(filepath + '.mbp'):
                    os.unlink(filepath + '.mbp')
                if os.path.exists(filepath + '.dat'):
                    os.unlink(filepath + '.dat')

                # Delete the thumbnails file auto generated for the ebook
                if os.path.exists(filepath + '_6090.t2b'):
                    os.unlink(filepath + '_6090.t2b')

            try:
                os.removedirs(os.path.dirname(path))
            except:
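delete_books above removes, alongside each ebook, the sidecar files the device generates next to it (.mbp, .dat and the _6090.t2b thumbnail), then prunes directories left empty. A generic sketch of the same cleanup pattern (suffixes follow the driver above; the helper name is illustrative):

import os

def delete_with_sidecars(path, sidecar_exts=('.mbp', '.dat'),
                         thumb_suffix='_6090.t2b'):
    if os.path.exists(path):
        os.unlink(path)
    base = os.path.splitext(path)[0]
    for ext in sidecar_exts:
        if os.path.exists(base + ext):
            os.unlink(base + ext)
    # Thumbnail auto-generated for the ebook
    if os.path.exists(base + thumb_suffix):
        os.unlink(base + thumb_suffix)
    try:
        # Remove the containing directories if now empty
        os.removedirs(os.path.dirname(path))
    except OSError:
        pass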

View File

@@ -0,0 +1,2 @@
+__license__ = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

View File

@@ -0,0 +1,41 @@
+__license__ = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+'''
+Device driver for the Netronix EB600
+'''
+from calibre.devices.usbms.driver import USBMS
+
+class EB600(USBMS):
+    # Ordered list of supported formats
+    FORMATS = ['epub', 'prc', 'chm', 'djvu', 'html', 'rtf', 'txt', 'pdf']
+    DRM_FORMATS = ['prc', 'mobi', 'html', 'pdf', 'txt']
+
+    VENDOR_ID = [0x1f85]
+    PRODUCT_ID = [0x1688]
+    BCD = [0x110]
+
+    VENDOR_NAME = 'NETRONIX'
+    WINDOWS_MAIN_MEM = 'EBOOK'
+    WINDOWS_CARD_MEM = 'EBOOK'
+
+    OSX_MAIN_MEM = 'EB600 Internal Storage Media'
+    OSX_CARD_MEM = 'EB600 Card Storage Media'
+
+    MAIN_MEMORY_VOLUME_LABEL = 'EB600 Main Memory'
+    STORAGE_CARD_VOLUME_LABEL = 'EB600 Storage Card'
+
+    EBOOK_DIR_MAIN = ''
+    EBOOK_DIR_CARD = ''
+    SUPPORTS_SUB_DIRS = True
+
+    def windows_sort_drives(self, drives):
+        main = drives['main']
+        card = drives['card']
+        if card and main and card < main:
+            drives['main'] = card
+            drives['card'] = main
+        return drives
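Windows reports both EB600 volumes under the same name ('EBOOK'), so main memory and card cannot be told apart by WINDOWS_CARD_MEM; windows_sort_drives falls back on drive-letter order. A standalone sketch of that logic:

def windows_sort_drives(drives):
    # Same comparison as EB600.windows_sort_drives above: the lower
    # drive letter is assumed to be main memory.
    main, card = drives.get('main'), drives.get('card')
    if card and main and card < main:
        drives['main'], drives['card'] = card, main
    return drives

print windows_sort_drives({'main': 'G:\\', 'card': 'E:\\'})
# {'main': 'E:\\', 'card': 'G:\\'}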

View File

@@ -174,6 +174,14 @@ class Device(_Device):
        return prefix

+    def windows_sort_drives(self, drives):
+        '''
+        Called to disambiguate main memory and storage card for devices that
+        do not distinguish between them on the basis of `WINDOWS_CARD_NAME`.
+        For e.g.: The EB600
+        '''
+        return drives
+
    def open_windows(self):
        time.sleep(6)
        drives = {}
@@ -188,11 +196,14 @@ class Device(_Device):
            if 'main' in drives.keys() and 'card' in drives.keys():
                break

+        drives = self.windows_sort_drives(drives)
        self._main_prefix = drives.get('main')
        self._card_prefix = drives.get('card')

        if not self._main_prefix:
-            raise DeviceError(_('Unable to detect the %s disk drive. Try rebooting.') % self.__class__.__name__)
+            raise DeviceError(
+                _('Unable to detect the %s disk drive. Try rebooting.') %
+                self.__class__.__name__)

    def get_osx_mountpoints(self, raw=None):
        if raw is None:

View File

@@ -36,7 +36,7 @@ import os, sys, cStringIO, logging, re, functools, shutil

from lxml.etree import XPath
from lxml import html, etree
-from PyQt4.Qt import QApplication, QPixmap
+from PyQt4.Qt import QApplication, QPixmap, Qt

from calibre.ebooks.html_old import Processor, merge_metadata, get_filelist,\
    opf_traverse, create_metadata, rebase_toc, Link, parser
@@ -50,7 +50,7 @@ from calibre.ebooks.epub.pages import add_page_map
from calibre.ebooks.epub.fonts import Rationalizer
from calibre.constants import preferred_encoding
from calibre.customize.ui import run_plugins_on_postprocess
-from calibre import walk, CurrentDir, to_unicode
+from calibre import walk, CurrentDir, to_unicode, fit_image

content = functools.partial(os.path.join, u'content')
@@ -112,6 +112,31 @@ def find_html_index(files):
        return f, os.path.splitext(f)[1].lower()[1:]
    return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:]

+def rescale_images(imgdir, screen_size, log):
+    pwidth, pheight = screen_size
+    if QApplication.instance() is None:
+        QApplication([])
+    for f in os.listdir(imgdir):
+        path = os.path.join(imgdir, f)
+        if os.path.splitext(f)[1] in ('.css', '.js'):
+            continue
+        p = QPixmap()
+        p.load(path)
+        if p.isNull():
+            continue
+        width, height = p.width(), p.height()
+        scaled, new_width, new_height = fit_image(width, height, pwidth,
+                pheight)
+        if scaled:
+            log.info('Rescaling image: '+f)
+            p.scaled(new_width, new_height, Qt.IgnoreAspectRatio,
+                    Qt.SmoothTransformation).save(path, 'JPEG')
+
class HTMLProcessor(Processor, Rationalizer):

    def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, stylesheets):
@@ -482,6 +507,10 @@ def convert(htmlfile, opts, notification=None, create_epub=True,
    if os.stat(ncx_path).st_size > opts.profile.flow_size:
        logger.warn('NCX still larger than allowed size at %d bytes. Menu based Table of Contents may not work on device.'%os.stat(ncx_path).st_size)

+    if opts.profile.screen_size is not None:
+        rescale_images(os.path.join(tdir, 'content', 'resources'),
+                opts.profile.screen_size, logger)
+
    if create_epub:
        epub = initialize_container(opts.output)
        epub.add_dir(tdir)
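rescale_images relies on fit_image to decide whether a raster needs shrinking; since the returned dimensions already preserve the aspect ratio, the QPixmap call can safely use Qt.IgnoreAspectRatio. A plain-Python sketch of what fit_image computes (an assumption about the helper's behavior, not calibre's actual implementation):

def fit_image(width, height, pwidth, pheight):
    # Scale (width, height) down to fit inside (pwidth, pheight),
    # keeping the aspect ratio; never scales up.
    scaled = width > pwidth or height > pheight
    if scaled:
        ratio = min(float(pwidth)/width, float(pheight)/height)
        width, height = int(width*ratio), int(height*ratio)
    return scaled, width, height

print fit_image(1200, 1600, 600, 800)   # (True, 600, 800)
print fit_image(300, 400, 600, 800)     # (False, 300, 400)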

View File

@@ -17,7 +17,7 @@ def tostring(root, strip_comments=False, pretty_print=False):
    root.set('xmlns', 'http://www.w3.org/1999/xhtml')
    root.set('{http://www.w3.org/1999/xhtml}xlink', 'http://www.w3.org/1999/xlink')
    for x in root.iter():
-        if x.tag.rpartition('}')[-1].lower() == 'svg':
+        if hasattr(x.tag, 'rpartition') and x.tag.rpartition('}')[-1].lower() == 'svg':
            x.set('xmlns', 'http://www.w3.org/2000/svg')

    ans = _tostring(root, encoding='utf-8', pretty_print=pretty_print)
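The new hasattr() guard matters because root.iter() also yields comments and processing instructions, whose .tag is a callable rather than a string in lxml. A short demonstration:

from lxml import etree

root = etree.fromstring('<root><!-- a comment --><svg/></root>')
for x in root.iter():
    print repr(x.tag), hasattr(x.tag, 'rpartition')
# 'root' True
# <Comment function> False  (calling .rpartition here would raise)
# 'svg' True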

View File

@@ -11,14 +11,12 @@ __docformat__ = 'restructuredtext en'
Input plugin for HTML or OPF ebooks.
'''

-import os, re, sys, cStringIO
+import os, re, sys

from urlparse import urlparse, urlunparse
from urllib import unquote

from calibre.customize.conversion import InputFormatPlugin
-from calibre.ebooks.metadata.meta import get_metadata
-from calibre.ebooks.metadata.opf2 import OPF, OPFCreator
-from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.chardet import xml_to_unicode
from calibre.customize.conversion import OptionRecommendation
from calibre import unicode_path
@@ -213,72 +211,21 @@ def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None)
        sys.setrecursionlimit(orec)

-def opf_traverse(opf_reader, verbose=0, encoding=None):
-    '''
-    Return a list of :class:`HTMLFile` objects in the order specified by the
-    `<spine>` element of the OPF.
-
-    :param opf_reader: An :class:`calibre.ebooks.metadata.opf2.OPF` instance.
-    :param encoding: Specify character encoding of HTML files. If `None` it is
-                     auto-detected.
-    '''
-    if not opf_reader.spine:
-        raise ValueError('OPF does not have a spine')
-    flat = []
-    for path in opf_reader.spine.items():
-        path = os.path.abspath(path)
-        if path not in flat:
-            flat.append(os.path.abspath(path))
-    for item in opf_reader.manifest:
-        if 'html' in item.mime_type:
-            path = os.path.abspath(item.path)
-            if path not in flat:
-                flat.append(path)
-    for i, path in enumerate(flat):
-        if not os.path.exists(path):
-            path = path.replace('&', '%26')
-            if os.path.exists(path):
-                flat[i] = path
-                for item in opf_reader.itermanifest():
-                    item.set('href', item.get('href').replace('&', '%26'))
-    ans = []
-    for path in flat:
-        if os.path.exists(path):
-            ans.append(HTMLFile(path, 0, encoding, verbose))
-        else:
-            print 'WARNING: OPF spine item %s does not exist'%path
-    ans = [f for f in ans if not f.is_binary]
-    return ans
-
-def search_for_opf(dir):
-    for f in os.listdir(dir):
-        if f.lower().endswith('.opf'):
-            return OPF(open(os.path.join(dir, f), 'rb'), dir)
-
def get_filelist(htmlfile, dir, opts, log):
    '''
    Build list of files referenced by html file or try to detect and use an
    OPF file instead.
    '''
-    print 'Building file list...'
-    opf = search_for_opf(dir)
-    filelist = None
-    if opf is not None:
-        try:
-            filelist = opf_traverse(opf, verbose=opts.verbose,
-                                    encoding=opts.input_encoding)
-        except:
-            pass
-    if not filelist:
-        filelist = traverse(htmlfile, max_levels=int(opts.max_levels),
-                            verbose=opts.verbose,
-                            encoding=opts.input_encoding)\
-                            [0 if opts.breadth_first else 1]
+    log.info('Building file list...')
+    filelist = traverse(htmlfile, max_levels=int(opts.max_levels),
+            verbose=opts.verbose,
+            encoding=opts.input_encoding)\
+            [0 if opts.breadth_first else 1]
    if opts.verbose:
        log.debug('\tFound files...')
        for f in filelist:
            log.debug('\t\t', f)
-    return opf, filelist
+    return filelist

class HTMLInput(InputFormatPlugin):
@@ -309,34 +256,32 @@ class HTMLInput(InputFormatPlugin):
    def convert(self, stream, opts, file_ext, log,
                accelerators):
+        from calibre.ebooks.metadata.meta import get_metadata
        basedir = os.getcwd()
        if hasattr(stream, 'name'):
            basedir = os.path.dirname(stream.name)
        if file_ext == 'opf':
-            opf = OPF(stream, basedir)
-            filelist = opf_traverse(opf, verbose=opts.verbose,
-                                    encoding=opts.input_encoding)
-            mi = MetaInformation(opf)
+            opfpath = stream.name
        else:
-            opf, filelist = get_filelist(stream.name, basedir, opts, log)
-            mi = MetaInformation(opf)
-            mi.smart_update(get_metadata(stream, 'html'))
-
-        mi = OPFCreator(os.getcwdu(), mi)
-        mi.guide = None
-        entries = [(f.path, 'application/xhtml+xml') for f in filelist]
-        mi.create_manifest(entries)
-        mi.create_spine([f.path for f in filelist])
-
-        tocbuf = cStringIO.StringIO()
-        mi.render(open('metadata.opf', 'wb'), tocbuf, 'toc.ncx')
-        toc = tocbuf.getvalue()
-        if toc:
-            open('toc.ncx', 'wb').write(toc)
+            filelist = get_filelist(stream.name, basedir, opts, log)
+            mi = get_metadata(stream, 'html')
+            mi = OPFCreator(os.getcwdu(), mi)
+            mi.guide = None
+            entries = [(f.path, 'application/xhtml+xml') for f in filelist]
+            mi.create_manifest(entries)
+            mi.create_spine([f.path for f in filelist])
+            mi.render(open('metadata.opf', 'wb'))
+            opfpath = os.path.abspath('metadata.opf')

        from calibre.ebooks.conversion.plumber import create_oebbook
-        return create_oebbook(log, os.path.abspath('metadata.opf'))
+        oeb = create_oebbook(log, opfpath)
+        from calibre.ebooks.oeb.transforms.package import Package
+        Package(os.getcwdu())(oeb, opts)
+        return oeb
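The rewritten convert() no longer hunts for a stray OPF next to the input: an OPF argument is used directly, while bare HTML gets a synthesized metadata.opf built with OPFCreator before being handed to the plumber. A hedged sketch of that OPF-generation step in isolation (file names hypothetical; assumes calibre's Python 2 environment is importable):

import os
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPFCreator

mi = MetaInformation('A title', ['An Author'])
opf = OPFCreator(os.getcwdu(), mi)
opf.guide = None
files = ['index.html', 'chapter1.html']
opf.create_manifest([(f, 'application/xhtml+xml') for f in files])
opf.create_spine(files)
opf.render(open('metadata.opf', 'wb'))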

View File

@@ -11,9 +11,7 @@ from urllib import unquote, quote
from urlparse import urlparse

-from calibre.constants import __version__ as VERSION
from calibre import relpath
-from calibre.utils.config import OptionParser

def string_to_authors(raw):
    raw = raw.replace('&&', u'\uffff')
@@ -189,11 +187,11 @@ class MetaInformation(object):
                     'publisher', 'series', 'series_index', 'rating',
                     'isbn', 'tags', 'cover_data', 'application_id', 'guide',
                     'manifest', 'spine', 'toc', 'cover', 'language',
-                     'book_producer', 'timestamp'):
+                     'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc'):
            if hasattr(mi, attr):
                setattr(ans, attr, getattr(mi, attr))

-    def __init__(self, title, authors=[_('Unknown')]):
+    def __init__(self, title, authors=(_('Unknown'),)):
        '''
        @param title: title or ``_('Unknown')`` or a MetaInformation object
        @param authors: List of strings or []
@@ -204,9 +202,9 @@ class MetaInformation(object):
            title = mi.title
            authors = mi.authors
        self.title = title
-        self.author = authors # Needed for backward compatibility
+        self.author = list(authors) if authors else []# Needed for backward compatibility
        #: List of strings or []
-        self.authors = authors
+        self.authors = list(authors) if authors else []
        self.tags = getattr(mi, 'tags', [])
        #: mi.cover_data = (ext, data)
        self.cover_data = getattr(mi, 'cover_data', (None, None))
@@ -214,7 +212,7 @@ class MetaInformation(object):
        for x in ('author_sort', 'title_sort', 'comments', 'category', 'publisher',
                  'series', 'series_index', 'rating', 'isbn', 'language',
                  'application_id', 'manifest', 'toc', 'spine', 'guide', 'cover',
-                  'book_producer', 'timestamp'
+                  'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc'
                  ):
            setattr(self, x, getattr(mi, x, None))
@@ -229,15 +227,15 @@ class MetaInformation(object):
        if mi.authors and mi.authors[0] != _('Unknown'):
            self.authors = mi.authors

        for attr in ('author_sort', 'title_sort', 'category',
                     'publisher', 'series', 'series_index', 'rating',
                     'isbn', 'application_id', 'manifest', 'spine', 'toc',
                     'cover', 'language', 'guide', 'book_producer',
-                     'timestamp'):
-            val = getattr(mi, attr, None)
-            if val is not None:
-                setattr(self, attr, val)
+                     'timestamp', 'lccn', 'lcc', 'ddc'):
+            if hasattr(mi, attr):
+                val = getattr(mi, attr)
+                if val is not None:
+                    setattr(self, attr, val)

        if mi.tags:
            self.tags += mi.tags
@@ -245,7 +243,7 @@ class MetaInformation(object):
        if getattr(mi, 'cover_data', None) and mi.cover_data[0] is not None:
            self.cover_data = mi.cover_data

        my_comments = getattr(self, 'comments', '')
        other_comments = getattr(mi, 'comments', '')
        if not my_comments:
@@ -254,7 +252,7 @@ class MetaInformation(object):
            other_comments = ''
        if len(other_comments.strip()) > len(my_comments.strip()):
            self.comments = other_comments

    def format_series_index(self):
        try:
            x = float(self.series_index)
@@ -293,6 +291,13 @@ class MetaInformation(object):
            fmt('Rating', self.rating)
        if self.timestamp is not None:
            fmt('Timestamp', self.timestamp.isoformat(' '))
+        if self.lccn:
+            fmt('LCCN', unicode(self.lccn))
+        if self.lcc:
+            fmt('LCC', unicode(self.lcc))
+        if self.ddc:
+            fmt('DDC', unicode(self.ddc))

        return u'\n'.join(ans)

    def to_html(self):
@@ -302,6 +307,12 @@ class MetaInformation(object):
        ans += [(_('Producer'), unicode(self.book_producer))]
        ans += [(_('Comments'), unicode(self.comments))]
        ans += [('ISBN', unicode(self.isbn))]
+        if self.lccn:
+            ans += [('LCCN', unicode(self.lccn))]
+        if self.lcc:
+            ans += [('LCC', unicode(self.lcc))]
+        if self.ddc:
+            ans += [('DDC', unicode(self.ddc))]
        ans += [(_('Tags'), u', '.join([unicode(t) for t in self.tags]))]
        if self.series:
            ans += [(_('Series'), unicode(self.series)+ ' #%s'%self.format_series_index())]
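With lccn, lcc and ddc added to the attribute tuples, the Library of Congress and Dewey classification fields now survive both copying and smart_update merging. A quick sketch (sample values; assumes calibre's Python 2 environment):

from calibre.ebooks.metadata import MetaInformation

mi = MetaInformation('A title', ['An Author'])
mi.lccn, mi.ddc = '2009012345', '823.914'
other = MetaInformation('A title')
other.smart_update(mi)
print other.lccn, other.ddc  # copied by the loop patched above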

View File

@@ -59,8 +59,9 @@ class EXTHHeader(object):
            elif id == 502:
                # last update time
                pass
-            elif id == 503 and (not title or title == _('Unknown')):
-                title = content
+            elif id == 503: # Long title
+                if not title or title == _('Unknown'):
+                    title = content
            #else:
            #    print 'unknown record', id, repr(content)
        if title:
@@ -87,6 +88,8 @@ class EXTHHeader(object):
                        content, '%Y-%m-%d',).date()
            except:
                pass
+        elif id == 108:
+            pass # Producer
        #else:
        #    print 'unhandled metadata record', id, repr(content)
@@ -522,7 +525,8 @@ class MobiReader(object):
        else:
            raise MobiError('Unknown compression algorithm: %s'%repr(self.book_header.compression_type))
        if self.book_header.ancient and '<html' not in self.mobi_html[:300].lower():
-            self.mobi_html = self.mobi_html.replace('\r ', '\n\n').replace('\0', '')
+            self.mobi_html = self.mobi_html.replace('\r ', '\n\n ')
+            self.mobi_html = self.mobi_html.replace('\0', '')
        return processed_records

View File

@@ -573,7 +573,7 @@ class OEBReader(object):
        item = self._find_ncx(opf)
        self._toc_from_opf(opf, item)
        self._pages_from_opf(opf, item)
-        self._ensure_cover_image()
+        #self._ensure_cover_image()

def main(argv=sys.argv):

View File

@@ -6,13 +6,14 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

-import os
+import os, re

from urllib import unquote as urlunquote
from functools import partial

from lxml import etree
import cssutils

+from calibre import sanitize_file_name
from calibre.constants import islinux
from calibre.ebooks.oeb.base import OEB_DOCS, urlnormalize, urldefrag, \
    rewrite_links
@@ -36,15 +37,21 @@ class Package(object):
        self.new_base_path = os.path.abspath(base)

    def rewrite_links_in(self, item):
-        base = os.path.join(self.new_base_path, *item.href.split('/'))
+        old_href = item.old_href.split('#')[0]
+        new_href = item.href.split('#')[0]
+        base = os.path.join(self.old_base_path, *old_href.split('/'))
        base = os.path.dirname(base)
+        self.log.debug('\tRewriting links in', base+'/'+
+                item.href.rpartition('/')[-1])
+        new_base = os.path.join(self.new_base_path, *new_href.split('/'))
+        new_base = os.path.dirname(new_base)

        if etree.iselement(item.data):
-            self.rewrite_links_in_xml(item.data, base)
+            self.rewrite_links_in_xml(item.data, base, new_base)
        elif hasattr(item.data, 'cssText'):
-            self.rewrite_links_in_css(item.data, base)
+            self.rewrite_links_in_css(item.data, base, new_base)

-    def link_replacer(self, link_, base=''):
+    def link_replacer(self, link_, base='', new_base=''):
        link = urlnormalize(link_)
        link, frag = urldefrag(link)
        link = urlunquote(link).replace('/', os.sep)
@@ -55,20 +62,33 @@ class Package(object):
            link = link.lower()
        if link not in self.map:
            return link_
-        nlink = os.path.relpath(self.map[link], base)
+        nlink = os.path.relpath(self.map[link], new_base)
        if frag:
-            nlink = '#'.join(nlink, frag)
+            nlink = '#'.join((nlink, frag))
        return nlink.replace(os.sep, '/')

-    def rewrite_links_in_css(self, sheet, base):
-        repl = partial(self.link_replacer, base=base)
+    def rewrite_links_in_css(self, sheet, base, new_base):
+        repl = partial(self.link_replacer, base=base, new_base=new_base)
        cssutils.replaceUrls(sheet, repl)

-    def rewrite_links_in_xml(self, root, base):
-        repl = partial(self.link_replacer, base=base)
+    def rewrite_links_in_xml(self, root, base, new_base):
+        repl = partial(self.link_replacer, base=base, new_base=new_base)
        rewrite_links(root, repl)

-    def move_manifest_item(self, item):
+    def uniqify_name(self, new_href, hrefs):
+        c = 0
+        while new_href in hrefs:
+            c += 1
+            parts = new_href.split('/')
+            name, ext = os.path.splitext(parts[-1])
+            name = re.sub(r'_\d+$', '', name)
+            name += '_%d'%c
+            parts[-1] = name + ext
+            new_href = '/'.join(parts)
+        return new_href
+
+    def move_manifest_item(self, item, hrefs):
        item.data # Make sure the data has been loaded and cached
        old_abspath = os.path.join(self.old_base_path,
                *(urldefrag(item.href)[0].split('/')))
@@ -79,11 +99,17 @@ class Package(object):
            new_href = 'content/'
        elif item.href.lower().endswith('.ncx'):
            new_href = ''
-        new_href += bname
+        new_href += sanitize_file_name(bname)
+
+        if new_href in hrefs:
+            new_href = self.uniqify_name(new_href, hrefs)
+        hrefs.add(new_href)

        new_abspath = os.path.join(self.new_base_path, *new_href.split('/'))
        new_abspath = os.path.abspath(new_abspath)
+        item.old_href = self.oeb.manifest.hrefs.pop(item.href).href
        item.href = new_href
+        self.oeb.manifest.hrefs[item.href] = item
        if not islinux:
            old_abspath, new_abspath = old_abspath.lower(), new_abspath.lower()
        if old_abspath != new_abspath:
@@ -91,25 +117,33 @@ class Package(object):

    def rewrite_links_in_toc(self, toc):
        if toc.href:
-            toc.href = self.link_replacer(toc.href, base=self.new_base_path)
+            toc.href = self.link_replacer(toc.href, base=self.old_base_path,
+                    new_base=self.new_base_path)

        for x in toc:
            self.rewrite_links_in_toc(x)

    def __call__(self, oeb, context):
        self.map = {}
-        self.log = self.oeb.log
+        self.log = oeb.log
+        self.oeb = oeb
        self.old_base_path = os.path.abspath(oeb.container.rootdir)
+        hrefs = set([])
        for item in self.oeb.manifest:
-            self.move_manifest_item(item)
+            self.move_manifest_item(item, hrefs)

+        self.log.debug('Rewriting links in OEB documents...')
        for item in self.oeb.manifest:
            self.rewrite_links_in(item)

        if getattr(oeb.toc, 'nodes', False):
+            self.log.debug('Rewriting links in TOC...')
            self.rewrite_links_in_toc(oeb.toc)

        if hasattr(oeb, 'guide'):
+            self.log.debug('Rewriting links in guide...')
            for ref in oeb.guide.values():
-                ref.href = self.link_replacer(ref.href, base=self.new_base_path)
+                ref.href = self.link_replacer(ref.href,
+                        base=self.old_base_path,
+                        new_base=self.new_base_path)
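uniqify_name keeps manifest hrefs unique after sanitize_file_name flattens different source paths onto the same name: it strips any previous _<n> suffix, then appends a fresh counter until the href is unused. Standalone sketch of the same logic:

import os, re

def uniqify_name(new_href, hrefs):
    c = 0
    while new_href in hrefs:
        c += 1
        parts = new_href.split('/')
        name, ext = os.path.splitext(parts[-1])
        name = re.sub(r'_\d+$', '', name)  # drop a stale _<n> suffix
        parts[-1] = name + '_%d'%c + ext
        new_href = '/'.join(parts)
    return new_href

print uniqify_name('content/index.html',
        set(['content/index.html', 'content/index_1.html']))
# content/index_2.html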

View File

@@ -48,7 +48,8 @@ class OEBWriter(object):
                pretty_print=pretty_print)

    def __call__(self, oeb, path):
-        """Read the book in the :class:`OEBBook` object :param:`oeb` to a file
+        """
+        Read the book in the :class:`OEBBook` object :param:`oeb` to a file
        at :param:`path`.
        """
        version = int(self.version[0])

View File

@@ -466,5 +466,3 @@ class Application(QApplication):
            self.translator.loadFromData(data)
            self.installTranslator(self.translator)

View File

@@ -199,7 +199,7 @@ class EmailAccounts(QAbstractTableModel):
                return (account, self.accounts[account])
            if role == Qt.ToolTipRole:
                return self.tooltips[col]
-            if role == Qt.DisplayRole:
+            if role in [Qt.DisplayRole, Qt.EditRole]:
                if col == 0:
                    return QVariant(account)
                if col == 1:
@@ -397,6 +397,9 @@ class ConfigDialog(QDialog, Ui_Dialog):
        self.separate_cover_flow.setChecked(config['separate_cover_flow'])
        self.setup_email_page()
        self.category_view.setCurrentIndex(self.category_view.model().index(0))
+        self.delete_news.setEnabled(bool(self.sync_news.isChecked()))
+        self.connect(self.sync_news, SIGNAL('toggled(bool)'),
+                self.delete_news.setEnabled)

    def setup_email_page(self):
        opts = smtp_prefs().parse()
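The two added lines keep delete_news usable only while sync_news is checked, by wiring the checkbox's toggled(bool) signal straight to setEnabled. The same pattern in a minimal PyQt4 sketch (old-style signal syntax, as in the dialog above):

from PyQt4.Qt import QApplication, QCheckBox, SIGNAL

app = QApplication([])
sync_news, delete_news = QCheckBox('Sync'), QCheckBox('Delete')
delete_news.setEnabled(bool(sync_news.isChecked()))
# Any bound method taking a bool works as the slot:
app.connect(sync_news, SIGNAL('toggled(bool)'), delete_news.setEnabled)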

View File

@@ -371,7 +371,7 @@
   <item>
    <widget class="QCheckBox" name="delete_news">
     <property name="text">
-     <string>&amp;Delete news from library when it is sent to reader</string>
+     <string>&amp;Delete news from library when it is automatically sent to reader</string>
     </property>
   </widget>
  </item>

View File

@@ -324,7 +324,7 @@
    <string>Book </string>
   </property>
   <property name="minimum">
-    <number>1</number>
+    <number>0</number>
   </property>
   <property name="maximum">
    <number>10000</number>

View File

@ -25,7 +25,7 @@ from calibre.gui2.dialogs.user_profiles import UserProfiles
config = DynamicConfig('scheduler') config = DynamicConfig('scheduler')
class Recipe(object): class Recipe(object):
def __init__(self, id=None, recipe_class=None, builtin=True): def __init__(self, id=None, recipe_class=None, builtin=True):
self.id = id self.id = id
self.title = getattr(recipe_class, 'title', None) self.title = getattr(recipe_class, 'title', None)
@ -39,14 +39,14 @@ class Recipe(object):
if self.author == _('Unknown') and not builtin: if self.author == _('Unknown') and not builtin:
self.author = _('You') self.author = _('You')
self.needs_subscription = getattr(recipe_class, 'needs_subscription', False) self.needs_subscription = getattr(recipe_class, 'needs_subscription', False)
def pickle(self): def pickle(self):
return self.__dict__.copy() return self.__dict__.copy()
def unpickle(self, dict): def unpickle(self, dict):
self.__dict__.update(dict) self.__dict__.update(dict)
return self return self
def __cmp__(self, other): def __cmp__(self, other):
if self.id == getattr(other, 'id', None): if self.id == getattr(other, 'id', None):
return 0 return 0
@ -59,38 +59,39 @@ class Recipe(object):
if not self.builtin and getattr(other, 'builtin', True): if not self.builtin and getattr(other, 'builtin', True):
return -1 return -1
return english_sort(self.title, getattr(other, 'title', '')) return english_sort(self.title, getattr(other, 'title', ''))
def __hash__(self): def __hash__(self):
return hash(self.id) return hash(self.id)
def __eq__(self, other): def __eq__(self, other):
return self.id == getattr(other, 'id', None) return self.id == getattr(other, 'id', None)
def __repr__(self): def __repr__(self):
schedule = self.schedule schedule = self.schedule
if schedule and schedule > 1e5: if schedule and schedule > 1e5:
schedule = decode_schedule(schedule) schedule = decode_schedule(schedule)
return u'%s|%s|%s|%s'%(self.id, self.title, self.last_downloaded.ctime(), schedule) return u'%s|%s|%s|%s'%(self.id, self.title, self.last_downloaded.ctime(), schedule)
builtin_recipes = [Recipe(m, r, True) for r, m in zip(recipes, recipe_modules)] builtin_recipes = [Recipe(m, r, True) for r, m in zip(recipes, recipe_modules)]
def save_recipes(recipes): def save_recipes(recipes):
config['scheduled_recipes'] = [r.pickle() for r in recipes] config['scheduled_recipes'] = [r.pickle() for r in recipes]
def load_recipes(): def load_recipes():
config.refresh() config.refresh()
recipes = [] recipes = []
for r in config.get('scheduled_recipes', []): for r in config.get('scheduled_recipes', []):
r = Recipe().unpickle(r) r = Recipe().unpickle(r)
if r.builtin and not str(r.id).startswith('recipe_'): if r.builtin and \
(not str(r.id).startswith('recipe_') or not str(r.id) in recipe_modules):
continue continue
recipes.append(r) recipes.append(r)
return recipes return recipes
class RecipeModel(QAbstractItemModel, SearchQueryParser): class RecipeModel(QAbstractItemModel, SearchQueryParser):
LOCATIONS = ['all'] LOCATIONS = ['all']
def __init__(self, db, *args): def __init__(self, db, *args):
QAbstractItemModel.__init__(self, *args) QAbstractItemModel.__init__(self, *args)
SearchQueryParser.__init__(self) SearchQueryParser.__init__(self)
@ -104,18 +105,18 @@ class RecipeModel(QAbstractItemModel, SearchQueryParser):
self.bold_font = QFont() self.bold_font = QFont()
self.bold_font.setBold(True) self.bold_font.setBold(True)
self.bold_font = QVariant(self.bold_font) self.bold_font = QVariant(self.bold_font)
def refresh(self): def refresh(self):
sr = load_recipes() sr = load_recipes()
for recipe in self.recipes: for recipe in self.recipes:
if recipe in sr: if recipe in sr:
recipe.schedule = sr[sr.index(recipe)].schedule recipe.schedule = sr[sr.index(recipe)].schedule
recipe.last_downloaded = sr[sr.index(recipe)].last_downloaded recipe.last_downloaded = sr[sr.index(recipe)].last_downloaded
self.recipes.sort() self.recipes.sort()
self.num_of_recipes = len(self.recipes) self.num_of_recipes = len(self.recipes)
self.category_map = {} self.category_map = {}
for r in self.recipes: for r in self.recipes:
category = getattr(r, 'language', _('Unknown')) category = getattr(r, 'language', _('Unknown'))
@ -126,12 +127,12 @@ class RecipeModel(QAbstractItemModel, SearchQueryParser):
if category not in self.category_map.keys(): if category not in self.category_map.keys():
self.category_map[category] = [] self.category_map[category] = []
self.category_map[category].append(r) self.category_map[category].append(r)
self.categories = sorted(self.category_map.keys(), cmp=self.sort_categories) self.categories = sorted(self.category_map.keys(), cmp=self.sort_categories)
self._map = dict(self.category_map) self._map = dict(self.category_map)
def sort_categories(self, x, y): def sort_categories(self, x, y):
def decorate(x): def decorate(x):
if x == _('Scheduled'): if x == _('Scheduled'):
x = '0' + x x = '0' + x
@ -140,13 +141,13 @@ class RecipeModel(QAbstractItemModel, SearchQueryParser):
else: else:
x = '2' + x x = '2' + x
return x return x
return cmp(decorate(x), decorate(y)) return cmp(decorate(x), decorate(y))
def universal_set(self): def universal_set(self):
return set(self.recipes) return set(self.recipes)
def get_matches(self, location, query): def get_matches(self, location, query):
query = query.strip().lower() query = query.strip().lower()
if not query: if not query:
@ -154,9 +155,9 @@ class RecipeModel(QAbstractItemModel, SearchQueryParser):
results = set([]) results = set([])
for recipe in self.recipes: for recipe in self.recipes:
if query in recipe.title.lower() or query in recipe.description.lower(): if query in recipe.title.lower() or query in recipe.description.lower():
results.add(recipe) results.add(recipe)
return results return results
def search(self, query): def search(self, query):
try: try:
results = self.parse(unicode(query)) results = self.parse(unicode(query))
@ -170,24 +171,24 @@ class RecipeModel(QAbstractItemModel, SearchQueryParser):
if recipe in results: if recipe in results:
self._map[category].append(recipe) self._map[category].append(recipe)
self.reset() self.reset()
def resort(self): def resort(self):
self.recipes.sort() self.recipes.sort()
self.reset() self.reset()
def index(self, row, column, parent): def index(self, row, column, parent):
return self.createIndex(row, column, parent.row() if parent.isValid() else -1) return self.createIndex(row, column, parent.row() if parent.isValid() else -1)
def parent(self, index): def parent(self, index):
if index.internalId() == -1: if index.internalId() == -1:
return QModelIndex() return QModelIndex()
return self.createIndex(index.internalId(), 0, -1) return self.createIndex(index.internalId(), 0, -1)
def columnCount(self, parent): def columnCount(self, parent):
if not parent.isValid() or not parent.parent().isValid(): if not parent.isValid() or not parent.parent().isValid():
return 1 return 1
return 0 return 0
def rowCount(self, parent): def rowCount(self, parent):
if not parent.isValid(): if not parent.isValid():
return len(self.categories) return len(self.categories)
@@ -195,7 +196,7 @@ class RecipeModel(QAbstractItemModel, SearchQueryParser):
category = self.categories[parent.row()] category = self.categories[parent.row()]
return len(self._map[category]) return len(self._map[category])
return 0 return 0
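index, parent, columnCount and rowCount together encode a two-level tree (categories, then recipes) in flat QModelIndex internal ids: -1 marks a top-level category row, and any other value is the row of the parent category. The same addressing scheme in plain Python, Qt aside:

    categories = ['English', 'Scheduled']  # one top-level row per category

    def parent_row(internal_id):
        # -1 means "this index is itself a category"; any other value is the
        # parent category's row, exactly as createIndex() stores it above
        return None if internal_id == -1 else internal_id

    print parent_row(-1)              # None -> QModelIndex() in the real model
    print categories[parent_row(1)]   # 'Scheduled': parent of a recipe index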
def data(self, index, role): def data(self, index, role):
if index.parent().isValid(): if index.parent().isValid():
category = self.categories[index.parent().row()] category = self.categories[index.parent().row()]
@@ -206,7 +207,7 @@ class RecipeModel(QAbstractItemModel, SearchQueryParser):
return recipe return recipe
elif role == Qt.DecorationRole: elif role == Qt.DecorationRole:
icon = self.default_icon icon = self.default_icon
icon_path = (':/images/news/%s.png'%recipe.id).replace('recipe_', '') icon_path = (':/images/news/%s.png'%recipe.id).replace('recipe_', '')
if not recipe.builtin: if not recipe.builtin:
icon = self.custom_icon icon = self.custom_icon
elif QFile().exists(icon_path): elif QFile().exists(icon_path):
@@ -222,18 +223,18 @@ class RecipeModel(QAbstractItemModel, SearchQueryParser):
elif role == Qt.ForegroundRole and category == _('Scheduled'): elif role == Qt.ForegroundRole and category == _('Scheduled'):
return QVariant(QColor(0, 255, 0)) return QVariant(QColor(0, 255, 0))
return NONE return NONE
def update_recipe_schedule(self, recipe): def update_recipe_schedule(self, recipe):
for srecipe in self.recipes: for srecipe in self.recipes:
if srecipe == recipe: if srecipe == recipe:
srecipe.schedule = recipe.schedule srecipe.schedule = recipe.schedule
class Search(QLineEdit): class Search(QLineEdit):
HELP_TEXT = _('Search') HELP_TEXT = _('Search')
INTERVAL = 500 #: Time to wait before emitting search signal INTERVAL = 500 #: Time to wait before emitting search signal
def __init__(self, *args): def __init__(self, *args):
QLineEdit.__init__(self, *args) QLineEdit.__init__(self, *args)
self.default_palette = QApplication.palette(self) self.default_palette = QApplication.palette(self)
@@ -244,20 +245,20 @@ class Search(QLineEdit):
self.clear_to_help_mode() self.clear_to_help_mode()
self.timer = None self.timer = None
self.connect(self, SIGNAL('textEdited(QString)'), self.text_edited_slot) self.connect(self, SIGNAL('textEdited(QString)'), self.text_edited_slot)
def focusInEvent(self, ev): def focusInEvent(self, ev):
self.setPalette(QApplication.palette(self)) self.setPalette(QApplication.palette(self))
if self.in_help_mode(): if self.in_help_mode():
self.setText('') self.setText('')
return QLineEdit.focusInEvent(self, ev) return QLineEdit.focusInEvent(self, ev)
def in_help_mode(self): def in_help_mode(self):
return unicode(self.text()) == self.HELP_TEXT return unicode(self.text()) == self.HELP_TEXT
def clear_to_help_mode(self): def clear_to_help_mode(self):
self.setPalette(self.gray) self.setPalette(self.gray)
self.setText(self.HELP_TEXT) self.setText(self.HELP_TEXT)
def text_edited_slot(self, text): def text_edited_slot(self, text):
text = unicode(text) text = unicode(text)
self.timer = self.startTimer(self.INTERVAL) self.timer = self.startTimer(self.INTERVAL)
@@ -281,7 +282,7 @@ def decode_schedule(num):
return day-1, hour-1, minute-1 return day-1, hour-1, minute-1
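Only the tail of decode_schedule survives in this hunk; its -1 offsets undo +1 offsets applied when the schedule was packed into a single number. A self-consistent sketch of such a pairing (a plausible reconstruction, not necessarily calibre's exact packing):

    def encode_schedule(day, hour, minute):
        # +1 keeps every field non-zero; decode_schedule's -1s undo this
        return ((day + 1) * 100 + (hour + 1)) * 100 + (minute + 1)

    def decode_schedule(num):
        num = int(num)
        minute = num % 100
        hour = (num // 100) % 100
        day = num // 10000
        return day - 1, hour - 1, minute - 1

    print decode_schedule(encode_schedule(3, 14, 30))  # (3, 14, 30)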
class SchedulerDialog(QDialog, Ui_Dialog): class SchedulerDialog(QDialog, Ui_Dialog):
def __init__(self, db, *args): def __init__(self, db, *args):
QDialog.__init__(self, *args) QDialog.__init__(self, *args)
self.setupUi(self) self.setupUi(self)
@@ -308,25 +309,25 @@ class SchedulerDialog(QDialog, Ui_Dialog):
self.search.setFocus(Qt.OtherFocusReason) self.search.setFocus(Qt.OtherFocusReason)
self.old_news.setValue(gconf['oldest_news']) self.old_news.setValue(gconf['oldest_news'])
self.rnumber.setText(_('%d recipes')%self._model.num_of_recipes) self.rnumber.setText(_('%d recipes')%self._model.num_of_recipes)
for day in (_('day'), _('Monday'), _('Tuesday'), _('Wednesday'), for day in (_('day'), _('Monday'), _('Tuesday'), _('Wednesday'),
_('Thursday'), _('Friday'), _('Saturday'), _('Sunday')): _('Thursday'), _('Friday'), _('Saturday'), _('Sunday')):
self.day.addItem(day) self.day.addItem(day)
def currentChanged(self, current, previous): def currentChanged(self, current, previous):
if current.parent().isValid(): if current.parent().isValid():
self.show_recipe(current) self.show_recipe(current)
def download_now(self): def download_now(self):
recipe = self._model.data(self.recipes.currentIndex(), Qt.UserRole) recipe = self._model.data(self.recipes.currentIndex(), Qt.UserRole)
self.emit(SIGNAL('download_now(PyQt_PyObject)'), recipe) self.emit(SIGNAL('download_now(PyQt_PyObject)'), recipe)
def set_account_info(self, *args): def set_account_info(self, *args):
username, password = map(unicode, (self.username.text(), self.password.text())) username, password = map(unicode, (self.username.text(), self.password.text()))
username, password = username.strip(), password.strip() username, password = username.strip(), password.strip()
recipe = self._model.data(self.recipes.currentIndex(), Qt.UserRole) recipe = self._model.data(self.recipes.currentIndex(), Qt.UserRole)
key = 'recipe_account_info_%s'%recipe.id key = 'recipe_account_info_%s'%recipe.id
config[key] = (username, password) if username and password else None config[key] = (username, password) if username and password else None
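set_account_info stores credentials under a per-recipe config key, using None to clear them when either field is blank. A standalone sketch (config here stands in for calibre's persistent config object, and the recipe id is hypothetical):

    config = {}

    def set_account_info(recipe_id, username, password):
        username, password = username.strip(), password.strip()
        key = 'recipe_account_info_%s' % recipe_id
        # A (username, password) tuple when both are present, else None
        config[key] = (username, password) if username and password else None

    set_account_info('nytimes', 'alice', 'secret')
    print config['recipe_account_info_nytimes']  # ('alice', 'secret')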
def do_schedule(self, *args): def do_schedule(self, *args):
if not getattr(self, 'allow_scheduling', False): if not getattr(self, 'allow_scheduling', False):
return return
@@ -342,7 +343,7 @@ class SchedulerDialog(QDialog, Ui_Dialog):
recipe.last_downloaded = datetime.fromordinal(1) recipe.last_downloaded = datetime.fromordinal(1)
recipes.append(recipe) recipes.append(recipe)
if recipe.needs_subscription and not config['recipe_account_info_%s'%recipe.id]: if recipe.needs_subscription and not config['recipe_account_info_%s'%recipe.id]:
error_dialog(self, _('Must set account information'), error_dialog(self, _('Must set account information'),
_('This recipe requires a username and password')).exec_() _('This recipe requires a username and password')).exec_()
self.schedule.setCheckState(Qt.Unchecked) self.schedule.setCheckState(Qt.Unchecked)
return return
@@ -364,7 +365,7 @@ class SchedulerDialog(QDialog, Ui_Dialog):
save_recipes(recipes) save_recipes(recipes)
self._model.update_recipe_schedule(recipe) self._model.update_recipe_schedule(recipe)
self.emit(SIGNAL('new_schedule(PyQt_PyObject)'), recipes) self.emit(SIGNAL('new_schedule(PyQt_PyObject)'), recipes)
def show_recipe(self, index): def show_recipe(self, index):
recipe = self._model.data(index, Qt.UserRole) recipe = self._model.data(index, Qt.UserRole)
self.current_recipe = recipe self.current_recipe = recipe
@@ -395,7 +396,7 @@ class SchedulerDialog(QDialog, Ui_Dialog):
self.interval_button.setChecked(False) self.interval_button.setChecked(False)
self.interval.setEnabled(False) self.interval.setEnabled(False)
self.schedule.setChecked(recipe.schedule is not None) self.schedule.setChecked(recipe.schedule is not None)
self.allow_scheduling = True self.allow_scheduling = True
self.detail_box.setVisible(True) self.detail_box.setVisible(True)
self.account.setVisible(recipe.needs_subscription) self.account.setVisible(recipe.needs_subscription)
self.interval.setEnabled(self.schedule.checkState() == Qt.Checked) self.interval.setEnabled(self.schedule.checkState() == Qt.Checked)
@@ -417,11 +418,11 @@ class SchedulerDialog(QDialog, Ui_Dialog):
self.last_downloaded.setText(_('Last downloaded')+': '+tm) self.last_downloaded.setText(_('Last downloaded')+': '+tm)
else: else:
self.last_downloaded.setText(_('Last downloaded: never')) self.last_downloaded.setText(_('Last downloaded: never'))
class Scheduler(QObject): class Scheduler(QObject):
INTERVAL = 1 # minutes INTERVAL = 1 # minutes
def __init__(self, main): def __init__(self, main):
self.main = main self.main = main
self.verbose = main.verbose self.verbose = main.verbose
@@ -439,7 +440,7 @@ class Scheduler(QObject):
self.oldest = gconf['oldest_news'] self.oldest = gconf['oldest_news']
self.oldest_timer.start(int(60 * 60000)) self.oldest_timer.start(int(60 * 60000))
self.oldest_check() self.oldest_check()
self.news_menu = QMenu() self.news_menu = QMenu()
self.news_icon = QIcon(':/images/news.svg') self.news_icon = QIcon(':/images/news.svg')
self.scheduler_action = QAction(QIcon(':/images/scheduler.svg'), _('Schedule news download'), self) self.scheduler_action = QAction(QIcon(':/images/scheduler.svg'), _('Schedule news download'), self)
@@ -448,27 +449,27 @@ class Scheduler(QObject):
self.cac = QAction(QIcon(':/images/user_profile.svg'), _('Add a custom news source'), self) self.cac = QAction(QIcon(':/images/user_profile.svg'), _('Add a custom news source'), self)
self.connect(self.cac, SIGNAL('triggered(bool)'), self.customize_feeds) self.connect(self.cac, SIGNAL('triggered(bool)'), self.customize_feeds)
self.news_menu.addAction(self.cac) self.news_menu.addAction(self.cac)
def oldest_check(self): def oldest_check(self):
if self.oldest > 0: if self.oldest > 0:
delta = timedelta(days=self.oldest) delta = timedelta(days=self.oldest)
ids = self.main.library_view.model().db.tags_older_than(_('News'), delta) ids = self.main.library_view.model().db.tags_older_than(_('News'), delta)
if ids: if ids:
self.main.library_view.model().delete_books_by_id(ids) self.main.library_view.model().delete_books_by_id(ids)
def customize_feeds(self, *args): def customize_feeds(self, *args):
main = self.main main = self.main
d = UserProfiles(main, main.library_view.model().db.get_feeds()) d = UserProfiles(main, main.library_view.model().db.get_feeds())
d.exec_() d.exec_()
feeds = tuple(d.profiles()) feeds = tuple(d.profiles())
main.library_view.model().db.set_feeds(feeds) main.library_view.model().db.set_feeds(feeds)
def debug(self, *args): def debug(self, *args):
if self.verbose: if self.verbose:
sys.stdout.write(' '.join(map(unicode, args))+'\n') sys.stdout.write(' '.join(map(unicode, args))+'\n')
sys.stdout.flush() sys.stdout.flush()
def check(self): def check(self):
if not self.lock.tryLock(): if not self.lock.tryLock():
return return
@@ -494,15 +495,15 @@ class Scheduler(QObject):
matches = day_matches and (hour*60+minute) < tnow matches = day_matches and (hour*60+minute) < tnow
if matches and recipe.last_downloaded.toordinal() < date.today().toordinal(): if matches and recipe.last_downloaded.toordinal() < date.today().toordinal():
needs_downloading.add(recipe) needs_downloading.add(recipe)
self.debug('Needs downloading:', needs_downloading) self.debug('Needs downloading:', needs_downloading)
needs_downloading = [r for r in needs_downloading if r not in self.queue] needs_downloading = [r for r in needs_downloading if r not in self.queue]
for recipe in needs_downloading: for recipe in needs_downloading:
self.do_download(recipe) self.do_download(recipe)
finally: finally:
self.lock.unlock() self.lock.unlock()
def do_download(self, recipe): def do_download(self, recipe):
try: try:
id = int(recipe.id) id = int(recipe.id)
@@ -538,7 +539,7 @@ class Scheduler(QObject):
finally: finally:
self.lock.unlock() self.lock.unlock()
self.debug('Downloaded:', recipe) self.debug('Downloaded:', recipe)
def download(self, recipe): def download(self, recipe):
self.lock.lock() self.lock.lock()
try: try:
@@ -548,10 +549,10 @@ class Scheduler(QObject):
self.do_download(recipe) self.do_download(recipe)
finally: finally:
self.lock.unlock() self.lock.unlock()
def refresh_schedule(self, recipes): def refresh_schedule(self, recipes):
self.recipes = recipes self.recipes = recipes
def show_dialog(self, *args): def show_dialog(self, *args):
self.lock.lock() self.lock.lock()
try: try:

Binary image added (not shown): 360 B
Binary image added (not shown): 524 B
Binary image added (not shown): 524 B
Binary image added (not shown): 694 B

View File

@@ -1,72 +1,73 @@
<ui version="4.0" > <?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>ViewerConfig</class> <class>ViewerConfig</class>
<widget class="QDialog" name="ViewerConfig" > <widget class="QDialog" name="ViewerConfig">
<property name="geometry" > <property name="geometry">
<rect> <rect>
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>281</width> <width>373</width>
<height>214</height> <height>264</height>
</rect> </rect>
</property> </property>
<property name="windowTitle" > <property name="windowTitle">
<string>Configure Viewer</string> <string>Configure Viewer</string>
</property> </property>
<property name="windowIcon" > <property name="windowIcon">
<iconset resource="../images.qrc" > <iconset resource="../images.qrc">
<normaloff>:/images/config.svg</normaloff>:/images/config.svg</iconset> <normaloff>:/images/config.svg</normaloff>:/images/config.svg</iconset>
</property> </property>
<layout class="QGridLayout" > <layout class="QGridLayout" name="gridLayout">
<item row="0" column="0" colspan="2" > <item row="0" column="0">
<widget class="QCheckBox" name="white_background" > <widget class="QCheckBox" name="white_background">
<property name="text" > <property name="text">
<string>Use white background</string> <string>Use white background</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="1" column="0" > <item row="1" column="0">
<widget class="QCheckBox" name="hyphenate" > <widget class="QCheckBox" name="hyphenate">
<property name="text" > <property name="text">
<string>Hyphenate</string> <string>Hyphenate</string>
</property> </property>
<property name="checked" > <property name="checked">
<bool>true</bool> <bool>true</bool>
</property> </property>
</widget> </widget>
</item> </item>
<item row="3" column="1" > <item row="2" column="0">
<widget class="QDialogButtonBox" name="buttonBox" > <widget class="QLabel" name="label">
<property name="orientation" > <property name="frameShape">
<enum>Qt::Horizontal</enum>
</property>
<property name="standardButtons" >
<set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
</property>
</widget>
</item>
<item row="2" column="0" colspan="2" >
<widget class="QLabel" name="label" >
<property name="frameShape" >
<enum>QFrame::Box</enum> <enum>QFrame::Box</enum>
</property> </property>
<property name="text" > <property name="text">
<string>&lt;b&gt;Changes will only take effect after a restart.&lt;/b&gt;</string> <string>&lt;b&gt;Changes will only take effect after a restart.&lt;/b&gt;</string>
</property> </property>
<property name="textFormat" > <property name="textFormat">
<enum>Qt::RichText</enum> <enum>Qt::RichText</enum>
</property> </property>
<property name="alignment" > <property name="alignment">
<set>Qt::AlignCenter</set> <set>Qt::AlignCenter</set>
</property> </property>
<property name="wordWrap" > <property name="wordWrap">
<bool>true</bool> <bool>true</bool>
</property> </property>
</widget> </widget>
</item> </item>
<item row="3" column="0">
<widget class="QDialogButtonBox" name="buttonBox">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="standardButtons">
<set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
</property>
</widget>
</item>
</layout> </layout>
</widget> </widget>
<resources> <resources>
<include location="../images.qrc" /> <include location="../images.qrc"/>
</resources> </resources>
<connections> <connections>
<connection> <connection>
@@ -75,11 +76,11 @@
<receiver>ViewerConfig</receiver> <receiver>ViewerConfig</receiver>
<slot>accept()</slot> <slot>accept()</slot>
<hints> <hints>
<hint type="sourcelabel" > <hint type="sourcelabel">
<x>248</x> <x>248</x>
<y>254</y> <y>254</y>
</hint> </hint>
<hint type="destinationlabel" > <hint type="destinationlabel">
<x>157</x> <x>157</x>
<y>274</y> <y>274</y>
</hint> </hint>
@@ -91,11 +92,11 @@
<receiver>ViewerConfig</receiver> <receiver>ViewerConfig</receiver>
<slot>reject()</slot> <slot>reject()</slot>
<hints> <hints>
<hint type="sourcelabel" > <hint type="sourcelabel">
<x>316</x> <x>316</x>
<y>260</y> <y>260</y>
</hint> </hint>
<hint type="destinationlabel" > <hint type="destinationlabel">
<x>286</x> <x>286</x>
<y>274</y> <y>274</y>
</hint> </hint>

View File

@@ -1110,27 +1110,30 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
return return
self._view_file(job.result) self._view_file(job.result)
def _view_file(self, name): def _launch_viewer(self, name=None, viewer='ebook-viewer', internal=True):
self.setCursor(Qt.BusyCursor) self.setCursor(Qt.BusyCursor)
try: try:
ext = os.path.splitext(name)[1].upper().replace('.', '') if internal:
if ext in config['internally_viewed_formats']: args = [viewer]
if ext == 'LRF': if isosx and 'ebook' in viewer:
args = ['lrfviewer', name] args.append('--raise-window')
self.job_manager.server.run_free_job('lrfviewer', if name is not None:
kwdargs=dict(args=args)) args.append(name)
else: self.job_manager.server.run_free_job(viewer,
args = ['ebook-viewer', name] kwdargs=dict(args=args))
if isosx:
args.append('--raise-window')
self.job_manager.server.run_free_job('ebook-viewer',
kwdargs=dict(args=args))
else: else:
QDesktopServices.openUrl(QUrl('file:'+name))#launch(name) QDesktopServices.openUrl(QUrl.fromLocalFile(name))#launch(name)
time.sleep(5) # User feedback time.sleep(5) # User feedback
finally: finally:
self.unsetCursor() self.unsetCursor()
def _view_file(self, name):
ext = os.path.splitext(name)[1].upper().replace('.', '')
viewer = 'lrfviewer' if ext == 'LRF' else 'ebook-viewer'
internal = ext in config['internally_viewed_formats']
self._launch_viewer(name, viewer, internal)
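The refactor above folds the LRF and generic branches into one _launch_viewer helper, so _view_file reduces to choosing a viewer name and an internal/external flag. The dispatch on its own (INTERNAL_FORMATS stands in for config['internally_viewed_formats']):

    import os

    INTERNAL_FORMATS = ['LRF', 'EPUB']

    def pick_viewer(name):
        ext = os.path.splitext(name)[1].upper().replace('.', '')
        viewer = 'lrfviewer' if ext == 'LRF' else 'ebook-viewer'
        internal = ext in INTERNAL_FORMATS
        return viewer, internal

    print pick_viewer('book.lrf')   # ('lrfviewer', True)
    print pick_viewer('notes.pdf')  # ('ebook-viewer', False): open externally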
def view_specific_format(self, triggered): def view_specific_format(self, triggered):
rows = self.library_view.selectionModel().selectedRows() rows = self.library_view.selectionModel().selectedRows()
if not rows or len(rows) == 0: if not rows or len(rows) == 0:
@@ -1165,8 +1168,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
rows = self.current_view().selectionModel().selectedRows() rows = self.current_view().selectionModel().selectedRows()
if self.current_view() is self.library_view: if self.current_view() is self.library_view:
if not rows or len(rows) == 0: if not rows or len(rows) == 0:
d = error_dialog(self, _('Cannot view'), _('No book selected')) self._launch_viewer()
d.exec_()
return return
row = rows[0].row() row = rows[0].row()

View File

@@ -15,6 +15,7 @@ from calibre import terminal_controller, preferred_encoding
from calibre.utils.config import OptionParser, prefs from calibre.utils.config import OptionParser, prefs
try: try:
from calibre.utils.single_qt_application import send_message from calibre.utils.single_qt_application import send_message
send_message
except: except:
send_message = None send_message = None
from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.meta import get_metadata
@@ -55,7 +56,7 @@ XML_TEMPLATE = '''\
</py:for> </py:for>
</formats> </formats>
</record> </record>
</py:for> </py:for>
</calibredb> </calibredb>
''' '''
@@ -114,7 +115,7 @@ def get_db(dbpath, options):
dbpath = os.path.abspath(dbpath) dbpath = os.path.abspath(dbpath)
return LibraryDatabase2(dbpath) return LibraryDatabase2(dbpath)
def do_list(db, fields, sort_by, ascending, search_text, line_width, separator, def do_list(db, fields, sort_by, ascending, search_text, line_width, separator,
prefix, output_format, subtitle='Books in the calibre database'): prefix, output_format, subtitle='Books in the calibre database'):
if sort_by: if sort_by:
db.sort(sort_by, ascending) db.sort(sort_by, ascending)
@@ -134,13 +135,13 @@ def do_list(db, fields, sort_by, ascending, search_text, line_width, separator,
for i in data: for i in data:
for j, field in enumerate(fields): for j, field in enumerate(fields):
widths[j] = max(widths[j], len(unicode(i[str(field)]))) widths[j] = max(widths[j], len(unicode(i[str(field)])))
screen_width = terminal_controller.COLS if line_width < 0 else line_width screen_width = terminal_controller.COLS if line_width < 0 else line_width
if not screen_width: if not screen_width:
screen_width = 80 screen_width = 80
field_width = screen_width//len(fields) field_width = screen_width//len(fields)
base_widths = map(lambda x: min(x+1, field_width), widths) base_widths = map(lambda x: min(x+1, field_width), widths)
while sum(base_widths) < screen_width: while sum(base_widths) < screen_width:
adjusted = False adjusted = False
for i in range(len(widths)): for i in range(len(widths)):
@@ -150,14 +151,14 @@ def do_list(db, fields, sort_by, ascending, search_text, line_width, separator,
break break
if not adjusted: if not adjusted:
break break
widths = list(base_widths) widths = list(base_widths)
titles = map(lambda x, y: '%-*s'%(x, y), widths, fields) titles = map(lambda x, y: '%-*s'%(x, y), widths, fields)
print terminal_controller.GREEN + ''.join(titles)+terminal_controller.NORMAL print terminal_controller.GREEN + ''.join(titles)+terminal_controller.NORMAL
wrappers = map(lambda x: TextWrapper(x-1), widths) wrappers = map(lambda x: TextWrapper(x-1), widths)
o = cStringIO.StringIO() o = cStringIO.StringIO()
for record in data: for record in data:
text = [wrappers[i].wrap(unicode(record[field]).encode('utf-8')) for i, field in enumerate(fields)] text = [wrappers[i].wrap(unicode(record[field]).encode('utf-8')) for i, field in enumerate(fields)]
lines = max(map(len, text)) lines = max(map(len, text))
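The layout above gives every column min(natural width + 1, an equal share of the terminal), then grows columns one character at a time until the line is full or nothing can grow. The growth step itself is cut off by the hunk boundary, so the inner test below is a plausible reconstruction:

    widths = [30, 6, 80]              # made-up natural widths of three fields
    screen_width = 80
    field_width = screen_width // len(widths)
    base_widths = [min(w + 1, field_width) for w in widths]
    while sum(base_widths) < screen_width:
        adjusted = False
        for i in range(len(widths)):
            if base_widths[i] < widths[i]:    # reconstructed growth test
                base_widths[i] += 1
                adjusted = True
                break
        if not adjusted:
            break
    print base_widths  # [30, 7, 43]: spare columns go to the widest field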
@@ -178,9 +179,9 @@ def do_list(db, fields, sort_by, ascending, search_text, line_width, separator,
if isinstance(x['fmt_epub'], unicode): if isinstance(x['fmt_epub'], unicode):
x['fmt_epub'] = x['fmt_epub'].encode('utf-8') x['fmt_epub'] = x['fmt_epub'].encode('utf-8')
template = MarkupTemplate(STANZA_TEMPLATE) template = MarkupTemplate(STANZA_TEMPLATE)
return template.generate(id="urn:calibre:main", data=data, subtitle=subtitle, return template.generate(id="urn:calibre:main", data=data, subtitle=subtitle,
sep=os.sep, quote=quote, updated=db.last_modified()).render('xml') sep=os.sep, quote=quote, updated=db.last_modified()).render('xml')
def command_list(args, dbpath): def command_list(args, dbpath):
@@ -199,7 +200,7 @@ List the books available in the calibre database.
help=_('Sort results in ascending order')) help=_('Sort results in ascending order'))
parser.add_option('-s', '--search', default=None, parser.add_option('-s', '--search', default=None,
help=_('Filter the results by the search query. For the format of the search query, please see the search related documentation in the User Manual. Default is to do no filtering.')) help=_('Filter the results by the search query. For the format of the search query, please see the search related documentation in the User Manual. Default is to do no filtering.'))
parser.add_option('-w', '--line-width', default=-1, type=int, parser.add_option('-w', '--line-width', default=-1, type=int,
help=_('The maximum width of a single line in the output. Defaults to detecting screen size.')) help=_('The maximum width of a single line in the output. Defaults to detecting screen size.'))
parser.add_option('--separator', default=' ', help=_('The string used to separate fields. Default is a space.')) parser.add_option('--separator', default=' ', help=_('The string used to separate fields. Default is a space.'))
parser.add_option('--prefix', default=None, help=_('The prefix for all file paths. Default is the absolute path to the library folder.')) parser.add_option('--prefix', default=None, help=_('The prefix for all file paths. Default is the absolute path to the library folder.'))
@@ -264,14 +265,14 @@ def do_add(db, paths, one_book_per_directory, recurse, add_duplicates):
formats.append(format) formats.append(format)
metadata.append(mi) metadata.append(mi)
file_duplicates = [] file_duplicates = []
if files: if files:
file_duplicates = db.add_books(files, formats, metadata, file_duplicates = db.add_books(files, formats, metadata,
add_duplicates=add_duplicates) add_duplicates=add_duplicates)
if file_duplicates: if file_duplicates:
file_duplicates = file_duplicates[0] file_duplicates = file_duplicates[0]
dir_dups = [] dir_dups = []
for dir in dirs: for dir in dirs:

View File

@@ -31,6 +31,21 @@ from calibre.customize.ui import run_plugins_on_import
from calibre import sanitize_file_name from calibre import sanitize_file_name
from calibre.ebooks import BOOK_EXTENSIONS from calibre.ebooks import BOOK_EXTENSIONS
if iswindows:
import calibre.utils.winshell as winshell
def delete_file(path):
try:
winshell.delete_file(path, silent=True, no_confirm=True)
except:
os.remove(path)
def delete_tree(path):
try:
winshell.delete_file(path, silent=True, no_confirm=True)
except:
shutil.rmtree(path)
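Both wrappers try the shell-based delete first, so files land in the recycle bin, and fall back to plain removal if that fails. Hypothetical usage (these helpers only exist under the iswindows guard above, and the paths are made up):

    delete_file('C:\\books\\old.epub')   # recycle bin, else os.remove
    delete_tree('C:\\books\\stale_dir')  # recycle bin, else shutil.rmtree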
copyfile = os.link if hasattr(os, 'link') else shutil.copyfile copyfile = os.link if hasattr(os, 'link') else shutil.copyfile
FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'publisher':3, 'rating':4, 'timestamp':5, FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'publisher':3, 'rating':4, 'timestamp':5,
@@ -499,7 +514,7 @@ class LibraryDatabase2(LibraryDatabase):
def rmtree(self, path): def rmtree(self, path):
if not self.normpath(self.library_path).startswith(self.normpath(path)): if not self.normpath(self.library_path).startswith(self.normpath(path)):
shutil.rmtree(path) delete_tree(path)
def normpath(self, path): def normpath(self, path):
path = os.path.abspath(os.path.realpath(path)) path = os.path.abspath(os.path.realpath(path))
@@ -745,7 +760,10 @@ class LibraryDatabase2(LibraryDatabase):
path = os.path.join(self.library_path, self.path(id, index_is_id=True)) path = os.path.join(self.library_path, self.path(id, index_is_id=True))
self.data.remove(id) self.data.remove(id)
if os.path.exists(path): if os.path.exists(path):
self.rmtree(path) if iswindows:
winshell.delete_file(path, no_confirm=True, silent=True)
else:
self.rmtree(path)
parent = os.path.dirname(path) parent = os.path.dirname(path)
if len(os.listdir(parent)) == 0: if len(os.listdir(parent)) == 0:
self.rmtree(parent) self.rmtree(parent)
@@ -764,7 +782,7 @@ class LibraryDatabase2(LibraryDatabase):
ext = ('.' + format.lower()) if format else '' ext = ('.' + format.lower()) if format else ''
path = os.path.join(path, name+ext) path = os.path.join(path, name+ext)
try: try:
os.remove(path) delete_file(path)
except: except:
traceback.print_exc() traceback.print_exc()
self.conn.execute('DELETE FROM data WHERE book=? AND format=?', (id, format.upper())) self.conn.execute('DELETE FROM data WHERE book=? AND format=?', (id, format.upper()))

View File

@@ -105,7 +105,7 @@ Device Integration
What devices does |app| support? What devices does |app| support?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
At the moment |app| has full support for the SONY PRS 500/505/700, Cybook Gen 3, Amazon Kindle 1 and 2 as well as the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk. At the moment |app| has full support for the SONY PRS 500/505/700, Cybook Gen 3, Amazon Kindle 1/2, Netronix EB600 and the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk.
I used |app| to transfer some books to my reader, and now the SONY software hangs every time I connect the reader? I used |app| to transfer some books to my reader, and now the SONY software hangs every time I connect the reader?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@@ -24,7 +24,7 @@ from calibre.translations.msgfmt import make
_run_once = False _run_once = False
if not _run_once: if not _run_once:
_run_once = True _run_once = True
################################################################################ ################################################################################
# Setup translations # Setup translations
@@ -32,7 +32,8 @@ if not _run_once:
lang = prefs['language'] lang = prefs['language']
if lang is not None: if lang is not None:
return lang return lang
lang = locale.getdefaultlocale()[0] lang = locale.getdefaultlocale(['LANGUAGE', 'LC_ALL', 'LC_CTYPE',
'LC_MESSAGES', 'LANG'])[0]
if lang is None and os.environ.has_key('LANG'): # Needed for OS X if lang is None and os.environ.has_key('LANG'): # Needed for OS X
try: try:
lang = os.environ['LANG'] lang = os.environ['LANG']
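The new call passes an explicit environment-variable search order to locale.getdefaultlocale instead of relying on its default tuple. For example:

    import locale

    # Consult LANGUAGE first, then the LC_* variables, then LANG
    lang = locale.getdefaultlocale(
        ['LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LC_MESSAGES', 'LANG'])[0]
    print lang  # e.g. 'en_US'; None when none of the variables are set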

View File

@@ -38,6 +38,7 @@ def get_linux_data(version='1.0.0'):
('exherbo', 'Exherbo'), ('exherbo', 'Exherbo'),
('foresight', 'Foresight 2.1'), ('foresight', 'Foresight 2.1'),
('ubuntu', 'Ubuntu Jaunty Jackalope'), ('ubuntu', 'Ubuntu Jaunty Jackalope'),
('linux_mint', 'Linux Mint Gloria'),
]: ]:
data['supported'].append(CoolDistro(name, title, data['supported'].append(CoolDistro(name, title,
prefix='http://calibre.kovidgoyal.net')) prefix='http://calibre.kovidgoyal.net'))

Binary image added (not shown): 8.9 KiB

26 file diffs suppressed because they are too large.

View File

@@ -0,0 +1,400 @@
"""winshell - convenience functions to access Windows shell functionality
Certain aspects of the Windows user interface are grouped by
Microsoft as Shell functions. These include the Desktop, shortcut
icons, special folders (such as My Documents) and a few other things.
These are mostly available via the shell module of the win32all
extensions, but whenever I need to use them, I've forgotten the
various constants and so on.
Several of the shell items have two variants: personal and common,
or User and All Users. These refer to systems with profiles in use:
anything from NT upwards, and 9x with Profiles turned on. Where
relevant, the Personal/User version refers to that owned by the
logged-on user and visible only to that user; the Common/All Users
version refers to that maintained by an Administrator and visible
to all users of the system.
(c) Tim Golden <winshell@timgolden.me.uk> 25th November 2003
Licensed under the (GPL-compatible) MIT License:
http://www.opensource.org/licenses/mit-license.php
9th Nov 2005 0.2 . License changed to MIT
. Added functionality using SHFileOperation
25th Nov 2003 0.1 . Initial release by Tim Golden
"""
__VERSION__ = "0.2"
import os
from win32com import storagecon
from win32com.shell import shell, shellcon
import pythoncom
class x_winshell (Exception):
pass
#
# Although this can be done in one call, Win9x didn't
# support it, so I added this workaround.
#
def get_path (folder_id):
return shell.SHGetPathFromIDList (shell.SHGetSpecialFolderLocation (0, folder_id))
def desktop (common=0):
"What folder is equivalent to the current desktop?"
return get_path ((shellcon.CSIDL_DESKTOP, shellcon.CSIDL_COMMON_DESKTOPDIRECTORY)[common])
def common_desktop ():
#
# Only here because already used in code
#
return desktop (common=1)
def application_data (common=0):
"What folder holds application configuration files?"
return get_path ((shellcon.CSIDL_APPDATA, shellcon.CSIDL_COMMON_APPDATA)[common])
def favourites (common=0):
"What folder holds the Explorer favourites shortcuts?"
return get_path ((shellcon.CSIDL_FAVORITES, shellcon.CSIDL_COMMON_FAVORITES)[common])
bookmarks = favourites
def start_menu (common=0):
"What folder holds the Start Menu shortcuts?"
return get_path ((shellcon.CSIDL_STARTMENU, shellcon.CSIDL_COMMON_STARTMENU)[common])
def programs (common=0):
"What folder holds the Programs shortcuts (from the Start Menu)?"
return get_path ((shellcon.CSIDL_PROGRAMS, shellcon.CSIDL_COMMON_PROGRAMS)[common])
def startup (common=0):
"What folder holds the Startup shortcuts (from the Start Menu)?"
return get_path ((shellcon.CSIDL_STARTUP, shellcon.CSIDL_COMMON_STARTUP)[common])
def personal_folder ():
"What folder holds the My Documents files?"
return get_path (shellcon.CSIDL_PERSONAL)
my_documents = personal_folder
def recent ():
"What folder holds the Documents shortcuts (from the Start Menu)?"
return get_path (shellcon.CSIDL_RECENT)
def sendto ():
"What folder holds the SendTo shortcuts (from the Context Menu)?"
return get_path (shellcon.CSIDL_SENDTO)
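Every folder helper above funnels through get_path; passing common=1 selects the All Users variant. Typical use (output paths are examples only):

    import winshell

    print winshell.desktop()           # e.g. C:\Documents and Settings\you\Desktop
    print winshell.desktop(common=1)   # the All Users desktop
    print winshell.my_documents()      # alias for personal_folder()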
#
# Internally abstracted function to handle one
# of several shell-based file manipulation
# routines. Not all the possible parameters
# are covered which might be passed to the
# underlying SHFileOperation API call, but
# only those which seemed useful to me at
# the time.
#
def _file_operation (
operation,
source_path,
target_path=None,
allow_undo=True,
no_confirm=False,
rename_on_collision=True,
silent=False,
hWnd=None
):
#
# At present the Python wrapper around SHFileOperation doesn't
# allow lists of files. Hopefully it will at some point, so
# take account of it here.
# If you pass this shell function a "/"-separated path with
# a wildcard, eg c:/temp/*.tmp, it gets confused. It's ok
# with a backslash, so convert here.
#
source_path = source_path or ""
if isinstance (source_path, basestring):
source_path = os.path.abspath (source_path)
else:
source_path = [os.path.abspath (i) for i in source_path]
target_path = target_path or ""
if isinstance (target_path, basestring):
target_path = os.path.abspath (target_path)
else:
target_path = [os.path.abspath (i) for i in target_path]
flags = 0
if allow_undo: flags |= shellcon.FOF_ALLOWUNDO
if no_confirm: flags |= shellcon.FOF_NOCONFIRMATION
if rename_on_collision: flags |= shellcon.FOF_RENAMEONCOLLISION
if silent: flags |= shellcon.FOF_SILENT
result, n_aborted = shell.SHFileOperation (
(hWnd or 0, operation, source_path, target_path, flags, None, None)
)
if result <> 0:
raise x_winshell, result
elif n_aborted:
raise x_winshell, "%d operations were aborted by the user" % n_aborted
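The keyword arguments map one-to-one onto SHFileOperation's FOF_* bits. A silent, unprompted, undoable delete, for instance, composes its flags as:

    from win32com.shell import shellcon

    flags = 0
    flags |= shellcon.FOF_ALLOWUNDO       # allow_undo=True: use the recycle bin
    flags |= shellcon.FOF_NOCONFIRMATION  # no_confirm=True
    flags |= shellcon.FOF_SILENT          # silent=True
    print hex(flags)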
def copy_file (
source_path,
target_path,
allow_undo=True,
no_confirm=False,
rename_on_collision=True,
silent=False,
hWnd=None
):
"""Perform a shell-based file copy. Copying in
this way allows the possibility of undo, auto-renaming,
and showing the "flying file" animation during the copy.
The default options allow for undo, don't automatically
clobber on a name clash, automatically rename on collision
and display the animation.
"""
_file_operation (
shellcon.FO_COPY,
source_path,
target_path,
allow_undo,
no_confirm,
rename_on_collision,
silent,
hWnd
)
def move_file (
source_path,
target_path,
allow_undo=True,
no_confirm=False,
rename_on_collision=True,
silent=False,
hWnd=None
):
"""Perform a shell-based file move. Moving in
this way allows the possibility of undo, auto-renaming,
and showing the "flying file" animation during the copy.
The default options allow for undo, don't automatically
clobber on a name clash, automatically rename on collision
and display the animation.
"""
_file_operation (
shellcon.FO_MOVE,
source_path,
target_path,
allow_undo,
no_confirm,
rename_on_collision,
silent,
hWnd
)
def rename_file (
source_path,
target_path,
allow_undo=True,
no_confirm=False,
rename_on_collision=True,
silent=False,
hWnd=None
):
"""Perform a shell-based file rename. Renaming in
this way allows the possibility of undo, auto-renaming,
and showing the "flying file" animation during the copy.
The default options allow for undo, don't automatically
clobber on a name clash, automatically rename on collision
and display the animation.
"""
_file_operation (
shellcon.FO_RENAME,
source_path,
target_path,
allow_undo,
no_confirm,
rename_on_collision,
silent,
hWnd
)
def delete_file (
source_path,
allow_undo=True,
no_confirm=False,
rename_on_collision=True,
silent=False,
hWnd=None
):
"""Perform a shell-based file delete. Deleting in
this way uses the system recycle bin, allows the
possibility of undo, and showing the "flying file"
animation during the delete.
The default options allow for undo, don't automatically
clobber on a name clash, automatically rename on collision
and display the animation.
"""
_file_operation (
shellcon.FO_DELETE,
source_path,
None,
allow_undo,
no_confirm,
rename_on_collision,
silent,
hWnd
)
def CreateShortcut (Path, Target, Arguments = "", StartIn = "", Icon = ("",0), Description = ""):
"""Create a Windows shortcut:
Path - As what file should the shortcut be created?
Target - What command should the desktop use?
Arguments - What arguments should be supplied to the command?
StartIn - What folder should the command start in?
Icon - (filename, index) What icon should be used for the shortcut?
Description - What description should the shortcut be given?
eg
CreateShortcut (
Path=os.path.join (desktop (), "PythonI.lnk"),
Target=r"c:\python\python.exe",
Icon=(r"c:\python\python.exe", 0),
Description="Python Interpreter"
)
"""
sh = pythoncom.CoCreateInstance (
shell.CLSID_ShellLink,
None,
pythoncom.CLSCTX_INPROC_SERVER,
shell.IID_IShellLink
)
sh.SetPath (Target)
sh.SetDescription (Description)
sh.SetArguments (Arguments)
sh.SetWorkingDirectory (StartIn)
sh.SetIconLocation (Icon[0], Icon[1])
persist = sh.QueryInterface (pythoncom.IID_IPersistFile)
persist.Save (Path, 1)
#
# Constants for structured storage
#
# These come from ObjIdl.h
FMTID_USER_DEFINED_PROPERTIES = "{F29F85E0-4FF9-1068-AB91-08002B27B3D9}"
FMTID_CUSTOM_DEFINED_PROPERTIES = "{D5CDD505-2E9C-101B-9397-08002B2CF9AE}"
PIDSI_TITLE = 0x00000002
PIDSI_SUBJECT = 0x00000003
PIDSI_AUTHOR = 0x00000004
PIDSI_CREATE_DTM = 0x0000000c
PIDSI_KEYWORDS = 0x00000005
PIDSI_COMMENTS = 0x00000006
PIDSI_TEMPLATE = 0x00000007
PIDSI_LASTAUTHOR = 0x00000008
PIDSI_REVNUMBER = 0x00000009
PIDSI_EDITTIME = 0x0000000a
PIDSI_LASTPRINTED = 0x0000000b
PIDSI_LASTSAVE_DTM = 0x0000000d
PIDSI_PAGECOUNT = 0x0000000e
PIDSI_WORDCOUNT = 0x0000000f
PIDSI_CHARCOUNT = 0x00000010
PIDSI_THUMBNAIL = 0x00000011
PIDSI_APPNAME = 0x00000012
PROPERTIES = (
PIDSI_TITLE,
PIDSI_SUBJECT,
PIDSI_AUTHOR,
PIDSI_CREATE_DTM,
PIDSI_KEYWORDS,
PIDSI_COMMENTS,
PIDSI_TEMPLATE,
PIDSI_LASTAUTHOR,
PIDSI_EDITTIME,
PIDSI_LASTPRINTED,
PIDSI_LASTSAVE_DTM,
PIDSI_PAGECOUNT,
PIDSI_WORDCOUNT,
PIDSI_CHARCOUNT,
PIDSI_APPNAME
)
#
# This was taken from someone else's example,
# but I can't find where. If you know, please
# tell me so I can give due credit.
#
def structured_storage (filename):
"""Pick out info from MS documents with embedded
structured storage (typically MS Word docs etc.)
Returns a dictionary of information found
"""
if not pythoncom.StgIsStorageFile (filename):
return {}
flags = storagecon.STGM_READ | storagecon.STGM_SHARE_EXCLUSIVE
storage = pythoncom.StgOpenStorage (filename, None, flags)
try:
properties_storage = storage.QueryInterface (pythoncom.IID_IPropertySetStorage)
except pythoncom.com_error:
return {}
property_sheet = properties_storage.Open (FMTID_USER_DEFINED_PROPERTIES)
try:
data = property_sheet.ReadMultiple (PROPERTIES)
finally:
property_sheet = None
title, subject, author, created_on, keywords, comments, template_used, \
updated_by, edited_on, printed_on, saved_on, \
n_pages, n_words, n_characters, \
application = data
result = {}
if title: result['title'] = title
if subject: result['subject'] = subject
if author: result['author'] = author
if created_on: result['created_on'] = created_on
if keywords: result['keywords'] = keywords
if comments: result['comments'] = comments
if template_used: result['template_used'] = template_used
if updated_by: result['updated_by'] = updated_by
if edited_on: result['edited_on'] = edited_on
if printed_on: result['printed_on'] = printed_on
if saved_on: result['saved_on'] = saved_on
if n_pages: result['n_pages'] = n_pages
if n_words: result['n_words'] = n_words
if n_characters: result['n_characters'] = n_characters
if application: result['application'] = application
return result
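Because absent properties are simply left out of the result, callers should read it with dict.get. A usage sketch (the document path is hypothetical):

    info = structured_storage(r'c:\docs\report.doc')
    print info.get('title', 'no title set')
    print info.get('author', 'unknown author')
    print info.get('n_words', 0)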
if __name__ == '__main__':
try:
print 'Desktop =>', desktop ()
print 'Common Desktop =>', desktop (1)
print 'Application Data =>', application_data ()
print 'Common Application Data =>', application_data (1)
print 'Bookmarks =>', bookmarks ()
print 'Common Bookmarks =>', bookmarks (1)
print 'Start Menu =>', start_menu ()
print 'Common Start Menu =>', start_menu (1)
print 'Programs =>', programs ()
print 'Common Programs =>', programs (1)
print 'Startup =>', startup ()
print 'Common Startup =>', startup (1)
print 'My Documents =>', my_documents ()
print 'Recent =>', recent ()
print 'SendTo =>', sendto ()
finally:
raw_input ("Press enter...")

View File

@@ -1,11 +1,11 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
''' '''
CLI for downloading feeds. CLI for downloading feeds.
''' '''
import sys, os, logging import sys, os
from calibre.web.feeds.recipes import get_builtin_recipe, compile_recipe, titles from calibre.web.feeds.recipes import get_builtin_recipe, compile_recipe, titles
from calibre.web.fetch.simple import option_parser as _option_parser from calibre.web.fetch.simple import option_parser as _option_parser
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
@@ -14,13 +14,13 @@ from calibre.utils.config import Config, StringConfig
def config(defaults=None): def config(defaults=None):
desc = _('Options to control the fetching of periodical content from the web.') desc = _('Options to control the fetching of periodical content from the web.')
c = Config('feeds2disk', desc) if defaults is None else StringConfig(defaults, desc) c = Config('feeds2disk', desc) if defaults is None else StringConfig(defaults, desc)
web2disk = c.add_group('web2disk', _('Customize the download engine')) web2disk = c.add_group('web2disk', _('Customize the download engine'))
web2disk('timeout', ['-t', '--timeout'], default=10.0, web2disk('timeout', ['-t', '--timeout'], default=10.0,
help=_('Timeout in seconds to wait for a response from the server. Default: %default s'),) help=_('Timeout in seconds to wait for a response from the server. Default: %default s'),)
web2disk('delay', ['--delay'], default=0, web2disk('delay', ['--delay'], default=0,
help=_('Minimum interval in seconds between consecutive fetches. Default is %default s')) help=_('Minimum interval in seconds between consecutive fetches. Default is %default s'))
web2disk('encoding', ['--encoding'], default=None, web2disk('encoding', ['--encoding'], default=None,
help=_('The character encoding for the websites you are trying to download. The default is to try and guess the encoding.')) help=_('The character encoding for the websites you are trying to download. The default is to try and guess the encoding.'))
web2disk('match_regexps', ['--match-regexp'], default=[], action='append', web2disk('match_regexps', ['--match-regexp'], default=[], action='append',
help=_('Only links that match this regular expression will be followed. This option can be specified multiple times, in which case as long as a link matches any one regexp, it will be followed. By default all links are followed.')) help=_('Only links that match this regular expression will be followed. This option can be specified multiple times, in which case as long as a link matches any one regexp, it will be followed. By default all links are followed.'))
@@ -28,42 +28,42 @@ def config(defaults=None):
help=_('Any link that matches this regular expression will be ignored. This option can be specified multiple times, in which case as long as any regexp matches a link, it will be ignored. By default, no links are ignored. If both --filter-regexp and --match-regexp are specified, then --filter-regexp is applied first.')) help=_('Any link that matches this regular expression will be ignored. This option can be specified multiple times, in which case as long as any regexp matches a link, it will be ignored. By default, no links are ignored. If both --filter-regexp and --match-regexp are specified, then --filter-regexp is applied first.'))
web2disk('no_stylesheets', ['--dont-download-stylesheets'], action='store_true', default=False, web2disk('no_stylesheets', ['--dont-download-stylesheets'], action='store_true', default=False,
help=_('Do not download CSS stylesheets.')) help=_('Do not download CSS stylesheets.'))
c.add_opt('feeds', ['--feeds'], default=None, c.add_opt('feeds', ['--feeds'], default=None,
help=_('''Specify a list of feeds to download. For example: help=_('''Specify a list of feeds to download. For example:
"['http://feeds.newsweek.com/newsweek/TopNews', 'http://feeds.newsweek.com/headlines/politics']" "['http://feeds.newsweek.com/newsweek/TopNews', 'http://feeds.newsweek.com/headlines/politics']"
If you specify this option, any argument to %prog is ignored and a default recipe is used to download the feeds.''')) If you specify this option, any argument to %prog is ignored and a default recipe is used to download the feeds.'''))
c.add_opt('verbose', ['-v', '--verbose'], default=0, action='count', c.add_opt('verbose', ['-v', '--verbose'], default=0, action='count',
help=_('''Be more verbose while processing.''')) help=_('''Be more verbose while processing.'''))
c.add_opt('title', ['--title'], default=None, c.add_opt('title', ['--title'], default=None,
help=_('The title for this recipe. Used as the title for any ebooks created from the downloaded feeds.')) help=_('The title for this recipe. Used as the title for any ebooks created from the downloaded feeds.'))
c.add_opt('username', ['-u', '--username'], default=None, c.add_opt('username', ['-u', '--username'], default=None,
help=_('Username for sites that require a login to access content.')) help=_('Username for sites that require a login to access content.'))
c.add_opt('password', ['-p', '--password'], default=None, c.add_opt('password', ['-p', '--password'], default=None,
help=_('Password for sites that require a login to access content.')) help=_('Password for sites that require a login to access content.'))
c.add_opt('lrf', ['--lrf'], default=False, action='store_true', c.add_opt('lrf', ['--lrf'], default=False, action='store_true',
help='Optimize fetching for subsequent conversion to LRF.') help='Optimize fetching for subsequent conversion to LRF.')
c.add_opt('epub', ['--epub'], default=False, action='store_true', c.add_opt('epub', ['--epub'], default=False, action='store_true',
help='Optimize fetching for subsequent conversion to EPUB.') help='Optimize fetching for subsequent conversion to EPUB.')
c.add_opt('mobi', ['--mobi'], default=False, action='store_true', c.add_opt('mobi', ['--mobi'], default=False, action='store_true',
help='Optimize fetching for subsequent conversion to MOBI.') help='Optimize fetching for subsequent conversion to MOBI.')
c.add_opt('recursions', ['--recursions'], default=0, c.add_opt('recursions', ['--recursions'], default=0,
help=_('Number of levels of links to follow on webpages that are linked to from feeds. Default %default')) help=_('Number of levels of links to follow on webpages that are linked to from feeds. Default %default'))
c.add_opt('output_dir', ['--output-dir'], default='.', c.add_opt('output_dir', ['--output-dir'], default='.',
help=_('The directory in which to store the downloaded feeds. Defaults to the current directory.')) help=_('The directory in which to store the downloaded feeds. Defaults to the current directory.'))
c.add_opt('no_progress_bar', ['--no-progress-bar'], default=False, action='store_true', c.add_opt('no_progress_bar', ['--no-progress-bar'], default=False, action='store_true',
help=_("Don't show the progress bar")) help=_("Don't show the progress bar"))
c.add_opt('debug', ['--debug'], action='store_true', default=False, c.add_opt('debug', ['--debug'], action='store_true', default=False,
help=_('Very verbose output, useful for debugging.')) help=_('Very verbose output, useful for debugging.'))
c.add_opt('test', ['--test'], action='store_true', default=False, c.add_opt('test', ['--test'], action='store_true', default=False,
help=_('Useful for recipe development. Forces max_articles_per_feed to 2 and downloads at most 2 feeds.')) help=_('Useful for recipe development. Forces max_articles_per_feed to 2 and downloads at most 2 feeds.'))
return c return c
USAGE=_('''\ USAGE=_('''\
%%prog [options] ARG %%prog [options] ARG
%%prog parses an online source of articles, like an RSS or ATOM feed and %%prog parses an online source of articles, like an RSS or ATOM feed and
fetches the article contents organized in a nice hierarchy. fetches the article contents organized in a nice hierarchy.
ARG can be one of: ARG can be one of:
@@ -85,9 +85,9 @@ def option_parser(usage=USAGE):
p.remove_option('--verbose') p.remove_option('--verbose')
p.remove_option('--max-files') p.remove_option('--max-files')
p.subsume('WEB2DISK OPTIONS', _('Options to control web2disk (used to fetch websites linked from feeds)')) p.subsume('WEB2DISK OPTIONS', _('Options to control web2disk (used to fetch websites linked from feeds)'))
p.add_option('--feeds', default=None, p.add_option('--feeds', default=None,
help=_('''Specify a list of feeds to download. For example: help=_('''Specify a list of feeds to download. For example:
"['http://feeds.newsweek.com/newsweek/TopNews', 'http://feeds.newsweek.com/headlines/politics']" "['http://feeds.newsweek.com/newsweek/TopNews', 'http://feeds.newsweek.com/headlines/politics']"
If you specify this option, any argument to %prog is ignored and a default recipe is used to download the feeds.''')) If you specify this option, any argument to %prog is ignored and a default recipe is used to download the feeds.'''))
p.add_option('--verbose', default=False, action='store_true', p.add_option('--verbose', default=False, action='store_true',
@@ -99,70 +99,62 @@ If you specify this option, any argument to %prog is ignored and a default recip
p.add_option('--lrf', default=False, action='store_true', help='Optimize fetching for subsequent conversion to LRF.') p.add_option('--lrf', default=False, action='store_true', help='Optimize fetching for subsequent conversion to LRF.')
p.add_option('--recursions', default=0, type='int', p.add_option('--recursions', default=0, type='int',
help=_('Number of levels of links to follow on webpages that are linked to from feeds. Default %default')) help=_('Number of levels of links to follow on webpages that are linked to from feeds. Default %default'))
p.add_option('--output-dir', default=os.getcwd(), p.add_option('--output-dir', default=os.getcwd(),
help=_('The directory in which to store the downloaded feeds. Defaults to the current directory.')) help=_('The directory in which to store the downloaded feeds. Defaults to the current directory.'))
p.add_option('--no-progress-bar', dest='no_progress_bar', default=False, action='store_true', p.add_option('--no-progress-bar', dest='no_progress_bar', default=False, action='store_true',
help=_('Dont show the progress bar')) help=_('Dont show the progress bar'))
p.add_option('--debug', action='store_true', default=False, p.add_option('--debug', action='store_true', default=False,
help=_('Very verbose output, useful for debugging.')) help=_('Very verbose output, useful for debugging.'))
p.add_option('--test', action='store_true', default=False, p.add_option('--test', action='store_true', default=False,
help=_('Useful for recipe development. Forces max_articles_per_feed to 2 and downloads at most 2 feeds.')) help=_('Useful for recipe development. Forces max_articles_per_feed to 2 and downloads at most 2 feeds.'))
return p return p
class RecipeError(Exception): class RecipeError(Exception):
pass pass
def run_recipe(opts, recipe_arg, parser, notification=None, handler=None): def run_recipe(opts, recipe_arg, parser, notification=None):
if notification is None: if notification is None:
from calibre.utils.terminfo import TerminalController, ProgressBar from calibre.utils.terminfo import TerminalController, ProgressBar
term = TerminalController(sys.stdout) term = TerminalController(sys.stdout)
pb = ProgressBar(term, _('Fetching feeds...'), no_progress_bar=opts.no_progress_bar) pb = ProgressBar(term, _('Fetching feeds...'), no_progress_bar=opts.no_progress_bar)
notification = pb.update notification = pb.update
recipe = None recipe = None
if opts.feeds is not None: if opts.feeds is not None:
recipe = BasicNewsRecipe recipe = BasicNewsRecipe
else: else:
try: try:
if os.access(recipe_arg, os.R_OK): if os.access(recipe_arg, os.R_OK):
recipe = compile_recipe(open(recipe_arg).read()) recipe = compile_recipe(open(recipe_arg).read())
else: else:
raise Exception('not file') raise Exception('not file')
except: except:
recipe = get_builtin_recipe(recipe_arg) recipe = get_builtin_recipe(recipe_arg)
if recipe is None: if recipe is None:
recipe = compile_recipe(recipe_arg) recipe = compile_recipe(recipe_arg)
if recipe is None: if recipe is None:
raise RecipeError(recipe_arg+ ' is an invalid recipe') raise RecipeError(recipe_arg+ ' is an invalid recipe')
if handler is None:
from calibre import ColoredFormatter
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.DEBUG if opts.debug else logging.INFO if opts.verbose else logging.WARN)
handler.setFormatter(ColoredFormatter('%(levelname)s: %(message)s\n')) # The trailing newline is needed because of the progress bar
logging.getLogger('feeds2disk').addHandler(handler)
recipe = recipe(opts, parser, notification) recipe = recipe(opts, parser, notification)
if not os.path.exists(recipe.output_dir): if not os.path.exists(recipe.output_dir):
os.makedirs(recipe.output_dir) os.makedirs(recipe.output_dir)
recipe.download(for_lrf=True) recipe.download(for_lrf=True)
return recipe return recipe
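run_recipe resolves its argument in three steps: a readable recipe file, then a builtin recipe title, then literal recipe source. The same cascade isolated, slightly simplified (the original wraps step 1 in a try/except that falls through on any error; compile_recipe and get_builtin_recipe are the calibre helpers imported above):

    import os

    def resolve_recipe(arg):
        # 1. A path to a recipe file on disk
        if os.access(arg, os.R_OK):
            return compile_recipe(open(arg).read())
        # 2. The title of a builtin recipe
        recipe = get_builtin_recipe(arg)
        if recipe is not None:
            return recipe
        # 3. Recipe source passed directly as a string (None if invalid)
        return compile_recipe(arg)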
def main(args=sys.argv, notification=None, handler=None): def main(args=sys.argv, notification=None):
p = option_parser() p = option_parser()
opts, args = p.parse_args(args=args[1:]) opts, args = p.parse_args(args=args[1:])
if len(args) != 1 and opts.feeds is None: if len(args) != 1 and opts.feeds is None:
p.print_help() p.print_help()
return 1 return 1
recipe_arg = args[0] if len(args) > 0 else None recipe_arg = args[0] if len(args) > 0 else None
run_recipe(opts, recipe_arg, p, notification=notification, handler=handler) run_recipe(opts, recipe_arg, p, notification=notification)
return 0 return 0
if __name__ == '__main__': if __name__ == '__main__':

View File

@@ -7,7 +7,7 @@ Defines various abstract base classes that can be subclassed to create powerful
__docformat__ = "restructuredtext en" __docformat__ = "restructuredtext en"
import logging, os, cStringIO, time, traceback, re, urlparse, sys import os, time, traceback, re, urlparse, sys
from collections import defaultdict from collections import defaultdict
from functools import partial from functools import partial
from contextlib import nested, closing from contextlib import nested, closing
@@ -27,6 +27,7 @@ from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
from calibre.web.fetch.simple import option_parser as web2disk_option_parser from calibre.web.fetch.simple import option_parser as web2disk_option_parser
from calibre.web.fetch.simple import RecursiveFetcher from calibre.web.fetch.simple import RecursiveFetcher
from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
from calibre.utils.logging import Log
from calibre.ptempfile import PersistentTemporaryFile, \ from calibre.ptempfile import PersistentTemporaryFile, \
PersistentTemporaryDirectory PersistentTemporaryDirectory
@@ -423,7 +424,7 @@ class BasicNewsRecipe(object):
''' '''
raise NotImplementedError raise NotImplementedError
def get_obfuscated_article(self, url, logger): def get_obfuscated_article(self, url):
''' '''
If you set :member:`articles_are_obfuscated` this method is called with If you set :member:`articles_are_obfuscated` this method is called with
every article URL. It should return the path to a file on the filesystem every article URL. It should return the path to a file on the filesystem
@@ -443,6 +444,7 @@ class BasicNewsRecipe(object):
:param parser: Command line option parser. Used to intelligently merge options. :param parser: Command line option parser. Used to intelligently merge options.
:param progress_reporter: A Callable that takes two arguments: progress (a number between 0 and 1) and a string message. The message should be optional. :param progress_reporter: A Callable that takes two arguments: progress (a number between 0 and 1) and a string message. The message should be optional.
''' '''
self.log = Log()
if not isinstance(self.title, unicode): if not isinstance(self.title, unicode):
self.title = unicode(self.title, 'utf-8', 'replace') self.title = unicode(self.title, 'utf-8', 'replace')
@@ -455,7 +457,6 @@ class BasicNewsRecipe(object):
if self.debug: if self.debug:
logging.getLogger('feeds2disk').setLevel(logging.DEBUG)
self.verbose = True self.verbose = True
self.report_progress = progress_reporter self.report_progress = progress_reporter
@@ -560,20 +561,20 @@ class BasicNewsRecipe(object):
res = self.build_index() res = self.build_index()
self.report_progress(1, _('Download finished')) self.report_progress(1, _('Download finished'))
if self.failed_downloads: if self.failed_downloads:
self.log_warning(_('Failed to download the following articles:')) self.log.warning(_('Failed to download the following articles:'))
for feed, article, debug in self.failed_downloads: for feed, article, debug in self.failed_downloads:
self.log_warning(article.title+_(' from ')+feed.title) self.log.warning(article.title+_(' from ')+feed.title)
self.log_debug(article.url) self.log.debug(article.url)
self.log_debug(debug) self.log.debug(debug)
if self.partial_failures: if self.partial_failures:
self.log_warning(_('Failed to download parts of the following articles:')) self.log.warning(_('Failed to download parts of the following articles:'))
for feed, atitle, aurl, debug in self.partial_failures: for feed, atitle, aurl, debug in self.partial_failures:
self.log_warning(atitle + _(' from ') + feed) self.log.warning(atitle + _(' from ') + feed)
self.log_debug(aurl) self.log.debug(aurl)
self.log_warning(_('\tFailed links:')) self.log.warning(_('\tFailed links:'))
for l, tb in debug: for l, tb in debug:
self.log_warning(l) self.log.warning(l)
self.log_debug(tb) self.log.debug(tb)
return res return res
finally: finally:
self.cleanup() self.cleanup()
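The per-recipe log_* helpers give way to a shared self.log object throughout this hunk. Judging purely from the call sites in this commit (some pass several positional arguments), the calibre.utils.logging.Log interface needs at least the following shape; this is a sketch, not the real implementation:

    import sys, traceback

    class Log(object):
        # Sketch of the interface implied by the call sites in this diff.
        def _print(self, level, *args):
            sys.stdout.write('%s: %s\n' % (level, ' '.join(map(unicode, args))))
        def debug(self, *args): self._print('DEBUG', *args)
        def warning(self, *args): self._print('WARNING', *args)
        def error(self, *args): self._print('ERROR', *args)
        def exception(self, *args):
            self._print('ERROR', *args)
            self._print('DEBUG', traceback.format_exc())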
@@ -636,20 +637,11 @@ class BasicNewsRecipe(object):
extra_css=self.extra_css).render(doctype='xhtml') extra_css=self.extra_css).render(doctype='xhtml')
def create_logger(self, feed_number, article_number): def _fetch_article(self, url, dir, f, a, num_of_feeds):
logger = logging.getLogger('feeds2disk.article_%d_%d'%(feed_number, article_number))
out = cStringIO.StringIO()
handler = logging.StreamHandler(out)
handler.setFormatter(logging.Formatter('%(levelname)s: %(message)s'))
handler.setLevel(logging.INFO if self.verbose else logging.WARNING)
if self.debug:
handler.setLevel(logging.DEBUG)
logger.addHandler(handler)
return logger, out
def _fetch_article(self, url, dir, logger, f, a, num_of_feeds):
self.web2disk_options.browser = self.get_browser() if self.multithreaded_fetch else self.browser self.web2disk_options.browser = self.get_browser() if self.multithreaded_fetch else self.browser
fetcher = RecursiveFetcher(self.web2disk_options, logger, self.image_map, self.css_map, (url, f, a, num_of_feeds)) fetcher = RecursiveFetcher(self.web2disk_options, self.log,
self.image_map, self.css_map,
(url, f, a, num_of_feeds))
fetcher.base_dir = dir fetcher.base_dir = dir
fetcher.current_dir = dir fetcher.current_dir = dir
fetcher.show_progress = False fetcher.show_progress = False
@@ -661,21 +653,21 @@ class BasicNewsRecipe(object):
raise Exception(_('Could not fetch article. Run with --debug to see the reason')) raise Exception(_('Could not fetch article. Run with --debug to see the reason'))
return res, path, failures return res, path, failures
def fetch_article(self, url, dir, logger, f, a, num_of_feeds): def fetch_article(self, url, dir, f, a, num_of_feeds):
return self._fetch_article(url, dir, logger, f, a, num_of_feeds) return self._fetch_article(url, dir, f, a, num_of_feeds)
def fetch_obfuscated_article(self, url, dir, logger, f, a, num_of_feeds): def fetch_obfuscated_article(self, url, dir, f, a, num_of_feeds):
path = os.path.abspath(self.get_obfuscated_article(url, logger)) path = os.path.abspath(self.get_obfuscated_article(url))
url = ('file:'+path) if iswindows else ('file://'+path) url = ('file:'+path) if iswindows else ('file://'+path)
return self._fetch_article(url, dir, logger, f, a, num_of_feeds) return self._fetch_article(url, dir, f, a, num_of_feeds)
def fetch_embedded_article(self, article, dir, logger, f, a, num_of_feeds): def fetch_embedded_article(self, article, dir, f, a, num_of_feeds):
templ = templates.EmbeddedContent() templ = templates.EmbeddedContent()
raw = templ.generate(article).render('html') raw = templ.generate(article).render('html')
with PersistentTemporaryFile('_feeds2disk.html') as pt: with PersistentTemporaryFile('_feeds2disk.html') as pt:
pt.write(raw) pt.write(raw)
url = ('file:'+pt.name) if iswindows else ('file://'+pt.name) url = ('file:'+pt.name) if iswindows else ('file://'+pt.name)
return self._fetch_article(url, dir, logger, f, a, num_of_feeds) return self._fetch_article(url, dir, f, a, num_of_feeds)
def build_index(self): def build_index(self):
@@ -716,7 +708,6 @@ class BasicNewsRecipe(object):
art_dir = os.path.join(feed_dir, 'article_%d'%a) art_dir = os.path.join(feed_dir, 'article_%d'%a)
if not os.path.isdir(art_dir): if not os.path.isdir(art_dir):
os.makedirs(art_dir) os.makedirs(art_dir)
logger, stream = self.create_logger(f, a)
try: try:
url = self.print_version(article.url) url = self.print_version(article.url)
except NotImplementedError: except NotImplementedError:
@@ -726,10 +717,9 @@ class BasicNewsRecipe(object):
func, arg = (self.fetch_embedded_article, article) if self.use_embedded_content else \ func, arg = (self.fetch_embedded_article, article) if self.use_embedded_content else \
((self.fetch_obfuscated_article if self.articles_are_obfuscated \ ((self.fetch_obfuscated_article if self.articles_are_obfuscated \
else self.fetch_article), url) else self.fetch_article), url)
req = WorkRequest(func, (arg, art_dir, logger, f, a, len(feed)), req = WorkRequest(func, (arg, art_dir, f, a, len(feed)),
{}, (f, a), self.article_downloaded, {}, (f, a), self.article_downloaded,
self.error_in_article_download) self.error_in_article_download)
req.stream = stream
req.feed = feed req.feed = feed
req.article = article req.article = article
req.feed_dir = feed_dir req.feed_dir = feed_dir
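The WorkRequest constructed above follows the usual threadpool signature: callable, positional args, keyword dict, request ID, result callback and exception callback. The canonical drain loop for such a pool, using the ThreadPool and NoResultsPending names imported earlier, looks roughly like this (a sketch; the recipe's actual loop is not shown in this hunk):

    pool = ThreadPool(5)        # hypothetical worker count
    for req in requests:        # 'requests' stands in for self.jobs
        pool.putRequest(req)
    while True:
        try:
            pool.poll()         # fires callbacks for any finished requests
        except NoResultsPending:
            break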
@@ -768,8 +758,8 @@ class BasicNewsRecipe(object):
cu = self.get_cover_url() cu = self.get_cover_url()
except Exception, err: except Exception, err:
cu = None cu = None
self.log_error(_('Could not download cover: %s')%str(err)) self.log.error(_('Could not download cover: %s')%str(err))
self.log_debug(traceback.format_exc()) self.log.debug(traceback.format_exc())
if cu is not None: if cu is not None:
ext = cu.rpartition('.')[-1] ext = cu.rpartition('.')[-1]
if '?' in ext: if '?' in ext:
@@ -841,8 +831,8 @@ class BasicNewsRecipe(object):
f.write(html.encode('utf-8')) f.write(html.encode('utf-8'))
renderer = render_html(hf) renderer = render_html(hf)
if renderer.tb is not None: if renderer.tb is not None:
self.logger.warning('Failed to render default cover') self.log.warning('Failed to render default cover')
self.logger.debug(renderer.tb) self.log.debug(renderer.tb)
else: else:
cover_file.write(renderer.data) cover_file.write(renderer.data)
cover_file.flush() cover_file.flush()
@@ -863,7 +853,7 @@ class BasicNewsRecipe(object):
manifest.append(os.path.join(dir, 'index.ncx')) manifest.append(os.path.join(dir, 'index.ncx'))
cpath = getattr(self, 'cover_path', None) cpath = getattr(self, 'cover_path', None)
if cpath is None: if cpath is None:
pf = PersistentTemporaryFile('_recipe_cover.jpg') pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
self.default_cover(pf) self.default_cover(pf)
cpath = pf.name cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK): if cpath is not None and os.access(cpath, os.R_OK):
@@ -944,7 +934,7 @@ class BasicNewsRecipe(object):
a = request.requestID[1] a = request.requestID[1]
article = request.article article = request.article
self.log_debug(_('\nDownloaded article %s from %s\n%s')%(article.title, article.url, request.stream.getvalue().decode('utf-8', 'ignore'))) self.log.debug(_('\nDownloaded article %s from %s')%(article.title, article.url))
article.orig_url = article.url article.orig_url = article.url
article.url = 'article_%d/index.html'%a article.url = 'article_%d/index.html'%a
article.downloaded = True article.downloaded = True
@@ -956,11 +946,11 @@ class BasicNewsRecipe(object):
def error_in_article_download(self, request, traceback): def error_in_article_download(self, request, traceback):
self.jobs_done += 1 self.jobs_done += 1
self.log_error(_('Failed to download article: %s from %s\n')%(request.article.title, request.article.url)) self.log.error(_('Failed to download article: %s from %s\n')%(request.article.title, request.article.url))
debug = request.stream.getvalue().decode('utf-8', 'ignore') debug = request.stream.getvalue().decode('utf-8', 'ignore')
self.log_debug(debug) self.log.debug(debug)
self.log_debug(traceback) self.log.debug(traceback)
self.log_debug('\n') self.log.debug('\n')
self.report_progress(float(self.jobs_done)/len(self.jobs), _('Article download failed: %s')%request.article.title) self.report_progress(float(self.jobs_done)/len(self.jobs), _('Article download failed: %s')%request.article.title)
self.failed_downloads.append((request.feed, request.article, debug)) self.failed_downloads.append((request.feed, request.article, debug))
@@ -990,7 +980,7 @@ class BasicNewsRecipe(object):
feed.populate_from_preparsed_feed(msg, []) feed.populate_from_preparsed_feed(msg, [])
feed.description = unicode(err) feed.description = unicode(err)
parsed_feeds.append(feed) parsed_feeds.append(feed)
self.log_exception(msg) self.log.exception(msg)
return parsed_feeds return parsed_feeds
@@ -1033,6 +1023,28 @@ class BasicNewsRecipe(object):
nmassage.extend(entity_replace) nmassage.extend(entity_replace)
return BeautifulSoup(raw, markupMassage=nmassage) return BeautifulSoup(raw, markupMassage=nmassage)
@classmethod
def adeify_images(cls, soup):
'''
If your recipe when converted to EPUB has problems with images when
viewed in Adobe Digital Editions, call this method from within
:method:`postprocess_html`.
'''
for item in soup.findAll('img'):
for attrib in ['height','width','border','align','style']:
if item.has_key(attrib):
del item[attrib]
oldParent = item.parent
myIndex = oldParent.contents.index(item)
item.extract()
divtag = Tag(soup,'div')
brtag = Tag(soup,'br')
oldParent.insert(myIndex,divtag)
divtag.append(item)
divtag.append(brtag)
return soup
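Per the docstring, recipe authors opt in from postprocess_html; the two-argument form below matches the postprocess_html signatures used by other recipes in this commit:

    def postprocess_html(self, soup, first_fetch):
        # Strip sizing attributes and wrap each <img> in its own div for ADE.
        return self.adeify_images(soup)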
class CustomIndexRecipe(BasicNewsRecipe): class CustomIndexRecipe(BasicNewsRecipe):
def custom_index(self): def custom_index(self):
@@ -1057,7 +1069,7 @@ class CustomIndexRecipe(BasicNewsRecipe):
index = os.path.abspath(self.custom_index()) index = os.path.abspath(self.custom_index())
url = 'file:'+index if iswindows else 'file://'+index url = 'file:'+index if iswindows else 'file://'+index
self.web2disk_options.browser = self.browser self.web2disk_options.browser = self.browser
fetcher = RecursiveFetcher(self.web2disk_options, self.logger) fetcher = RecursiveFetcher(self.web2disk_options, self.log)
fetcher.base_dir = self.output_dir fetcher.base_dir = self.output_dir
fetcher.current_dir = self.output_dir fetcher.current_dir = self.output_dir
fetcher.show_progress = False fetcher.show_progress = False
@@ -1069,7 +1081,7 @@ class AutomaticNewsRecipe(BasicNewsRecipe):
keep_only_tags = [dict(name=['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'])] keep_only_tags = [dict(name=['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'])]
def fetch_embedded_article(self, article, dir, logger, f, a, num_of_feeds): def fetch_embedded_article(self, article, dir, f, a, num_of_feeds):
if self.use_embedded_content: if self.use_embedded_content:
self.web2disk_options.keep_only_tags = [] self.web2disk_options.keep_only_tags = []
return BasicNewsRecipe.fetch_embedded_article(self, article, dir, logger, f, a, num_of_feeds) return BasicNewsRecipe.fetch_embedded_article(self, article, dir, f, a, num_of_feeds)

View File

@@ -8,7 +8,7 @@ recipe_modules = ['recipe_' + r for r in (
'newsweek', 'atlantic', 'economist', 'portfolio', 'the_register', 'newsweek', 'atlantic', 'economist', 'portfolio', 'the_register',
'usatoday', 'outlook_india', 'bbc', 'greader', 'wsj', 'usatoday', 'outlook_india', 'bbc', 'greader', 'wsj',
'wired', 'globe_and_mail', 'smh', 'espn', 'business_week', 'miami_herald', 'wired', 'globe_and_mail', 'smh', 'espn', 'business_week', 'miami_herald',
'ars_technica', 'upi', 'new_yorker', 'irish_times', 'iht', 'lanacion', 'ars_technica', 'upi', 'new_yorker', 'irish_times', 'lanacion',
'discover_magazine', 'scientific_american', 'new_york_review_of_books', 'discover_magazine', 'scientific_american', 'new_york_review_of_books',
'daily_telegraph', 'guardian', 'el_pais', 'new_scientist', 'b92', 'daily_telegraph', 'guardian', 'el_pais', 'new_scientist', 'b92',
'politika', 'moscow_times', 'latimes', 'japan_times', 'san_fran_chronicle', 'politika', 'moscow_times', 'latimes', 'japan_times', 'san_fran_chronicle',
@@ -37,7 +37,8 @@ recipe_modules = ['recipe_' + r for r in (
'new_york_review_of_books_no_sub', 'politico', 'adventuregamers', 'new_york_review_of_books_no_sub', 'politico', 'adventuregamers',
'mondedurable', 'instapaper', 'dnevnik_cro', 'vecernji_list', 'mondedurable', 'instapaper', 'dnevnik_cro', 'vecernji_list',
'nacional_cro', '24sata', 'dnevni_avaz', 'glas_srpske', '24sata_rs', 'nacional_cro', '24sata', 'dnevni_avaz', 'glas_srpske', '24sata_rs',
'krstarica', 'krstarica_en', 'tanjug', 'laprensa_ni', 'krstarica', 'krstarica_en', 'tanjug', 'laprensa_ni', 'azstarnet',
'corriere_della_sera_it', 'corriere_della_sera_en', 'msdnmag_en',
)] )]
import re, imp, inspect, time, os import re, imp, inspect, time, os

View File

@@ -9,6 +9,7 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
import re import re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Ser24Sata(BasicNewsRecipe): class Ser24Sata(BasicNewsRecipe):
title = '24 Sata - Sr' title = '24 Sata - Sr'
@@ -39,14 +40,30 @@ class Ser24Sata(BasicNewsRecipe):
feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')] feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')]
def cleanup_image_tags(self,soup):
for item in soup.findAll('img'):
for attrib in ['height','width','border','align']:
if item.has_key(attrib):
del item[attrib]
oldParent = item.parent
myIndex = oldParent.contents.index(item)
item.extract()
divtag = Tag(soup,'div')
brtag = Tag(soup,'br')
oldParent.insert(myIndex,divtag)
divtag.append(item)
divtag.append(brtag)
return soup
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['xml:lang'] = 'sr-Latn-RS' soup.html['xml:lang'] = 'sr-Latn-RS'
soup.html['lang'] = 'sr-Latn-RS' soup.html['lang'] = 'sr-Latn-RS'
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">' mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
soup.head.insert(0,mtag) soup.head.insert(0,mtag)
return soup return self.cleanup_image_tags(soup)
def print_version(self, url): def print_version(self, url):
article, sep, rest = url.partition('#') article, sep, rest = url.partition('#')
return article.replace('/show.php','/_print.php') article_base, sep2, article_id = article.partition('id=')
return 'http://www.24sata.co.rs/_print.php?id=' + article_id
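Assuming an article link of the shape the partition calls imply (show.php with an id= query, plus an optional fragment), the rewrite behaves like this:

    url = 'http://www.24sata.rs/show.php?id=12345#komentari'   # hypothetical link
    article, sep, rest = url.partition('#')                    # drop the fragment
    article_base, sep2, article_id = article.partition('id=')
    print 'http://www.24sata.co.rs/_print.php?id=' + article_id
    # -> http://www.24sata.co.rs/_print.php?id=12345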

View File

@@ -0,0 +1,63 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.azstarnet.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Azstarnet(BasicNewsRecipe):
title = 'Arizona Daily Star'
__author__ = 'Darko Miletic'
description = 'news from Arizona'
publisher = 'azstarnet.com'
category = 'news, politics, Arizona, USA'
delay = 1
oldest_article = 1
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf-8'
needs_subscription = True
remove_javascript = True
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('http://azstarnet.com/registration/retro.php')
br.select_form(nr=1)
br['email'] = self.username
br['pass' ] = self.password
br.submit()
return br
keep_only_tags = [dict(name='div', attrs={'id':'storycontent'})]
remove_tags = [
dict(name=['object','link','iframe','base','img'])
,dict(name='div',attrs={'class':'bannerinstory'})
]
feeds = [(u'Tucson Region', u'http://rss.azstarnet.com/index.php?site=metro')]
def preprocess_html(self, soup):
soup.html['dir' ] = 'ltr'
soup.html['lang'] = 'en-US'
mtag = '\n<meta http-equiv="Content-Language" content="en-US"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
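In get_browser above, select_form(nr=1) picks the second form on the page (mechanize numbers forms from zero) and 'email'/'pass' are that form's field names. Outside a recipe the same login would look roughly like this, with hypothetical credentials:

    import mechanize

    br = mechanize.Browser()
    br.open('http://azstarnet.com/registration/retro.php')
    br.select_form(nr=1)               # zero-indexed: the page's second form
    br['email'] = 'user@example.com'   # hypothetical credentials
    br['pass'] = 'secret'
    br.submit()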

View File

@@ -8,11 +8,12 @@ blic.rs
import re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Blic(BasicNewsRecipe): class Blic(BasicNewsRecipe):
title = u'Blic' title = 'Blic'
__author__ = u'Darko Miletic' __author__ = 'Darko Miletic'
description = u'Blic.co.yu online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja' description = 'Blic.co.yu online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
publisher = 'RINGIER d.o.o.' publisher = 'RINGIER d.o.o.'
category = 'news, politics, Serbia' category = 'news, politics, Serbia'
oldest_article = 2 oldest_article = 2
@@ -21,7 +22,7 @@ class Blic(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
language = _('Serbian') language = _('Serbian')
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} '
html2lrf_options = [ html2lrf_options = [
'--comment' , description '--comment' , description
@@ -30,7 +31,7 @@ class Blic(BasicNewsRecipe):
, '--ignore-tables' , '--ignore-tables'
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} "'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@@ -44,10 +45,26 @@ class Blic(BasicNewsRecipe):
start_url, question, rest_url = url.partition('?') start_url, question, rest_url = url.partition('?')
return u'http://www.blic.rs/_print.php?' + rest_url return u'http://www.blic.rs/_print.php?' + rest_url
def cleanup_image_tags(self,soup):
for item in soup.findAll('img'):
for attrib in ['height','width','border','align']:
if item.has_key(attrib):
del item[attrib]
oldParent = item.parent
myIndex = oldParent.contents.index(item)
item.extract()
divtag = Tag(soup,'div')
brtag = Tag(soup,'br')
oldParent.insert(myIndex,divtag)
divtag.append(item)
divtag.append(brtag)
return soup
def preprocess_html(self, soup): def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>' mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
soup.head.insert(0,mtag) soup.head.insert(0,mtag)
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
return soup return self.cleanup_image_tags(soup)

View File

@@ -0,0 +1,45 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.corriere.it/english
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Corriere_en(BasicNewsRecipe):
title = 'Corriere della Sera in English'
__author__ = 'Darko Miletic'
description = 'News from Milan and Italy'
oldest_article = 15
publisher = 'Corriere della Sera'
category = 'news, politics, Italy'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
remove_javascript = True
language = _('English')
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
keep_only_tags = [dict(name='div', attrs={'class':['news-dettaglio article','article']})]
remove_tags = [
dict(name=['base','object','link','embed','img'])
,dict(name='div', attrs={'class':'news-goback'})
,dict(name='ul', attrs={'class':'toolbar'})
]
remove_tags_after = dict(name='p', attrs={'class':'footnotes'})
feeds = [(u'Italian Life', u'http://www.corriere.it/rss/english.xml')]

View File

@@ -0,0 +1,55 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.corriere.it
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Corriere_it(BasicNewsRecipe):
title = 'Corriere della Sera'
__author__ = 'Darko Miletic'
description = 'News from Milan and Italy'
oldest_article = 7
publisher = 'Corriere della Sera'
category = 'news, politics, Italy'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
remove_javascript = True
language = _('Italian')
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
keep_only_tags = [dict(name='div', attrs={'class':['news-dettaglio article','article']})]
remove_tags = [
dict(name=['base','object','link','embed','img'])
,dict(name='div', attrs={'class':'news-goback'})
,dict(name='ul', attrs={'class':'toolbar'})
]
remove_tags_after = dict(name='p', attrs={'class':'footnotes'})
feeds = [
(u'Ultimora' , u'http://www.corriere.it/rss/ultimora.xml' )
,(u'Cronache' , u'http://www.corriere.it/rss/cronache.xml' )
,(u'Economia' , u'http://www.corriere.it/rss/economia.xml' )
,(u'Editoriali', u'http://www.corriere.it/rss/editoriali.xml')
,(u'Esteri' , u'http://www.corriere.it/rss/esteri.xml' )
,(u'Politica' , u'http://www.corriere.it/rss/politica.xml' )
,(u'Salute' , u'http://www.corriere.it/rss/salute.xml' )
,(u'Scienze' , u'http://www.corriere.it/rss/scienze.xml' )
,(u'Spettacolo', u'http://www.corriere.it/rss/spettacoli.xml')
,(u'Sport' , u'http://www.corriere.it/rss/sport.xml' )
]

View File

@@ -12,7 +12,7 @@ from calibre.ptempfile import PersistentTemporaryFile
class InternationalHeraldTribune(BasicNewsRecipe): class InternationalHeraldTribune(BasicNewsRecipe):
title = u'The International Herald Tribune' title = u'The International Herald Tribune'
__author__ = 'Derry FitzGerald' __author__ = 'Derry FitzGerald'
language = _('English') language = _('English')
oldest_article = 1 oldest_article = 1
max_articles_per_feed = 10 max_articles_per_feed = 10
no_stylesheets = True no_stylesheets = True
@@ -20,13 +20,13 @@ class InternationalHeraldTribune(BasicNewsRecipe):
remove_tags = [dict(name='div', attrs={'class':'footer'}), remove_tags = [dict(name='div', attrs={'class':'footer'}),
dict(name=['form'])] dict(name=['form'])]
preprocess_regexps = [ preprocess_regexps = [
(re.compile(r'<!-- webtrends.*', re.DOTALL), (re.compile(r'<!-- webtrends.*', re.DOTALL),
lambda m:'</body></html>') lambda m:'</body></html>')
] ]
extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }' extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
feeds = [ feeds = [
(u'Frontpage', u'http://www.iht.com/rss/frontpage.xml'), (u'Frontpage', u'http://www.iht.com/rss/frontpage.xml'),
(u'Business', u'http://www.iht.com/rss/business.xml'), (u'Business', u'http://www.iht.com/rss/business.xml'),
(u'Americas', u'http://www.iht.com/rss/america.xml'), (u'Americas', u'http://www.iht.com/rss/america.xml'),
(u'Europe', u'http://www.iht.com/rss/europe.xml'), (u'Europe', u'http://www.iht.com/rss/europe.xml'),
@@ -45,7 +45,7 @@ class InternationalHeraldTribune(BasicNewsRecipe):
] ]
temp_files = [] temp_files = []
articles_are_obfuscated = True articles_are_obfuscated = True
def get_obfuscated_article(self, url, logger): def get_obfuscated_article(self, url, logger):
br = self.get_browser() br = self.get_browser()
br.open(url) br.open(url)
@@ -55,4 +55,4 @@ class InternationalHeraldTribune(BasicNewsRecipe):
self.temp_files.append(PersistentTemporaryFile('_iht.html')) self.temp_files.append(PersistentTemporaryFile('_iht.html'))
self.temp_files[-1].write(html) self.temp_files[-1].write(html)
self.temp_files[-1].close() self.temp_files[-1].close()
return self.temp_files[-1].name return self.temp_files[-1].name

View File

@@ -0,0 +1,61 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
msdn.microsoft.com/en-us/magazine
'''
from calibre.web.feeds.news import BasicNewsRecipe
class MSDNMagazine_en(BasicNewsRecipe):
title = 'MSDN Magazine'
__author__ = 'Darko Miletic'
description = 'The Microsoft Journal for Developers'
publisher = 'Microsoft Press'
category = 'news, IT, Microsoft, programming, windows'
oldest_article = 31
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf-8'
remove_javascript = True
current_issue = 'http://msdn.microsoft.com/en-us/magazine/default.aspx'
language = _('English')
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
feeds = [(u'Articles', u'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1')]
keep_only_tags = [dict(name='div', attrs={'class':'topic'})]
remove_tags = [
dict(name=['object','link','base','table'])
,dict(name='div', attrs={'class':'MTPS_CollapsibleRegion'})
]
def get_cover_url(self):
cover_url = None
soup = self.index_to_soup(self.current_issue)
link_item = soup.find('span',attrs={'class':'ContentsImageSpacer'})
if link_item:
imgt = link_item.find('img')
if imgt:
cover_url = imgt['src']
return cover_url
def preprocess_html(self, soup):
for item in soup.findAll('div',attrs={'class':['FeatureSmallHead','ColumnTypeSubTitle']}):
item.name="h2"
for item in soup.findAll('div',attrs={'class':['FeatureHeadline','ColumnTypeTitle']}):
item.name="h1"
for item in soup.findAll('div',attrs={'class':'ArticleTypeTitle'}):
item.name="h3"
return soup
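The preprocess step relies on BeautifulSoup allowing a tag to be renamed in place by assigning to its name attribute, promoting the magazine's styled divs to real headings (presumably so the conversion pipeline treats them as section titles). For example:

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    soup = BeautifulSoup('<div class="FeatureHeadline">Title</div>')
    soup.find('div', attrs={'class': 'FeatureHeadline'}).name = 'h1'
    print soup   # -> <h1 class="FeatureHeadline">Title</h1>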

View File

@@ -9,11 +9,12 @@ newyorker.com
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class NewYorker(BasicNewsRecipe): class NewYorker(BasicNewsRecipe):
title = u'The New Yorker' title = u'The New Yorker'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'The best of US journalism' description = 'The best of US journalism'
oldest_article = 7 oldest_article = 7
language = _('English') language = _('English')
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = False no_stylesheets = False
use_embedded_content = False use_embedded_content = False
@@ -24,7 +25,7 @@ class NewYorker(BasicNewsRecipe):
.calibre_recipe_title {font-size:normal} .calibre_recipe_title {font-size:normal}
.calibre_feed_description {font-size:xx-small} .calibre_feed_description {font-size:xx-small}
''' '''
keep_only_tags = [ keep_only_tags = [
dict(name='div' , attrs={'id':'printbody' }) dict(name='div' , attrs={'id':'printbody' })
@@ -41,3 +42,12 @@ class NewYorker(BasicNewsRecipe):
def print_version(self, url): def print_version(self, url):
return url + '?printable=true' return url + '?printable=true'
def postprocess_html(self, soup, x):
body = soup.find('body')
if body:
html = soup.find('html')
if html:
body.extract()
html.insert(-1, body)
return soup

View File

@@ -8,9 +8,10 @@ nspm.rs
import re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Nspm(BasicNewsRecipe): class Nspm(BasicNewsRecipe):
title = u'Nova srpska politicka misao' title = 'Nova srpska politicka misao'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Casopis za politicku teoriju i drustvena istrazivanja' description = 'Casopis za politicku teoriju i drustvena istrazivanja'
publisher = 'NSPM' publisher = 'NSPM'
@@ -36,7 +37,7 @@ class Nspm(BasicNewsRecipe):
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags = [ remove_tags = [
dict(name=['a','img','link','object','embed']) dict(name=['link','object','embed'])
,dict(name='td', attrs={'class':'buttonheading'}) ,dict(name='td', attrs={'class':'buttonheading'})
] ]
@@ -50,6 +51,21 @@ class Nspm(BasicNewsRecipe):
def print_version(self, url): def print_version(self, url):
return url.replace('.html','/stampa.html') return url.replace('.html','/stampa.html')
def cleanup_image_tags(self,soup):
for item in soup.findAll('img'):
for attrib in ['height','width','border','align']:
if item.has_key(attrib):
del item[attrib]
oldParent = item.parent
myIndex = oldParent.contents.index(item)
item.extract()
divtag = Tag(soup,'div')
brtag = Tag(soup,'br')
oldParent.insert(myIndex,divtag)
divtag.append(item)
divtag.append(brtag)
return soup
def preprocess_html(self, soup): def preprocess_html(self, soup):
lng = 'sr-Latn-RS' lng = 'sr-Latn-RS'
soup.html['xml:lang'] = lng soup.html['xml:lang'] = lng
@@ -59,4 +75,4 @@ class Nspm(BasicNewsRecipe):
ftag['content'] = lng ftag['content'] = lng
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
return soup return self.cleanup_image_tags(soup)

View File

@@ -1,38 +1,47 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
''' '''
tomshardware.com tomshardware.com/us
''' '''
import urllib
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class Tomshardware(BasicNewsRecipe): class Tomshardware(BasicNewsRecipe):
title = "Tom's Hardware US"
__author__ = 'Darko Miletic'
description = 'Hardware reviews and News'
publisher = "Tom's Hardware"
category = 'news, IT, hardware, USA'
no_stylesheets = True
needs_subscription = True
language = _('English')
INDEX = 'http://www.tomshardware.com'
LOGIN = INDEX + '/membres/'
remove_javascript = True
use_embedded_content= False
title = "Tom's Hardware US" html2lrf_options = [
__author__ = 'Darko Miletic' '--comment', description
description = 'Hardware reviews and News' , '--category', category
no_stylesheets = True , '--publisher', publisher
needs_subscription = True ]
language = _('English')
INDEX = 'http://www.tomshardware.com'
LOGIN = 'http://www.tomshardware.com/membres/?r=%2Fus%2F#loginForm'
cover_url = 'http://img.bestofmedia.com/img/tomshardware/design/tomshardware.jpg'
html2lrf_options = [ '--comment' , description html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
, '--category' , 'hardware,news'
, '--base-font-size', '10'
]
def get_browser(self): def get_browser(self):
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
br.open(self.INDEX+'/us/')
if self.username is not None and self.password is not None: if self.username is not None and self.password is not None:
br.open(self.LOGIN) data = urllib.urlencode({ 'action':'login_action'
br.select_form(name='connexion') ,'r':self.INDEX+'/us/'
br['login'] = self.username ,'login':self.username
br['mdp' ] = self.password ,'mdp':self.password
br.submit() })
br.open(self.LOGIN,data)
return br return br
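The rewritten login skips mechanize's form machinery and posts the urlencoded pairs directly; giving br.open() a data argument turns the request into a POST. The body looks like this (dict key order is arbitrary here, so the actual ordering may differ):

    import urllib

    data = urllib.urlencode({'action': 'login_action',
                             'r': 'http://www.tomshardware.com/us/',
                             'login': 'user@example.com',   # hypothetical credentials
                             'mdp': 'secret'})
    # e.g. action=login_action&r=http%3A%2F%2Fwww.tomshardware.com%2Fus%2F&login=user%40example.com&mdp=secret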
remove_tags = [ remove_tags = [
@@ -41,18 +50,30 @@ class Tomshardware(BasicNewsRecipe):
] ]
feeds = [ feeds = [
(u'Latest Articles', u'http://www.tomshardware.com/feeds/atom/tom-s-hardware-us,18-2.xml') (u'Latest Articles', u'http://www.tomshardware.com/feeds/atom/tom-s-hardware-us,18-2.xml' )
,(u'Latest News' , u'http://www.tomshardware.com/feeds/atom/tom-s-hardware-us,18-1.xml') ,(u'Latest News' , u'http://www.tomshardware.com/feeds/atom/tom-s-hardware-us,18-1.xml')
] ]
def print_version(self, url): def print_version(self, url):
main, sep, rest = url.rpartition('.html') main, sep, rest = url.rpartition('.html')
rmain, rsep, article_id = main.rpartition(',') rmain, rsep, article_id = main.rpartition(',')
tmain, tsep, trest = rmain.rpartition('/reviews/') tmain, tsep, trest = rmain.rpartition('/reviews/')
rind = 'http://www.tomshardware.com/news_print.php?p1='
if tsep: if tsep:
return 'http://www.tomshardware.com/review_print.php?p1=' + article_id rind = 'http://www.tomshardware.com/review_print.php?p1='
return 'http://www.tomshardware.com/news_print.php?p1=' + article_id return rind + article_id
def cleanup_image_tags(self,soup):
for item in soup.findAll('img'):
for attrib in ['height','width','border','align']:
if item.has_key(attrib):
del item[attrib]
return soup
def preprocess_html(self, soup): def preprocess_html(self, soup):
del(soup.body['onload']) del(soup.body['onload'])
return soup for item in soup.findAll(style=True):
del item['style']
for it in soup.findAll('span'):
it.name="div"
return self.cleanup_image_tags(soup)

View File

@@ -11,13 +11,13 @@ class WashingtonPost(BasicNewsRecipe):
max_articles_per_feed = 20 max_articles_per_feed = 20
language = _('English') language = _('English')
remove_javascript = True
remove_javascript = True
feeds = [ ('Today\'s Highlights', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/03/24/LI2005032400102.xml'), feeds = [ ('Today\'s Highlights', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/03/24/LI2005032400102.xml'),
('Politics', 'http://www.washingtonpost.com/wp-dyn/rss/politics/index.xml'), ('Politics', 'http://www.washingtonpost.com/wp-dyn/rss/politics/index.xml'),
('Nation', 'http://www.www.washingtonpost.com/wp-dyn/rss/nation/index.xml'), ('Nation', 'http://www.washingtonpost.com/wp-dyn/rss/nation/index.xml'),
('World', 'http://www.washingtonpost.com/wp-dyn/rss/world/index.xml'), ('World', 'http://www.washingtonpost.com/wp-dyn/rss/world/index.xml'),
('Business', 'http://www.washingtonpost.com/wp-dyn/rss/business/index.xml'), ('Business', 'http://www.washingtonpost.com/wp-dyn/rss/business/index.xml'),
('Technology', 'http://www.washingtonpost.com/wp-dyn/rss/technology/index.xml'), ('Technology', 'http://www.washingtonpost.com/wp-dyn/rss/technology/index.xml'),
@@ -25,7 +25,7 @@ class WashingtonPost(BasicNewsRecipe):
('Education', 'http://www.washingtonpost.com/wp-dyn/rss/education/index.xml'), ('Education', 'http://www.washingtonpost.com/wp-dyn/rss/education/index.xml'),
('Editorials', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/05/30/LI2005053000331.xml'), ('Editorials', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/05/30/LI2005053000331.xml'),
] ]
remove_tags = [{'id':['pfmnav', 'ArticleCommentsWrapper']}] remove_tags = [{'id':['pfmnav', 'ArticleCommentsWrapper']}]
@@ -34,7 +34,7 @@ class WashingtonPost(BasicNewsRecipe):
def print_version(self, url): def print_version(self, url):
return url.rpartition('.')[0] + '_pf.html' return url.rpartition('.')[0] + '_pf.html'
def postprocess_html(self, soup, first): def postprocess_html(self, soup, first):
for div in soup.findAll(name='div', style=re.compile('margin')): for div in soup.findAll(name='div', style=re.compile('margin')):
div['style'] = '' div['style'] = ''

View File

@@ -7,18 +7,19 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Fetch a webpage and its links recursively. The webpages are saved to disk in Fetch a webpage and its links recursively. The webpages are saved to disk in
UTF-8 encoding with any charset declarations removed. UTF-8 encoding with any charset declarations removed.
''' '''
import sys, socket, os, urlparse, logging, re, time, copy, urllib2, threading, traceback import sys, socket, os, urlparse, re, time, copy, urllib2, threading, traceback
from urllib import url2pathname, quote from urllib import url2pathname, quote
from threading import RLock from threading import RLock
from httplib import responses from httplib import responses
from PIL import Image from PIL import Image
from cStringIO import StringIO from cStringIO import StringIO
from calibre import setup_cli_handlers, browser, sanitize_file_name, \ from calibre import browser, sanitize_file_name, \
relpath, unicode_path relpath, unicode_path
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.config import OptionParser from calibre.utils.config import OptionParser
from calibre.utils.logging import Log
class FetchError(Exception): class FetchError(Exception):
pass pass
@@ -28,10 +29,10 @@ class closing(object):
def __init__(self, thing): def __init__(self, thing):
self.thing = thing self.thing = thing
def __enter__(self): def __enter__(self):
return self.thing return self.thing
def __exit__(self, *exc_info): def __exit__(self, *exc_info):
try: try:
self.thing.close() self.thing.close()
@@ -55,47 +56,48 @@ def save_soup(soup, target):
for meta in metas: for meta in metas:
if 'charset' in meta.get('content', '').lower(): if 'charset' in meta.get('content', '').lower():
meta.replaceWith(nm) meta.replaceWith(nm)
selfdir = os.path.dirname(target) selfdir = os.path.dirname(target)
for tag in soup.findAll(['img', 'link', 'a']): for tag in soup.findAll(['img', 'link', 'a']):
for key in ('src', 'href'): for key in ('src', 'href'):
path = tag.get(key, None) path = tag.get(key, None)
if path and os.path.isfile(path) and os.path.exists(path) and os.path.isabs(path): if path and os.path.isfile(path) and os.path.exists(path) and os.path.isabs(path):
tag[key] = unicode_path(relpath(path, selfdir).replace(os.sep, '/')) tag[key] = unicode_path(relpath(path, selfdir).replace(os.sep, '/'))
html = unicode(soup) html = unicode(soup)
with open(target, 'wb') as f: with open(target, 'wb') as f:
f.write(html.encode('utf-8')) f.write(html.encode('utf-8'))
class response(str): class response(str):
def __new__(cls, *args): def __new__(cls, *args):
obj = super(response, cls).__new__(cls, *args) obj = super(response, cls).__new__(cls, *args)
obj.newurl = None obj.newurl = None
return obj return obj
class DummyLock(object): class DummyLock(object):
def __enter__(self, *args): return self def __enter__(self, *args): return self
def __exit__(self, *args): pass def __exit__(self, *args): pass
class RecursiveFetcher(object): class RecursiveFetcher(object):
LINK_FILTER = tuple(re.compile(i, re.IGNORECASE) for i in LINK_FILTER = tuple(re.compile(i, re.IGNORECASE) for i in
('.exe\s*$', '.mp3\s*$', '.ogg\s*$', '^\s*mailto:', '^\s*$')) ('.exe\s*$', '.mp3\s*$', '.ogg\s*$', '^\s*mailto:', '^\s*$'))
#ADBLOCK_FILTER = tuple(re.compile(i, re.IGNORECASE) for it in #ADBLOCK_FILTER = tuple(re.compile(i, re.IGNORECASE) for it in
# ( # (
# #
# ) # )
# ) # )
CSS_IMPORT_PATTERN = re.compile(r'\@import\s+url\((.*?)\)', re.IGNORECASE) CSS_IMPORT_PATTERN = re.compile(r'\@import\s+url\((.*?)\)', re.IGNORECASE)
default_timeout = socket.getdefaulttimeout() # Needed here as it is used in __del__ default_timeout = socket.getdefaulttimeout() # Needed here as it is used in __del__
DUMMY_LOCK = DummyLock() DUMMY_LOCK = DummyLock()
def __init__(self, options, logger, image_map={}, css_map={}, job_info=None): def __init__(self, options, log, image_map={}, css_map={}, job_info=None):
self.base_dir = os.path.abspath(os.path.expanduser(options.dir)) self.base_dir = os.path.abspath(os.path.expanduser(options.dir))
if not os.path.exists(self.base_dir): if not os.path.exists(self.base_dir):
os.makedirs(self.base_dir) os.makedirs(self.base_dir)
self.log = log
self.default_timeout = socket.getdefaulttimeout() self.default_timeout = socket.getdefaulttimeout()
socket.setdefaulttimeout(options.timeout) socket.setdefaulttimeout(options.timeout)
self.verbose = options.verbose self.verbose = options.verbose
@@ -122,19 +124,19 @@ class RecursiveFetcher(object):
self.remove_tags_after = getattr(options, 'remove_tags_after', None) self.remove_tags_after = getattr(options, 'remove_tags_after', None)
self.remove_tags_before = getattr(options, 'remove_tags_before', None) self.remove_tags_before = getattr(options, 'remove_tags_before', None)
self.keep_only_tags = getattr(options, 'keep_only_tags', []) self.keep_only_tags = getattr(options, 'keep_only_tags', [])
self.preprocess_html_ext = getattr(options, 'preprocess_html', lambda soup: soup) self.preprocess_html_ext = getattr(options, 'preprocess_html', lambda soup: soup)
self.postprocess_html_ext= getattr(options, 'postprocess_html', None) self.postprocess_html_ext= getattr(options, 'postprocess_html', None)
self.download_stylesheets = not options.no_stylesheets self.download_stylesheets = not options.no_stylesheets
self.show_progress = True self.show_progress = True
self.failed_links = [] self.failed_links = []
self.job_info = job_info self.job_info = job_info
def get_soup(self, src): def get_soup(self, src):
nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE) nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE)
nmassage.extend(self.preprocess_regexps) nmassage.extend(self.preprocess_regexps)
nmassage += [(re.compile(r'<!DOCTYPE .+?>', re.DOTALL), lambda m: '')] # Some websites have buggy doctype declarations that mess up beautifulsoup nmassage += [(re.compile(r'<!DOCTYPE .+?>', re.DOTALL), lambda m: '')] # Some websites have buggy doctype declarations that mess up beautifulsoup
soup = BeautifulSoup(xml_to_unicode(src, self.verbose, strip_encoding_pats=True)[0], markupMassage=nmassage) soup = BeautifulSoup(xml_to_unicode(src, self.verbose, strip_encoding_pats=True)[0], markupMassage=nmassage)
if self.keep_only_tags: if self.keep_only_tags:
body = Tag(soup, 'body') body = Tag(soup, 'body')
try: try:
@@ -146,7 +148,7 @@ class RecursiveFetcher(object):
soup.find('body').replaceWith(body) soup.find('body').replaceWith(body)
except AttributeError: # soup has no body element except AttributeError: # soup has no body element
pass pass
def remove_beyond(tag, next): def remove_beyond(tag, next):
while tag is not None and tag.name != 'body': while tag is not None and tag.name != 'body':
after = getattr(tag, next) after = getattr(tag, next)
@@ -155,31 +157,34 @@ class RecursiveFetcher(object):
after.extract() after.extract()
after = ns after = ns
tag = tag.parent tag = tag.parent
if self.remove_tags_after is not None: if self.remove_tags_after is not None:
rt = [self.remove_tags_after] if isinstance(self.remove_tags_after, dict) else self.remove_tags_after rt = [self.remove_tags_after] if isinstance(self.remove_tags_after, dict) else self.remove_tags_after
for spec in rt: for spec in rt:
tag = soup.find(**spec) tag = soup.find(**spec)
remove_beyond(tag, 'nextSibling') remove_beyond(tag, 'nextSibling')
if self.remove_tags_before is not None: if self.remove_tags_before is not None:
tag = soup.find(**self.remove_tags_before) tag = soup.find(**self.remove_tags_before)
remove_beyond(tag, 'previousSibling') remove_beyond(tag, 'previousSibling')
for kwds in self.remove_tags: for kwds in self.remove_tags:
for tag in soup.findAll(**kwds): for tag in soup.findAll(**kwds):
tag.extract() tag.extract()
return self.preprocess_html_ext(soup) return self.preprocess_html_ext(soup)
def fetch_url(self, url): def fetch_url(self, url):
data = None data = None
self.log_debug('Fetching %s', url) self.log.debug('Fetching', url)
delta = time.time() - self.last_fetch_at delta = time.time() - self.last_fetch_at
if delta < self.delay: if delta < self.delay:
time.sleep(delta) time.sleep(delta)
if re.search(r'\s+', url) is not None: if re.search(r'\s+|,', url) is not None:
url = quote(url) purl = list(urlparse.urlparse(url))
for i in range(2, 6):
purl[i] = quote(purl[i])
url = urlparse.urlunparse(purl)
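Where the old code quoted the whole URL (which also mangles the :// after the scheme), the new code quotes only elements 2 to 5 of the parse result: path, params, query and fragment. A quick illustration (note that quote()'s default safe set would also escape '=' and '&' inside a query string):

    import urlparse
    from urllib import quote

    purl = list(urlparse.urlparse('http://example.com/some path/article.html'))
    for i in range(2, 6):
        purl[i] = quote(purl[i])
    print urlparse.urlunparse(purl)
    # -> http://example.com/some%20path/article.html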
with self.browser_lock: with self.browser_lock:
try: try:
with closing(self.browser.open(url)) as f: with closing(self.browser.open(url)) as f:
@@ -190,43 +195,43 @@ class RecursiveFetcher(object):
raise FetchError, responses[err.code] raise FetchError, responses[err.code]
if getattr(err, 'reason', [0])[0] == 104 or \ if getattr(err, 'reason', [0])[0] == 104 or \
getattr(getattr(err, 'args', [None])[0], 'errno', None) == -2: # Connection reset by peer or Name or service not known getattr(getattr(err, 'args', [None])[0], 'errno', None) == -2: # Connection reset by peer or Name or service not known
self.log_debug('Temporary error, retrying in 1 second') self.log.debug('Temporary error, retrying in 1 second')
time.sleep(1) time.sleep(1)
with closing(self.browser.open(url)) as f: with closing(self.browser.open(url)) as f:
data = response(f.read()+f.read()) data = response(f.read()+f.read())
data.newurl = f.geturl() data.newurl = f.geturl()
else: else:
raise err raise err
finally: finally:
self.last_fetch_at = time.time() self.last_fetch_at = time.time()
return data return data
def start_fetch(self, url): def start_fetch(self, url):
soup = BeautifulSoup(u'<a href="'+url+'" />') soup = BeautifulSoup(u'<a href="'+url+'" />')
self.log_info('Downloading') self.log.debug('Downloading')
res = self.process_links(soup, url, 0, into_dir='') res = self.process_links(soup, url, 0, into_dir='')
self.log_info('%s saved to %s', url, res) self.log.debug('%s saved to %s'%( url, res))
return res return res
def is_link_ok(self, url): def is_link_ok(self, url):
for i in self.__class__.LINK_FILTER: for i in self.__class__.LINK_FILTER:
if i.search(url): if i.search(url):
return False return False
return True return True
def is_link_wanted(self, url): def is_link_wanted(self, url):
if self.filter_regexps: if self.filter_regexps:
for f in self.filter_regexps: for f in self.filter_regexps:
if f.search(url): if f.search(url):
return False return False
if self.match_regexps: if self.match_regexps:
for m in self.match_regexps: for m in self.match_regexps:
if m.search(url): if m.search(url):
return True return True
return False return False
return True return True
def process_stylesheets(self, soup, baseurl): def process_stylesheets(self, soup, baseurl):
diskpath = unicode_path(os.path.join(self.current_dir, 'stylesheets')) diskpath = unicode_path(os.path.join(self.current_dir, 'stylesheets'))
if not os.path.exists(diskpath): if not os.path.exists(diskpath):
@@ -243,8 +248,7 @@ class RecursiveFetcher(object):
try: try:
data = self.fetch_url(iurl) data = self.fetch_url(iurl)
except Exception, err: except Exception, err:
self.log_debug('Could not fetch stylesheet %s', iurl) self.log.exception('Could not fetch stylesheet %s'% iurl)
self.log_debug('Error: %s', str(err), exc_info=True)
continue continue
stylepath = os.path.join(diskpath, 'style'+str(c)+'.css') stylepath = os.path.join(diskpath, 'style'+str(c)+'.css')
with self.stylemap_lock: with self.stylemap_lock:
@@ -253,7 +257,7 @@ class RecursiveFetcher(object):
x.write(data) x.write(data)
tag['href'] = stylepath tag['href'] = stylepath
else: else:
for ns in tag.findAll(text=True): for ns in tag.findAll(text=True):
src = str(ns) src = str(ns)
m = self.__class__.CSS_IMPORT_PATTERN.search(src) m = self.__class__.CSS_IMPORT_PATTERN.search(src)
if m: if m:
@@ -267,8 +271,7 @@ class RecursiveFetcher(object):
try: try:
data = self.fetch_url(iurl) data = self.fetch_url(iurl)
except Exception, err: except Exception, err:
self.log_warning('Could not fetch stylesheet %s', iurl) self.log.exception('Could not fetch stylesheet %s'% iurl)
self.log_debug('Error: %s', str(err), exc_info=True)
continue continue
c += 1 c += 1
stylepath = os.path.join(diskpath, 'style'+str(c)+'.css') stylepath = os.path.join(diskpath, 'style'+str(c)+'.css')
@@ -277,9 +280,9 @@ class RecursiveFetcher(object):
with open(stylepath, 'wb') as x: with open(stylepath, 'wb') as x:
x.write(data) x.write(data)
ns.replaceWith(src.replace(m.group(1), stylepath)) ns.replaceWith(src.replace(m.group(1), stylepath))
def process_images(self, soup, baseurl): def process_images(self, soup, baseurl):
diskpath = unicode_path(os.path.join(self.current_dir, 'images')) diskpath = unicode_path(os.path.join(self.current_dir, 'images'))
if not os.path.exists(diskpath): if not os.path.exists(diskpath):
@@ -291,9 +294,6 @@ class RecursiveFetcher(object):
iurl = self.image_url_processor(baseurl, iurl) iurl = self.image_url_processor(baseurl, iurl)
ext = os.path.splitext(iurl)[1] ext = os.path.splitext(iurl)[1]
ext = ext[:5] ext = ext[:5]
#if not ext:
# self.log_debug('Skipping extensionless image %s', iurl)
# continue
if not urlparse.urlsplit(iurl).scheme: if not urlparse.urlsplit(iurl).scheme:
iurl = urlparse.urljoin(baseurl, iurl, False) iurl = urlparse.urljoin(baseurl, iurl, False)
with self.imagemap_lock: with self.imagemap_lock:
@@ -303,8 +303,7 @@ class RecursiveFetcher(object):
try: try:
data = self.fetch_url(iurl) data = self.fetch_url(iurl)
except Exception, err: except Exception, err:
self.log_warning('Could not fetch image %s', iurl) self.log.exception('Could not fetch image %s'% iurl)
self.log_debug('Error: %s', str(err), exc_info=True)
continue continue
c += 1 c += 1
fname = sanitize_file_name('img'+str(c)+ext) fname = sanitize_file_name('img'+str(c)+ext)
@@ -322,7 +321,7 @@ class RecursiveFetcher(object):
traceback.print_exc() traceback.print_exc()
continue continue
def absurl(self, baseurl, tag, key, filter=True): def absurl(self, baseurl, tag, key, filter=True):
iurl = tag[key] iurl = tag[key]
parts = urlparse.urlsplit(iurl) parts = urlparse.urlsplit(iurl)
if not parts.netloc and not parts.path: if not parts.netloc and not parts.path:
@@ -330,32 +329,32 @@ class RecursiveFetcher(object):
if not parts.scheme: if not parts.scheme:
iurl = urlparse.urljoin(baseurl, iurl, False) iurl = urlparse.urljoin(baseurl, iurl, False)
if not self.is_link_ok(iurl): if not self.is_link_ok(iurl):
self.log_debug('Skipping invalid link: %s', iurl) self.log.debug('Skipping invalid link:', iurl)
return None return None
if filter and not self.is_link_wanted(iurl): if filter and not self.is_link_wanted(iurl):
self.log_debug('Filtered link: '+iurl) self.log.debug('Filtered link: '+iurl)
return None return None
return iurl return iurl
def normurl(self, url): def normurl(self, url):
parts = list(urlparse.urlsplit(url)) parts = list(urlparse.urlsplit(url))
parts[4] = '' parts[4] = ''
return urlparse.urlunsplit(parts) return urlparse.urlunsplit(parts)
def localize_link(self, tag, key, path): def localize_link(self, tag, key, path):
parts = urlparse.urlsplit(tag[key]) parts = urlparse.urlsplit(tag[key])
suffix = '#'+parts.fragment if parts.fragment else '' suffix = '#'+parts.fragment if parts.fragment else ''
tag[key] = path+suffix tag[key] = path+suffix
def process_return_links(self, soup, baseurl): def process_return_links(self, soup, baseurl):
for tag in soup.findAll(lambda tag: tag.name.lower()=='a' and tag.has_key('href')): for tag in soup.findAll(lambda tag: tag.name.lower()=='a' and tag.has_key('href')):
iurl = self.absurl(baseurl, tag, 'href') iurl = self.absurl(baseurl, tag, 'href')
if not iurl: if not iurl:
continue continue
nurl = self.normurl(iurl) nurl = self.normurl(iurl)
if self.filemap.has_key(nurl): if self.filemap.has_key(nurl):
self.localize_link(tag, 'href', self.filemap[nurl]) self.localize_link(tag, 'href', self.filemap[nurl])
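normurl discards the fragment (index 4 of the urlsplit result) so links that differ only in their anchor map to a single downloaded file, and localize_link re-attaches the fragment to the local path. A sketch:

    import urlparse

    parts = list(urlparse.urlsplit('http://example.com/page.html#section2'))
    parts[4] = ''                      # drop the fragment for the filemap key
    print urlparse.urlunsplit(parts)   # -> http://example.com/page.html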
def process_links(self, soup, baseurl, recursion_level, into_dir='links'): def process_links(self, soup, baseurl, recursion_level, into_dir='links'):
res = '' res = ''
diskpath = os.path.join(self.current_dir, into_dir) diskpath = os.path.join(self.current_dir, into_dir)
@@ -365,7 +364,7 @@ class RecursiveFetcher(object):
try: try:
self.current_dir = diskpath self.current_dir = diskpath
tags = list(soup.findAll('a', href=True)) tags = list(soup.findAll('a', href=True))
for c, tag in enumerate(tags): for c, tag in enumerate(tags):
if self.show_progress: if self.show_progress:
print '.', print '.',
@@ -395,17 +394,17 @@ class RecursiveFetcher(object):
dsrc = dsrc.decode(self.encoding, 'ignore') dsrc = dsrc.decode(self.encoding, 'ignore')
else: else:
dsrc = xml_to_unicode(dsrc, self.verbose)[0] dsrc = xml_to_unicode(dsrc, self.verbose)[0]
soup = self.get_soup(dsrc) soup = self.get_soup(dsrc)
base = soup.find('base', href=True) base = soup.find('base', href=True)
if base is not None: if base is not None:
newbaseurl = base['href'] newbaseurl = base['href']
self.log_debug('Processing images...') self.log.debug('Processing images...')
self.process_images(soup, newbaseurl) self.process_images(soup, newbaseurl)
if self.download_stylesheets: if self.download_stylesheets:
self.process_stylesheets(soup, newbaseurl) self.process_stylesheets(soup, newbaseurl)
_fname = basename(iurl) _fname = basename(iurl)
if not isinstance(_fname, unicode): if not isinstance(_fname, unicode):
_fname.decode('latin1', 'replace') _fname.decode('latin1', 'replace')
@@ -416,56 +415,55 @@ class RecursiveFetcher(object):
                     self.downloaded_paths.append(res)
                     self.filemap[nurl] = res
                     if recursion_level < self.max_recursions:
-                        self.log_debug('Processing links...')
+                        self.log.debug('Processing links...')
                         self.process_links(soup, newbaseurl, recursion_level+1)
                     else:
                         self.process_return_links(soup, newbaseurl)
-                        self.log_debug('Recursion limit reached. Skipping links in %s', iurl)
+                        self.log.debug('Recursion limit reached. Skipping links in', iurl)

                     if callable(self.postprocess_html_ext):
                         soup = self.postprocess_html_ext(soup,
                                 c==0 and recursion_level==0 and not getattr(self, 'called_first', False),
                                 self.job_info)

                         if c==0 and recursion_level == 0:
                             self.called_first = True

                     save_soup(soup, res)
                     self.localize_link(tag, 'href', res)
                 except Exception, err:
                     self.failed_links.append((iurl, traceback.format_exc()))
-                    self.log_warning('Could not fetch link %s', iurl)
-                    self.log_debug('Error: %s', str(err), exc_info=True)
+                    self.log.exception('Could not fetch link', iurl)
                 finally:
                     self.current_dir = diskpath
                     self.files += 1
         finally:
             self.current_dir = prev_dir
             if self.show_progress:
                 print
         return res

     def __del__(self):
         dt = getattr(self, 'default_timeout', None)
         if dt is not None:
             socket.setdefaulttimeout(dt)

 def option_parser(usage=_('%prog URL\n\nWhere URL is for example http://google.com')):
     parser = OptionParser(usage=usage)
     parser.add_option('-d', '--base-dir',
                       help=_('Base directory into which URL is saved. Default is %default'),
                       default='.', type='string', dest='dir')
     parser.add_option('-t', '--timeout',
                       help=_('Timeout in seconds to wait for a response from the server. Default: %default s'),
                       default=10.0, type='float', dest='timeout')
     parser.add_option('-r', '--max-recursions', default=1,
                       help=_('Maximum number of levels to recurse i.e. depth of links to follow. Default %default'),
                       type='int', dest='max_recursions')
     parser.add_option('-n', '--max-files', default=sys.maxint, type='int', dest='max_files',
                       help=_('The maximum number of files to download. This only applies to files from <a href> tags. Default is %default'))
     parser.add_option('--delay', default=0, dest='delay', type='int',
                       help=_('Minimum interval in seconds between consecutive fetches. Default is %default s'))
     parser.add_option('--encoding', default=None,
                       help=_('The character encoding for the websites you are trying to download. The default is to try and guess the encoding.'))
     parser.add_option('--match-regexp', default=[], action='append', dest='match_regexps',
                       help=_('Only links that match this regular expression will be followed. This option can be specified multiple times, in which case as long as a link matches any one regexp, it will be followed. By default all links are followed.'))
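
As a usage sketch, the parser built above consumes an argv-style list the same way main() passes sys.argv; the option values here are invented:

    parser = option_parser()
    opts, args = parser.parse_args(['web2disk', 'http://google.com',
                                    '-d', '/tmp/site', '-r', '2', '--delay', '1'])
    print opts.dir, opts.max_recursions, opts.delay
    # -> /tmp/site 2 1
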
@@ -478,23 +476,21 @@ def option_parser(usage=_('%prog URL\n\nWhere URL is for example http://google.c
     return parser

-def create_fetcher(options, logger=None, image_map={}):
-    if logger is None:
-        level = logging.DEBUG if options.verbose else logging.INFO
-        logger = logging.getLogger('web2disk')
-        setup_cli_handlers(logger, level)
-    return RecursiveFetcher(options, logger, image_map={})
+def create_fetcher(options, image_map={}, log=None):
+    if log is None:
+        log = Log()
+    return RecursiveFetcher(options, log, image_map={})

 def main(args=sys.argv):
     parser = option_parser()
     options, args = parser.parse_args(args)
     if len(args) != 2:
         parser.print_help()
         return 1
-    fetcher = create_fetcher(options)
-    fetcher.start_fetch(args[1])
-
-if __name__ == '__main__':
+    fetcher = create_fetcher(options)
+    fetcher.start_fetch(args[1])
+
+if __name__ == '__main__':
     sys.exit(main())
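
After this change the web2disk entry points no longer touch the logging module at all; callers either hand in a calibre Log or let create_fetcher build one. A hedged sketch of the new calling convention, assuming Log is the calibre.utils.logging.Log this module now imports:

    from calibre.utils.logging import Log

    options, args = option_parser().parse_args(['web2disk', 'http://example.com'])
    fetcher = create_fetcher(options, log=Log())  # or omit log for the default
    fetcher.start_fetch(args[1])
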