mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
Sync to pluginize
This commit is contained in:
commit
1edbd88e73
@ -14,7 +14,20 @@ IMAGEMAGICK_DIR = 'C:\\ImageMagick'
|
|||||||
FONTCONFIG_DIR = 'C:\\fontconfig'
|
FONTCONFIG_DIR = 'C:\\fontconfig'
|
||||||
VC90 = r'C:\VC90.CRT'
|
VC90 = r'C:\VC90.CRT'
|
||||||
|
|
||||||
import sys, os, py2exe, shutil, zipfile, glob, re
|
# ModuleFinder can't handle runtime changes to __path__, but win32com uses them
|
||||||
|
import sys
|
||||||
|
import py2exe.mf as modulefinder
|
||||||
|
import win32com
|
||||||
|
for p in win32com.__path__[1:]:
|
||||||
|
modulefinder.AddPackagePath("win32com", p)
|
||||||
|
for extra in ["win32com.shell"]: #,"win32com.mapi"
|
||||||
|
__import__(extra)
|
||||||
|
m = sys.modules[extra]
|
||||||
|
for p in m.__path__[1:]:
|
||||||
|
modulefinder.AddPackagePath(extra, p)
|
||||||
|
|
||||||
|
|
||||||
|
import os, py2exe, shutil, zipfile, glob, re
|
||||||
from distutils.core import setup
|
from distutils.core import setup
|
||||||
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
|
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
|
||||||
sys.path.insert(0, BASE_DIR)
|
sys.path.insert(0, BASE_DIR)
|
||||||
|
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
__appname__ = 'calibre'
|
__appname__ = 'calibre'
|
||||||
__version__ = '0.5.5'
|
__version__ = '0.5.6'
|
||||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||||
'''
|
'''
|
||||||
Various run time constants.
|
Various run time constants.
|
||||||
|
@ -189,6 +189,7 @@ class ComicMetadataReader(MetadataReaderPlugin):
|
|||||||
def get_metadata(self, stream, ftype):
|
def get_metadata(self, stream, ftype):
|
||||||
if ftype == 'cbr':
|
if ftype == 'cbr':
|
||||||
from calibre.libunrar import extract_member as extract_first
|
from calibre.libunrar import extract_member as extract_first
|
||||||
|
extract_first
|
||||||
else:
|
else:
|
||||||
from calibre.libunzip import extract_member as extract_first
|
from calibre.libunzip import extract_member as extract_first
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
@ -267,12 +268,14 @@ from calibre.ebooks.epub.input import EPUBInput
|
|||||||
from calibre.ebooks.mobi.input import MOBIInput
|
from calibre.ebooks.mobi.input import MOBIInput
|
||||||
from calibre.ebooks.pdf.input import PDFInput
|
from calibre.ebooks.pdf.input import PDFInput
|
||||||
from calibre.ebooks.txt.input import TXTInput
|
from calibre.ebooks.txt.input import TXTInput
|
||||||
|
from calibre.ebooks.html.input import HTMLInput
|
||||||
from calibre.ebooks.oeb.output import OEBOutput
|
from calibre.ebooks.oeb.output import OEBOutput
|
||||||
from calibre.ebooks.txt.output import TXTOutput
|
from calibre.ebooks.txt.output import TXTOutput
|
||||||
from calibre.ebooks.pdf.output import PDFOutput
|
from calibre.ebooks.pdf.output import PDFOutput
|
||||||
from calibre.customize.profiles import input_profiles, output_profiles
|
from calibre.customize.profiles import input_profiles, output_profiles
|
||||||
|
|
||||||
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, TXTInput, OEBOutput, TXTOutput, PDFOutput]
|
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, HTMLInput,
|
||||||
|
TXTInput, OEBOutput, TXTOutput, PDFOutput]
|
||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||||
x.__name__.endswith('MetadataReader')]
|
x.__name__.endswith('MetadataReader')]
|
||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||||
|
@ -163,9 +163,9 @@ class InputFormatPlugin(Plugin):
|
|||||||
for x in os.listdir('.'):
|
for x in os.listdir('.'):
|
||||||
shutil.rmtree(x) if os.path.isdir(x) else os.remove(x)
|
shutil.rmtree(x) if os.path.isdir(x) else os.remove(x)
|
||||||
|
|
||||||
|
|
||||||
ret = self.convert(stream, options, file_ext,
|
ret = self.convert(stream, options, file_ext,
|
||||||
log, accelerators)
|
log, accelerators)
|
||||||
|
|
||||||
if options.debug_input is not None:
|
if options.debug_input is not None:
|
||||||
options.debug_input = os.path.abspath(options.debug_input)
|
options.debug_input = os.path.abspath(options.debug_input)
|
||||||
if not os.path.exists(options.debug_input):
|
if not os.path.exists(options.debug_input):
|
||||||
|
@ -13,7 +13,9 @@ def devices():
|
|||||||
from calibre.devices.kindle.driver import KINDLE
|
from calibre.devices.kindle.driver import KINDLE
|
||||||
from calibre.devices.kindle.driver import KINDLE2
|
from calibre.devices.kindle.driver import KINDLE2
|
||||||
from calibre.devices.blackberry.driver import BLACKBERRY
|
from calibre.devices.blackberry.driver import BLACKBERRY
|
||||||
return (PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY)
|
from calibre.devices.eb600.driver import EB600
|
||||||
|
return (PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2,
|
||||||
|
BLACKBERRY, EB600)
|
||||||
|
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
@ -11,7 +11,6 @@ from calibre.ebooks.metadata import authors_to_string
|
|||||||
from calibre.devices.errors import FreeSpaceError
|
from calibre.devices.errors import FreeSpaceError
|
||||||
from calibre.devices.usbms.driver import USBMS
|
from calibre.devices.usbms.driver import USBMS
|
||||||
import calibre.devices.cybookg3.t2b as t2b
|
import calibre.devices.cybookg3.t2b as t2b
|
||||||
from calibre.devices.errors import FreeSpaceError
|
|
||||||
|
|
||||||
class CYBOOKG3(USBMS):
|
class CYBOOKG3(USBMS):
|
||||||
# Ordered list of supported formats
|
# Ordered list of supported formats
|
||||||
@ -123,7 +122,7 @@ class CYBOOKG3(USBMS):
|
|||||||
|
|
||||||
filepath, ext = os.path.splitext(path)
|
filepath, ext = os.path.splitext(path)
|
||||||
|
|
||||||
# Delete the ebook auxiliary files
|
# Delete the ebook auxiliary file
|
||||||
if os.path.exists(filepath + '.mbp'):
|
if os.path.exists(filepath + '.mbp'):
|
||||||
os.unlink(filepath + '.mbp')
|
os.unlink(filepath + '.mbp')
|
||||||
if os.path.exists(filepath + '.dat'):
|
if os.path.exists(filepath + '.dat'):
|
||||||
|
2
src/calibre/devices/eb600/__init__.py
Executable file
2
src/calibre/devices/eb600/__init__.py
Executable file
@ -0,0 +1,2 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
41
src/calibre/devices/eb600/driver.py
Executable file
41
src/calibre/devices/eb600/driver.py
Executable file
@ -0,0 +1,41 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
'''
|
||||||
|
Device driver for the Netronix EB600
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.devices.usbms.driver import USBMS
|
||||||
|
|
||||||
|
class EB600(USBMS):
    '''
    Driver settings for the Netronix EB600, built on the generic
    :class:`USBMS` driver.

    Main memory and the storage card share the same Windows identifier
    (``'EBOOK'``), so the two are told apart in :meth:`windows_sort_drives`.
    '''

    # Ordered list of supported formats
    FORMATS = ['epub', 'prc', 'chm', 'djvu', 'html', 'rtf', 'txt', 'pdf']
    DRM_FORMATS = ['prc', 'mobi', 'html', 'pdf', 'txt']

    # NOTE(review): presumably the USB vendor/product/revision ids matched by
    # the USBMS base class -- the matching code is not part of this file.
    VENDOR_ID = [0x1f85]
    PRODUCT_ID = [0x1688]
    BCD = [0x110]

    VENDOR_NAME = 'NETRONIX'
    # Identical on purpose: see windows_sort_drives()
    WINDOWS_MAIN_MEM = 'EBOOK'
    WINDOWS_CARD_MEM = 'EBOOK'

    OSX_MAIN_MEM = 'EB600 Internal Storage Media'
    OSX_CARD_MEM = 'EB600 Card Storage Media'

    MAIN_MEMORY_VOLUME_LABEL = 'EB600 Main Memory'
    STORAGE_CARD_VOLUME_LABEL = 'EB600 Storage Card'

    EBOOK_DIR_MAIN = ''
    EBOOK_DIR_CARD = ''
    SUPPORTS_SUB_DIRS = True

    def windows_sort_drives(self, drives):
        '''
        Make sure that, when both drives were detected, the entry that
        compares lower is stored under ``'main'`` and the other one under
        ``'card'``.

        :param drives: Dictionary that may contain the keys ``'main'`` and
                       ``'card'``. Either key may be absent (for example
                       when no card is inserted); the dictionary is then
                       returned unchanged.
        :return: The same dictionary object, possibly with the two entries
                 swapped.
        '''
        # Use .get(): the caller treats both keys as optional, so indexing
        # directly would raise KeyError whenever only one drive was found.
        main = drives.get('main', None)
        card = drives.get('card', None)
        if card and main and card < main:
            drives['main'] = card
            drives['card'] = main

        return drives
|
||||||
|
|
||||||
|
|
@ -174,6 +174,14 @@ class Device(_Device):
|
|||||||
|
|
||||||
return prefix
|
return prefix
|
||||||
|
|
||||||
|
def windows_sort_drives(self, drives):
    '''
    Hook for drivers whose main memory and storage card cannot be told
    apart by their Windows identifiers (for example the EB600).

    It is called with the dictionary of detected drives before the main
    and card prefixes are read from it. Subclasses may reorder the
    entries; this default implementation hands the dictionary back
    untouched.

    :param drives: Dictionary of detected drives.
    :return: The dictionary to use, here `drives` itself.
    '''
    return drives
|
||||||
|
|
||||||
def open_windows(self):
|
def open_windows(self):
|
||||||
time.sleep(6)
|
time.sleep(6)
|
||||||
drives = {}
|
drives = {}
|
||||||
@ -188,11 +196,14 @@ class Device(_Device):
|
|||||||
if 'main' in drives.keys() and 'card' in drives.keys():
|
if 'main' in drives.keys() and 'card' in drives.keys():
|
||||||
break
|
break
|
||||||
|
|
||||||
|
drives = self.windows_sort_drives(drives)
|
||||||
self._main_prefix = drives.get('main')
|
self._main_prefix = drives.get('main')
|
||||||
self._card_prefix = drives.get('card')
|
self._card_prefix = drives.get('card')
|
||||||
|
|
||||||
if not self._main_prefix:
|
if not self._main_prefix:
|
||||||
raise DeviceError(_('Unable to detect the %s disk drive. Try rebooting.') % self.__class__.__name__)
|
raise DeviceError(
|
||||||
|
_('Unable to detect the %s disk drive. Try rebooting.') %
|
||||||
|
self.__class__.__name__)
|
||||||
|
|
||||||
def get_osx_mountpoints(self, raw=None):
|
def get_osx_mountpoints(self, raw=None):
|
||||||
if raw is None:
|
if raw is None:
|
||||||
|
@ -36,7 +36,7 @@ import os, sys, cStringIO, logging, re, functools, shutil
|
|||||||
|
|
||||||
from lxml.etree import XPath
|
from lxml.etree import XPath
|
||||||
from lxml import html, etree
|
from lxml import html, etree
|
||||||
from PyQt4.Qt import QApplication, QPixmap
|
from PyQt4.Qt import QApplication, QPixmap, Qt
|
||||||
|
|
||||||
from calibre.ebooks.html_old import Processor, merge_metadata, get_filelist,\
|
from calibre.ebooks.html_old import Processor, merge_metadata, get_filelist,\
|
||||||
opf_traverse, create_metadata, rebase_toc, Link, parser
|
opf_traverse, create_metadata, rebase_toc, Link, parser
|
||||||
@ -50,7 +50,7 @@ from calibre.ebooks.epub.pages import add_page_map
|
|||||||
from calibre.ebooks.epub.fonts import Rationalizer
|
from calibre.ebooks.epub.fonts import Rationalizer
|
||||||
from calibre.constants import preferred_encoding
|
from calibre.constants import preferred_encoding
|
||||||
from calibre.customize.ui import run_plugins_on_postprocess
|
from calibre.customize.ui import run_plugins_on_postprocess
|
||||||
from calibre import walk, CurrentDir, to_unicode
|
from calibre import walk, CurrentDir, to_unicode, fit_image
|
||||||
|
|
||||||
content = functools.partial(os.path.join, u'content')
|
content = functools.partial(os.path.join, u'content')
|
||||||
|
|
||||||
@ -112,6 +112,31 @@ def find_html_index(files):
|
|||||||
return f, os.path.splitext(f)[1].lower()[1:]
|
return f, os.path.splitext(f)[1].lower()[1:]
|
||||||
return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:]
|
return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:]
|
||||||
|
|
||||||
|
def rescale_images(imgdir, screen_size, log):
    '''
    Shrink, in place, every image directly inside `imgdir` that
    `fit_image` reports as needing to be scaled for the given screen.

    :param imgdir: Directory whose entries are examined (sub-directories
                   are not descended into). If it does not exist nothing
                   is done.
    :param screen_size: ``(width, height)`` pair handed to `fit_image` as
                        the available area.
    :param log: Logger; ``log.info`` is called once per rescaled image.
    '''
    if not os.path.isdir(imgdir):
        # No resources directory: nothing to rescale, and os.listdir()
        # would raise.
        return
    pwidth, pheight = screen_size
    if QApplication.instance() is None:
        # Qt requires an application object before pixmaps can be used.
        # NOTE(review): the instance is not kept in a variable, exactly as
        # before -- confirm PyQt keeps it alive for the loop below.
        QApplication([])
    for f in os.listdir(imgdir):
        path = os.path.join(imgdir, f)
        # Compare case-insensitively so that e.g. 'STYLE.CSS' is skipped too
        if os.path.splitext(f)[1].lower() in ('.css', '.js'):
            continue

        p = QPixmap()
        p.load(path)
        if p.isNull():
            # Not something Qt can decode as an image
            continue
        width, height = p.width(), p.height()
        scaled, new_width, new_height = fit_image(width, height, pwidth,
                pheight)
        if scaled:
            log.info('Rescaling image: '+f)
            # The target size comes from fit_image, so the pixmap is scaled
            # to exactly that size.
            resized = p.scaled(new_width, new_height, Qt.IgnoreAspectRatio,
                    Qt.SmoothTransformation)
            # Let Qt choose the format from the file name so that a .png
            # stays a PNG instead of silently becoming JPEG data. Fall back
            # to the previous behaviour (always JPEG) if Qt cannot write
            # that format.
            if not resized.save(path):
                resized.save(path, 'JPEG')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class HTMLProcessor(Processor, Rationalizer):
|
class HTMLProcessor(Processor, Rationalizer):
|
||||||
|
|
||||||
def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, stylesheets):
|
def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, stylesheets):
|
||||||
@ -482,6 +507,10 @@ def convert(htmlfile, opts, notification=None, create_epub=True,
|
|||||||
if os.stat(ncx_path).st_size > opts.profile.flow_size:
|
if os.stat(ncx_path).st_size > opts.profile.flow_size:
|
||||||
logger.warn('NCX still larger than allowed size at %d bytes. Menu based Table of Contents may not work on device.'%os.stat(ncx_path).st_size)
|
logger.warn('NCX still larger than allowed size at %d bytes. Menu based Table of Contents may not work on device.'%os.stat(ncx_path).st_size)
|
||||||
|
|
||||||
|
if opts.profile.screen_size is not None:
|
||||||
|
rescale_images(os.path.join(tdir, 'content', 'resources'),
|
||||||
|
opts.profile.screen_size, logger)
|
||||||
|
|
||||||
if create_epub:
|
if create_epub:
|
||||||
epub = initialize_container(opts.output)
|
epub = initialize_container(opts.output)
|
||||||
epub.add_dir(tdir)
|
epub.add_dir(tdir)
|
||||||
|
@ -17,7 +17,7 @@ def tostring(root, strip_comments=False, pretty_print=False):
|
|||||||
root.set('xmlns', 'http://www.w3.org/1999/xhtml')
|
root.set('xmlns', 'http://www.w3.org/1999/xhtml')
|
||||||
root.set('{http://www.w3.org/1999/xhtml}xlink', 'http://www.w3.org/1999/xlink')
|
root.set('{http://www.w3.org/1999/xhtml}xlink', 'http://www.w3.org/1999/xlink')
|
||||||
for x in root.iter():
|
for x in root.iter():
|
||||||
if x.tag.rpartition('}')[-1].lower() == 'svg':
|
if hasattr(x.tag, 'rpartition') and x.tag.rpartition('}')[-1].lower() == 'svg':
|
||||||
x.set('xmlns', 'http://www.w3.org/2000/svg')
|
x.set('xmlns', 'http://www.w3.org/2000/svg')
|
||||||
|
|
||||||
ans = _tostring(root, encoding='utf-8', pretty_print=pretty_print)
|
ans = _tostring(root, encoding='utf-8', pretty_print=pretty_print)
|
||||||
|
@ -11,14 +11,12 @@ __docformat__ = 'restructuredtext en'
|
|||||||
Input plugin for HTML or OPF ebooks.
|
Input plugin for HTML or OPF ebooks.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import os, re, sys, cStringIO
|
import os, re, sys
|
||||||
from urlparse import urlparse, urlunparse
|
from urlparse import urlparse, urlunparse
|
||||||
from urllib import unquote
|
from urllib import unquote
|
||||||
|
|
||||||
from calibre.customize.conversion import InputFormatPlugin
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
from calibre.ebooks.metadata.meta import get_metadata
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
from calibre.ebooks.metadata.opf2 import OPF, OPFCreator
|
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
from calibre.customize.conversion import OptionRecommendation
|
from calibre.customize.conversion import OptionRecommendation
|
||||||
from calibre import unicode_path
|
from calibre import unicode_path
|
||||||
@ -213,63 +211,12 @@ def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None)
|
|||||||
sys.setrecursionlimit(orec)
|
sys.setrecursionlimit(orec)
|
||||||
|
|
||||||
|
|
||||||
def opf_traverse(opf_reader, verbose=0, encoding=None):
|
|
||||||
'''
|
|
||||||
Return a list of :class:`HTMLFile` objects in the order specified by the
|
|
||||||
`<spine>` element of the OPF.
|
|
||||||
|
|
||||||
:param opf_reader: An :class:`calibre.ebooks.metadata.opf2.OPF` instance.
|
|
||||||
:param encoding: Specify character encoding of HTML files. If `None` it is
|
|
||||||
auto-detected.
|
|
||||||
'''
|
|
||||||
if not opf_reader.spine:
|
|
||||||
raise ValueError('OPF does not have a spine')
|
|
||||||
flat = []
|
|
||||||
for path in opf_reader.spine.items():
|
|
||||||
path = os.path.abspath(path)
|
|
||||||
if path not in flat:
|
|
||||||
flat.append(os.path.abspath(path))
|
|
||||||
for item in opf_reader.manifest:
|
|
||||||
if 'html' in item.mime_type:
|
|
||||||
path = os.path.abspath(item.path)
|
|
||||||
if path not in flat:
|
|
||||||
flat.append(path)
|
|
||||||
for i, path in enumerate(flat):
|
|
||||||
if not os.path.exists(path):
|
|
||||||
path = path.replace('&', '%26')
|
|
||||||
if os.path.exists(path):
|
|
||||||
flat[i] = path
|
|
||||||
for item in opf_reader.itermanifest():
|
|
||||||
item.set('href', item.get('href').replace('&', '%26'))
|
|
||||||
ans = []
|
|
||||||
for path in flat:
|
|
||||||
if os.path.exists(path):
|
|
||||||
ans.append(HTMLFile(path, 0, encoding, verbose))
|
|
||||||
else:
|
|
||||||
print 'WARNING: OPF spine item %s does not exist'%path
|
|
||||||
ans = [f for f in ans if not f.is_binary]
|
|
||||||
return ans
|
|
||||||
|
|
||||||
def search_for_opf(dir):
|
|
||||||
for f in os.listdir(dir):
|
|
||||||
if f.lower().endswith('.opf'):
|
|
||||||
return OPF(open(os.path.join(dir, f), 'rb'), dir)
|
|
||||||
|
|
||||||
def get_filelist(htmlfile, dir, opts, log):
|
def get_filelist(htmlfile, dir, opts, log):
|
||||||
'''
|
'''
|
||||||
Build list of files referenced by html file or try to detect and use an
|
Build list of files referenced by html file or try to detect and use an
|
||||||
OPF file instead.
|
OPF file instead.
|
||||||
'''
|
'''
|
||||||
print 'Building file list...'
|
log.info('Building file list...')
|
||||||
opf = search_for_opf(dir)
|
|
||||||
filelist = None
|
|
||||||
if opf is not None:
|
|
||||||
try:
|
|
||||||
filelist = opf_traverse(opf, verbose=opts.verbose,
|
|
||||||
encoding=opts.input_encoding)
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
if not filelist:
|
|
||||||
filelist = traverse(htmlfile, max_levels=int(opts.max_levels),
|
filelist = traverse(htmlfile, max_levels=int(opts.max_levels),
|
||||||
verbose=opts.verbose,
|
verbose=opts.verbose,
|
||||||
encoding=opts.input_encoding)\
|
encoding=opts.input_encoding)\
|
||||||
@ -278,7 +225,7 @@ def get_filelist(htmlfile, dir, opts, log):
|
|||||||
log.debug('\tFound files...')
|
log.debug('\tFound files...')
|
||||||
for f in filelist:
|
for f in filelist:
|
||||||
log.debug('\t\t', f)
|
log.debug('\t\t', f)
|
||||||
return opf, filelist
|
return filelist
|
||||||
|
|
||||||
|
|
||||||
class HTMLInput(InputFormatPlugin):
|
class HTMLInput(InputFormatPlugin):
|
||||||
@ -309,34 +256,32 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
|
|
||||||
def convert(self, stream, opts, file_ext, log,
|
def convert(self, stream, opts, file_ext, log,
|
||||||
accelerators):
|
accelerators):
|
||||||
|
from calibre.ebooks.metadata.meta import get_metadata
|
||||||
|
|
||||||
basedir = os.getcwd()
|
basedir = os.getcwd()
|
||||||
|
|
||||||
if hasattr(stream, 'name'):
|
if hasattr(stream, 'name'):
|
||||||
basedir = os.path.dirname(stream.name)
|
basedir = os.path.dirname(stream.name)
|
||||||
if file_ext == 'opf':
|
if file_ext == 'opf':
|
||||||
opf = OPF(stream, basedir)
|
opfpath = stream.name
|
||||||
filelist = opf_traverse(opf, verbose=opts.verbose,
|
|
||||||
encoding=opts.input_encoding)
|
|
||||||
mi = MetaInformation(opf)
|
|
||||||
else:
|
else:
|
||||||
opf, filelist = get_filelist(stream.name, basedir, opts, log)
|
filelist = get_filelist(stream.name, basedir, opts, log)
|
||||||
mi = MetaInformation(opf)
|
mi = get_metadata(stream, 'html')
|
||||||
mi.smart_update(get_metadata(stream, 'html'))
|
|
||||||
|
|
||||||
mi = OPFCreator(os.getcwdu(), mi)
|
mi = OPFCreator(os.getcwdu(), mi)
|
||||||
mi.guide = None
|
mi.guide = None
|
||||||
entries = [(f.path, 'application/xhtml+xml') for f in filelist]
|
entries = [(f.path, 'application/xhtml+xml') for f in filelist]
|
||||||
mi.create_manifest(entries)
|
mi.create_manifest(entries)
|
||||||
mi.create_spine([f.path for f in filelist])
|
mi.create_spine([f.path for f in filelist])
|
||||||
|
|
||||||
tocbuf = cStringIO.StringIO()
|
mi.render(open('metadata.opf', 'wb'))
|
||||||
mi.render(open('metadata.opf', 'wb'), tocbuf, 'toc.ncx')
|
opfpath = os.path.abspath('metadata.opf')
|
||||||
toc = tocbuf.getvalue()
|
|
||||||
if toc:
|
|
||||||
open('toc.ncx', 'wb').write(toc)
|
|
||||||
|
|
||||||
from calibre.ebooks.conversion.plumber import create_oebbook
|
from calibre.ebooks.conversion.plumber import create_oebbook
|
||||||
return create_oebbook(log, os.path.abspath('metadata.opf'))
|
oeb = create_oebbook(log, opfpath)
|
||||||
|
|
||||||
|
from calibre.ebooks.oeb.transforms.package import Package
|
||||||
|
Package(os.getcwdu())(oeb, opts)
|
||||||
|
|
||||||
|
return oeb
|
||||||
|
|
||||||
|
|
||||||
|
@ -11,9 +11,7 @@ from urllib import unquote, quote
|
|||||||
from urlparse import urlparse
|
from urlparse import urlparse
|
||||||
|
|
||||||
|
|
||||||
from calibre.constants import __version__ as VERSION
|
|
||||||
from calibre import relpath
|
from calibre import relpath
|
||||||
from calibre.utils.config import OptionParser
|
|
||||||
|
|
||||||
def string_to_authors(raw):
|
def string_to_authors(raw):
|
||||||
raw = raw.replace('&&', u'\uffff')
|
raw = raw.replace('&&', u'\uffff')
|
||||||
@ -189,11 +187,11 @@ class MetaInformation(object):
|
|||||||
'publisher', 'series', 'series_index', 'rating',
|
'publisher', 'series', 'series_index', 'rating',
|
||||||
'isbn', 'tags', 'cover_data', 'application_id', 'guide',
|
'isbn', 'tags', 'cover_data', 'application_id', 'guide',
|
||||||
'manifest', 'spine', 'toc', 'cover', 'language',
|
'manifest', 'spine', 'toc', 'cover', 'language',
|
||||||
'book_producer', 'timestamp'):
|
'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc'):
|
||||||
if hasattr(mi, attr):
|
if hasattr(mi, attr):
|
||||||
setattr(ans, attr, getattr(mi, attr))
|
setattr(ans, attr, getattr(mi, attr))
|
||||||
|
|
||||||
def __init__(self, title, authors=[_('Unknown')]):
|
def __init__(self, title, authors=(_('Unknown'),)):
|
||||||
'''
|
'''
|
||||||
@param title: title or ``_('Unknown')`` or a MetaInformation object
|
@param title: title or ``_('Unknown')`` or a MetaInformation object
|
||||||
@param authors: List of strings or []
|
@param authors: List of strings or []
|
||||||
@ -204,9 +202,9 @@ class MetaInformation(object):
|
|||||||
title = mi.title
|
title = mi.title
|
||||||
authors = mi.authors
|
authors = mi.authors
|
||||||
self.title = title
|
self.title = title
|
||||||
self.author = authors # Needed for backward compatibility
|
self.author = list(authors) if authors else []# Needed for backward compatibility
|
||||||
#: List of strings or []
|
#: List of strings or []
|
||||||
self.authors = authors
|
self.authors = list(authors) if authors else []
|
||||||
self.tags = getattr(mi, 'tags', [])
|
self.tags = getattr(mi, 'tags', [])
|
||||||
#: mi.cover_data = (ext, data)
|
#: mi.cover_data = (ext, data)
|
||||||
self.cover_data = getattr(mi, 'cover_data', (None, None))
|
self.cover_data = getattr(mi, 'cover_data', (None, None))
|
||||||
@ -214,7 +212,7 @@ class MetaInformation(object):
|
|||||||
for x in ('author_sort', 'title_sort', 'comments', 'category', 'publisher',
|
for x in ('author_sort', 'title_sort', 'comments', 'category', 'publisher',
|
||||||
'series', 'series_index', 'rating', 'isbn', 'language',
|
'series', 'series_index', 'rating', 'isbn', 'language',
|
||||||
'application_id', 'manifest', 'toc', 'spine', 'guide', 'cover',
|
'application_id', 'manifest', 'toc', 'spine', 'guide', 'cover',
|
||||||
'book_producer', 'timestamp'
|
'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc'
|
||||||
):
|
):
|
||||||
setattr(self, x, getattr(mi, x, None))
|
setattr(self, x, getattr(mi, x, None))
|
||||||
|
|
||||||
@ -229,13 +227,13 @@ class MetaInformation(object):
|
|||||||
if mi.authors and mi.authors[0] != _('Unknown'):
|
if mi.authors and mi.authors[0] != _('Unknown'):
|
||||||
self.authors = mi.authors
|
self.authors = mi.authors
|
||||||
|
|
||||||
|
|
||||||
for attr in ('author_sort', 'title_sort', 'category',
|
for attr in ('author_sort', 'title_sort', 'category',
|
||||||
'publisher', 'series', 'series_index', 'rating',
|
'publisher', 'series', 'series_index', 'rating',
|
||||||
'isbn', 'application_id', 'manifest', 'spine', 'toc',
|
'isbn', 'application_id', 'manifest', 'spine', 'toc',
|
||||||
'cover', 'language', 'guide', 'book_producer',
|
'cover', 'language', 'guide', 'book_producer',
|
||||||
'timestamp'):
|
'timestamp', 'lccn', 'lcc', 'ddc'):
|
||||||
val = getattr(mi, attr, None)
|
if hasattr(mi, attr):
|
||||||
|
val = getattr(mi, attr)
|
||||||
if val is not None:
|
if val is not None:
|
||||||
setattr(self, attr, val)
|
setattr(self, attr, val)
|
||||||
|
|
||||||
@ -293,6 +291,13 @@ class MetaInformation(object):
|
|||||||
fmt('Rating', self.rating)
|
fmt('Rating', self.rating)
|
||||||
if self.timestamp is not None:
|
if self.timestamp is not None:
|
||||||
fmt('Timestamp', self.timestamp.isoformat(' '))
|
fmt('Timestamp', self.timestamp.isoformat(' '))
|
||||||
|
if self.lccn:
|
||||||
|
fmt('LCCN', unicode(self.lccn))
|
||||||
|
if self.lcc:
|
||||||
|
fmt('LCC', unicode(self.lcc))
|
||||||
|
if self.ddc:
|
||||||
|
fmt('DDC', unicode(self.ddc))
|
||||||
|
|
||||||
return u'\n'.join(ans)
|
return u'\n'.join(ans)
|
||||||
|
|
||||||
def to_html(self):
|
def to_html(self):
|
||||||
@ -302,6 +307,12 @@ class MetaInformation(object):
|
|||||||
ans += [(_('Producer'), unicode(self.book_producer))]
|
ans += [(_('Producer'), unicode(self.book_producer))]
|
||||||
ans += [(_('Comments'), unicode(self.comments))]
|
ans += [(_('Comments'), unicode(self.comments))]
|
||||||
ans += [('ISBN', unicode(self.isbn))]
|
ans += [('ISBN', unicode(self.isbn))]
|
||||||
|
if self.lccn:
|
||||||
|
ans += [('LCCN', unicode(self.lccn))]
|
||||||
|
if self.lcc:
|
||||||
|
ans += [('LCC', unicode(self.lcc))]
|
||||||
|
if self.ddc:
|
||||||
|
ans += [('DDC', unicode(self.ddc))]
|
||||||
ans += [(_('Tags'), u', '.join([unicode(t) for t in self.tags]))]
|
ans += [(_('Tags'), u', '.join([unicode(t) for t in self.tags]))]
|
||||||
if self.series:
|
if self.series:
|
||||||
ans += [(_('Series'), unicode(self.series)+ ' #%s'%self.format_series_index())]
|
ans += [(_('Series'), unicode(self.series)+ ' #%s'%self.format_series_index())]
|
||||||
|
@ -59,7 +59,8 @@ class EXTHHeader(object):
|
|||||||
elif id == 502:
|
elif id == 502:
|
||||||
# last update time
|
# last update time
|
||||||
pass
|
pass
|
||||||
elif id == 503 and (not title or title == _('Unknown')):
|
elif id == 503: # Long title
|
||||||
|
if not title or title == _('Unknown'):
|
||||||
title = content
|
title = content
|
||||||
#else:
|
#else:
|
||||||
# print 'unknown record', id, repr(content)
|
# print 'unknown record', id, repr(content)
|
||||||
@ -87,6 +88,8 @@ class EXTHHeader(object):
|
|||||||
content, '%Y-%m-%d',).date()
|
content, '%Y-%m-%d',).date()
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
elif id == 108:
|
||||||
|
pass # Producer
|
||||||
#else:
|
#else:
|
||||||
# print 'unhandled metadata record', id, repr(content)
|
# print 'unhandled metadata record', id, repr(content)
|
||||||
|
|
||||||
@ -522,7 +525,8 @@ class MobiReader(object):
|
|||||||
else:
|
else:
|
||||||
raise MobiError('Unknown compression algorithm: %s'%repr(self.book_header.compression_type))
|
raise MobiError('Unknown compression algorithm: %s'%repr(self.book_header.compression_type))
|
||||||
if self.book_header.ancient and '<html' not in self.mobi_html[:300].lower():
|
if self.book_header.ancient and '<html' not in self.mobi_html[:300].lower():
|
||||||
self.mobi_html = self.mobi_html.replace('\r ', '\n\n').replace('\0', '')
|
self.mobi_html = self.mobi_html.replace('\r ', '\n\n ')
|
||||||
|
self.mobi_html = self.mobi_html.replace('\0', '')
|
||||||
return processed_records
|
return processed_records
|
||||||
|
|
||||||
|
|
||||||
|
@ -573,7 +573,7 @@ class OEBReader(object):
|
|||||||
item = self._find_ncx(opf)
|
item = self._find_ncx(opf)
|
||||||
self._toc_from_opf(opf, item)
|
self._toc_from_opf(opf, item)
|
||||||
self._pages_from_opf(opf, item)
|
self._pages_from_opf(opf, item)
|
||||||
self._ensure_cover_image()
|
#self._ensure_cover_image()
|
||||||
|
|
||||||
|
|
||||||
def main(argv=sys.argv):
|
def main(argv=sys.argv):
|
||||||
|
@ -6,13 +6,14 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os
|
import os, re
|
||||||
from urllib import unquote as urlunquote
|
from urllib import unquote as urlunquote
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
import cssutils
|
import cssutils
|
||||||
|
|
||||||
|
from calibre import sanitize_file_name
|
||||||
from calibre.constants import islinux
|
from calibre.constants import islinux
|
||||||
from calibre.ebooks.oeb.base import OEB_DOCS, urlnormalize, urldefrag, \
|
from calibre.ebooks.oeb.base import OEB_DOCS, urlnormalize, urldefrag, \
|
||||||
rewrite_links
|
rewrite_links
|
||||||
@ -36,15 +37,21 @@ class Package(object):
|
|||||||
self.new_base_path = os.path.abspath(base)
|
self.new_base_path = os.path.abspath(base)
|
||||||
|
|
||||||
def rewrite_links_in(self, item):
|
def rewrite_links_in(self, item):
|
||||||
base = os.path.join(self.new_base_path, *item.href.split('/'))
|
old_href = item.old_href.split('#')[0]
|
||||||
|
new_href = item.href.split('#')[0]
|
||||||
|
base = os.path.join(self.old_base_path, *old_href.split('/'))
|
||||||
base = os.path.dirname(base)
|
base = os.path.dirname(base)
|
||||||
|
self.log.debug('\tRewriting links in', base+'/'+
|
||||||
|
item.href.rpartition('/')[-1])
|
||||||
|
new_base = os.path.join(self.new_base_path, *new_href.split('/'))
|
||||||
|
new_base = os.path.dirname(new_base)
|
||||||
|
|
||||||
if etree.iselement(item.data):
|
if etree.iselement(item.data):
|
||||||
self.rewrite_links_in_xml(item.data, base)
|
self.rewrite_links_in_xml(item.data, base, new_base)
|
||||||
elif hasattr(item.data, 'cssText'):
|
elif hasattr(item.data, 'cssText'):
|
||||||
self.rewrite_links_in_css(item.data, base)
|
self.rewrite_links_in_css(item.data, base, new_base)
|
||||||
|
|
||||||
def link_replacer(self, link_, base=''):
|
def link_replacer(self, link_, base='', new_base=''):
|
||||||
link = urlnormalize(link_)
|
link = urlnormalize(link_)
|
||||||
link, frag = urldefrag(link)
|
link, frag = urldefrag(link)
|
||||||
link = urlunquote(link).replace('/', os.sep)
|
link = urlunquote(link).replace('/', os.sep)
|
||||||
@ -55,20 +62,33 @@ class Package(object):
|
|||||||
link = link.lower()
|
link = link.lower()
|
||||||
if link not in self.map:
|
if link not in self.map:
|
||||||
return link_
|
return link_
|
||||||
nlink = os.path.relpath(self.map[link], base)
|
nlink = os.path.relpath(self.map[link], new_base)
|
||||||
if frag:
|
if frag:
|
||||||
nlink = '#'.join(nlink, frag)
|
nlink = '#'.join((nlink, frag))
|
||||||
return nlink.replace(os.sep, '/')
|
return nlink.replace(os.sep, '/')
|
||||||
|
|
||||||
def rewrite_links_in_css(self, sheet, base):
|
def rewrite_links_in_css(self, sheet, base, new_base):
|
||||||
repl = partial(self.link_replacer, base=base)
|
repl = partial(self.link_replacer, base=base, new_base=new_base)
|
||||||
cssutils.replaceUrls(sheet, repl)
|
cssutils.replaceUrls(sheet, repl)
|
||||||
|
|
||||||
def rewrite_links_in_xml(self, root, base):
|
def rewrite_links_in_xml(self, root, base, new_base):
|
||||||
repl = partial(self.link_replacer, base=base)
|
repl = partial(self.link_replacer, base=base, new_base=new_base)
|
||||||
rewrite_links(root, repl)
|
rewrite_links(root, repl)
|
||||||
|
|
||||||
def move_manifest_item(self, item):
|
def uniqify_name(self, new_href, hrefs):
    """Return a variant of ``new_href`` that is not a member of ``hrefs``.

    While the candidate collides, the last ``/``-separated component is
    rewritten: any trailing ``_<digits>`` on its stem is dropped and
    ``_<n>`` is appended before the extension, with ``n`` counting up
    from 1. ``hrefs`` itself is not modified.
    """
    candidate = new_href
    attempt = 0
    while candidate in hrefs:
        attempt += 1
        directory, slash, filename = candidate.rpartition('/')
        stem, extension = os.path.splitext(filename)
        # Strip a previous numeric suffix so counters do not pile up.
        stem = re.sub(r'_\d+$', '', stem)
        candidate = directory + slash + stem + ('_%d' % attempt) + extension
    return candidate
|
||||||
|
|
||||||
|
|
||||||
|
def move_manifest_item(self, item, hrefs):
|
||||||
item.data # Make sure the data has been loaded and cached
|
item.data # Make sure the data has been loaded and cached
|
||||||
old_abspath = os.path.join(self.old_base_path,
|
old_abspath = os.path.join(self.old_base_path,
|
||||||
*(urldefrag(item.href)[0].split('/')))
|
*(urldefrag(item.href)[0].split('/')))
|
||||||
@ -79,11 +99,17 @@ class Package(object):
|
|||||||
new_href = 'content/'
|
new_href = 'content/'
|
||||||
elif item.href.lower().endswith('.ncx'):
|
elif item.href.lower().endswith('.ncx'):
|
||||||
new_href = ''
|
new_href = ''
|
||||||
new_href += bname
|
new_href += sanitize_file_name(bname)
|
||||||
|
|
||||||
|
if new_href in hrefs:
|
||||||
|
new_href = self.uniqify_name(new_href, hrefs)
|
||||||
|
hrefs.add(new_href)
|
||||||
|
|
||||||
new_abspath = os.path.join(self.new_base_path, *new_href.split('/'))
|
new_abspath = os.path.join(self.new_base_path, *new_href.split('/'))
|
||||||
new_abspath = os.path.abspath(new_abspath)
|
new_abspath = os.path.abspath(new_abspath)
|
||||||
|
item.old_href = self.oeb.manifest.hrefs.pop(item.href).href
|
||||||
item.href = new_href
|
item.href = new_href
|
||||||
|
self.oeb.manifest.hrefs[item.href] = item
|
||||||
if not islinux:
|
if not islinux:
|
||||||
old_abspath, new_abspath = old_abspath.lower(), new_abspath.lower()
|
old_abspath, new_abspath = old_abspath.lower(), new_abspath.lower()
|
||||||
if old_abspath != new_abspath:
|
if old_abspath != new_abspath:
|
||||||
@ -91,25 +117,33 @@ class Package(object):
|
|||||||
|
|
||||||
def rewrite_links_in_toc(self, toc):
|
def rewrite_links_in_toc(self, toc):
|
||||||
if toc.href:
|
if toc.href:
|
||||||
toc.href = self.link_replacer(toc.href, base=self.new_base_path)
|
toc.href = self.link_replacer(toc.href, base=self.old_base_path,
|
||||||
|
new_base=self.new_base_path)
|
||||||
|
|
||||||
for x in toc:
|
for x in toc:
|
||||||
self.rewrite_links_in_toc(x)
|
self.rewrite_links_in_toc(x)
|
||||||
|
|
||||||
def __call__(self, oeb, context):
|
def __call__(self, oeb, context):
|
||||||
self.map = {}
|
self.map = {}
|
||||||
self.log = self.oeb.log
|
self.log = oeb.log
|
||||||
|
self.oeb = oeb
|
||||||
self.old_base_path = os.path.abspath(oeb.container.rootdir)
|
self.old_base_path = os.path.abspath(oeb.container.rootdir)
|
||||||
|
|
||||||
|
hrefs = set([])
|
||||||
for item in self.oeb.manifest:
|
for item in self.oeb.manifest:
|
||||||
self.move_manifest_item(item)
|
self.move_manifest_item(item, hrefs)
|
||||||
|
|
||||||
|
self.log.debug('Rewriting links in OEB documents...')
|
||||||
for item in self.oeb.manifest:
|
for item in self.oeb.manifest:
|
||||||
self.rewrite_links_in(item)
|
self.rewrite_links_in(item)
|
||||||
|
|
||||||
if getattr(oeb.toc, 'nodes', False):
|
if getattr(oeb.toc, 'nodes', False):
|
||||||
|
self.log.debug('Rewriting links in TOC...')
|
||||||
self.rewrite_links_in_toc(oeb.toc)
|
self.rewrite_links_in_toc(oeb.toc)
|
||||||
|
|
||||||
if hasattr(oeb, 'guide'):
|
if hasattr(oeb, 'guide'):
|
||||||
|
self.log.debug('Rewriting links in guide...')
|
||||||
for ref in oeb.guide.values():
|
for ref in oeb.guide.values():
|
||||||
ref.href = self.link_replacer(ref.href, base=self.new_base_path)
|
ref.href = self.link_replacer(ref.href,
|
||||||
|
base=self.old_base_path,
|
||||||
|
new_base=self.new_base_path)
|
||||||
|
@ -48,7 +48,8 @@ class OEBWriter(object):
|
|||||||
pretty_print=pretty_print)
|
pretty_print=pretty_print)
|
||||||
|
|
||||||
def __call__(self, oeb, path):
|
def __call__(self, oeb, path):
|
||||||
"""Read the book in the :class:`OEBBook` object :param:`oeb` to a file
|
"""
|
||||||
|
Read the book in the :class:`OEBBook` object :param:`oeb` to a file
|
||||||
at :param:`path`.
|
at :param:`path`.
|
||||||
"""
|
"""
|
||||||
version = int(self.version[0])
|
version = int(self.version[0])
|
||||||
|
@ -466,5 +466,3 @@ class Application(QApplication):
|
|||||||
self.translator.loadFromData(data)
|
self.translator.loadFromData(data)
|
||||||
self.installTranslator(self.translator)
|
self.installTranslator(self.translator)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -199,7 +199,7 @@ class EmailAccounts(QAbstractTableModel):
|
|||||||
return (account, self.accounts[account])
|
return (account, self.accounts[account])
|
||||||
if role == Qt.ToolTipRole:
|
if role == Qt.ToolTipRole:
|
||||||
return self.tooltips[col]
|
return self.tooltips[col]
|
||||||
if role == Qt.DisplayRole:
|
if role in [Qt.DisplayRole, Qt.EditRole]:
|
||||||
if col == 0:
|
if col == 0:
|
||||||
return QVariant(account)
|
return QVariant(account)
|
||||||
if col == 1:
|
if col == 1:
|
||||||
@ -397,6 +397,9 @@ class ConfigDialog(QDialog, Ui_Dialog):
|
|||||||
self.separate_cover_flow.setChecked(config['separate_cover_flow'])
|
self.separate_cover_flow.setChecked(config['separate_cover_flow'])
|
||||||
self.setup_email_page()
|
self.setup_email_page()
|
||||||
self.category_view.setCurrentIndex(self.category_view.model().index(0))
|
self.category_view.setCurrentIndex(self.category_view.model().index(0))
|
||||||
|
self.delete_news.setEnabled(bool(self.sync_news.isChecked()))
|
||||||
|
self.connect(self.sync_news, SIGNAL('toggled(bool)'),
|
||||||
|
self.delete_news.setEnabled)
|
||||||
|
|
||||||
def setup_email_page(self):
|
def setup_email_page(self):
|
||||||
opts = smtp_prefs().parse()
|
opts = smtp_prefs().parse()
|
||||||
|
@ -371,7 +371,7 @@
|
|||||||
<item>
|
<item>
|
||||||
<widget class="QCheckBox" name="delete_news">
|
<widget class="QCheckBox" name="delete_news">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>&Delete news from library when it is sent to reader</string>
|
<string>&Delete news from library when it is automatically sent to reader</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
@ -324,7 +324,7 @@
|
|||||||
<string>Book </string>
|
<string>Book </string>
|
||||||
</property>
|
</property>
|
||||||
<property name="minimum">
|
<property name="minimum">
|
||||||
<number>1</number>
|
<number>0</number>
|
||||||
</property>
|
</property>
|
||||||
<property name="maximum">
|
<property name="maximum">
|
||||||
<number>10000</number>
|
<number>10000</number>
|
||||||
|
@ -82,7 +82,8 @@ def load_recipes():
|
|||||||
recipes = []
|
recipes = []
|
||||||
for r in config.get('scheduled_recipes', []):
|
for r in config.get('scheduled_recipes', []):
|
||||||
r = Recipe().unpickle(r)
|
r = Recipe().unpickle(r)
|
||||||
if r.builtin and not str(r.id).startswith('recipe_'):
|
if r.builtin and \
|
||||||
|
(not str(r.id).startswith('recipe_') or not str(r.id) in recipe_modules):
|
||||||
continue
|
continue
|
||||||
recipes.append(r)
|
recipes.append(r)
|
||||||
return recipes
|
return recipes
|
||||||
|
BIN
src/calibre/gui2/images/news/azstarnet.png
Normal file
BIN
src/calibre/gui2/images/news/azstarnet.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 360 B |
BIN
src/calibre/gui2/images/news/corriere_della_sera_en.png
Normal file
BIN
src/calibre/gui2/images/news/corriere_della_sera_en.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 524 B |
BIN
src/calibre/gui2/images/news/corriere_della_sera_it.png
Normal file
BIN
src/calibre/gui2/images/news/corriere_della_sera_it.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 524 B |
BIN
src/calibre/gui2/images/news/msdnmag_en.png
Normal file
BIN
src/calibre/gui2/images/news/msdnmag_en.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 694 B |
@ -1,3 +1,4 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<ui version="4.0">
|
<ui version="4.0">
|
||||||
<class>ViewerConfig</class>
|
<class>ViewerConfig</class>
|
||||||
<widget class="QDialog" name="ViewerConfig">
|
<widget class="QDialog" name="ViewerConfig">
|
||||||
@ -5,8 +6,8 @@
|
|||||||
<rect>
|
<rect>
|
||||||
<x>0</x>
|
<x>0</x>
|
||||||
<y>0</y>
|
<y>0</y>
|
||||||
<width>281</width>
|
<width>373</width>
|
||||||
<height>214</height>
|
<height>264</height>
|
||||||
</rect>
|
</rect>
|
||||||
</property>
|
</property>
|
||||||
<property name="windowTitle">
|
<property name="windowTitle">
|
||||||
@ -16,8 +17,8 @@
|
|||||||
<iconset resource="../images.qrc">
|
<iconset resource="../images.qrc">
|
||||||
<normaloff>:/images/config.svg</normaloff>:/images/config.svg</iconset>
|
<normaloff>:/images/config.svg</normaloff>:/images/config.svg</iconset>
|
||||||
</property>
|
</property>
|
||||||
<layout class="QGridLayout" >
|
<layout class="QGridLayout" name="gridLayout">
|
||||||
<item row="0" column="0" colspan="2" >
|
<item row="0" column="0">
|
||||||
<widget class="QCheckBox" name="white_background">
|
<widget class="QCheckBox" name="white_background">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Use white background</string>
|
<string>Use white background</string>
|
||||||
@ -34,17 +35,7 @@
|
|||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="3" column="1" >
|
<item row="2" column="0">
|
||||||
<widget class="QDialogButtonBox" name="buttonBox" >
|
|
||||||
<property name="orientation" >
|
|
||||||
<enum>Qt::Horizontal</enum>
|
|
||||||
</property>
|
|
||||||
<property name="standardButtons" >
|
|
||||||
<set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
|
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
</item>
|
|
||||||
<item row="2" column="0" colspan="2" >
|
|
||||||
<widget class="QLabel" name="label">
|
<widget class="QLabel" name="label">
|
||||||
<property name="frameShape">
|
<property name="frameShape">
|
||||||
<enum>QFrame::Box</enum>
|
<enum>QFrame::Box</enum>
|
||||||
@ -63,6 +54,16 @@
|
|||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
<item row="3" column="0">
|
||||||
|
<widget class="QDialogButtonBox" name="buttonBox">
|
||||||
|
<property name="orientation">
|
||||||
|
<enum>Qt::Horizontal</enum>
|
||||||
|
</property>
|
||||||
|
<property name="standardButtons">
|
||||||
|
<set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
</layout>
|
</layout>
|
||||||
</widget>
|
</widget>
|
||||||
<resources>
|
<resources>
|
||||||
|
@ -1110,27 +1110,30 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
|||||||
return
|
return
|
||||||
self._view_file(job.result)
|
self._view_file(job.result)
|
||||||
|
|
||||||
def _view_file(self, name):
|
def _launch_viewer(self, name=None, viewer='ebook-viewer', internal=True):
|
||||||
self.setCursor(Qt.BusyCursor)
|
self.setCursor(Qt.BusyCursor)
|
||||||
try:
|
try:
|
||||||
ext = os.path.splitext(name)[1].upper().replace('.', '')
|
if internal:
|
||||||
if ext in config['internally_viewed_formats']:
|
args = [viewer]
|
||||||
if ext == 'LRF':
|
if isosx and 'ebook' in viewer:
|
||||||
args = ['lrfviewer', name]
|
|
||||||
self.job_manager.server.run_free_job('lrfviewer',
|
|
||||||
kwdargs=dict(args=args))
|
|
||||||
else:
|
|
||||||
args = ['ebook-viewer', name]
|
|
||||||
if isosx:
|
|
||||||
args.append('--raise-window')
|
args.append('--raise-window')
|
||||||
self.job_manager.server.run_free_job('ebook-viewer',
|
if name is not None:
|
||||||
|
args.append(name)
|
||||||
|
self.job_manager.server.run_free_job(viewer,
|
||||||
kwdargs=dict(args=args))
|
kwdargs=dict(args=args))
|
||||||
else:
|
else:
|
||||||
QDesktopServices.openUrl(QUrl('file:'+name))#launch(name)
|
QDesktopServices.openUrl(QUrl.fromLocalFile(name))#launch(name)
|
||||||
|
|
||||||
time.sleep(5) # User feedback
|
time.sleep(5) # User feedback
|
||||||
finally:
|
finally:
|
||||||
self.unsetCursor()
|
self.unsetCursor()
|
||||||
|
|
||||||
|
def _view_file(self, name):
    """Open the file ``name`` in a viewer chosen from its extension.

    The upper-cased extension (without the dot) selects ``lrfviewer``
    for LRF files and ``ebook-viewer`` for everything else; whether the
    internal viewer is used depends on the extension being listed in
    ``config['internally_viewed_formats']``.
    """
    suffix = os.path.splitext(name)[1]
    fmt = suffix.upper().replace('.', '')
    if fmt == 'LRF':
        viewer = 'lrfviewer'
    else:
        viewer = 'ebook-viewer'
    use_internal = fmt in config['internally_viewed_formats']
    self._launch_viewer(name, viewer, use_internal)
|
||||||
|
|
||||||
def view_specific_format(self, triggered):
|
def view_specific_format(self, triggered):
|
||||||
rows = self.library_view.selectionModel().selectedRows()
|
rows = self.library_view.selectionModel().selectedRows()
|
||||||
if not rows or len(rows) == 0:
|
if not rows or len(rows) == 0:
|
||||||
@ -1165,8 +1168,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
|||||||
rows = self.current_view().selectionModel().selectedRows()
|
rows = self.current_view().selectionModel().selectedRows()
|
||||||
if self.current_view() is self.library_view:
|
if self.current_view() is self.library_view:
|
||||||
if not rows or len(rows) == 0:
|
if not rows or len(rows) == 0:
|
||||||
d = error_dialog(self, _('Cannot view'), _('No book selected'))
|
self._launch_viewer()
|
||||||
d.exec_()
|
|
||||||
return
|
return
|
||||||
|
|
||||||
row = rows[0].row()
|
row = rows[0].row()
|
||||||
|
@ -15,6 +15,7 @@ from calibre import terminal_controller, preferred_encoding
|
|||||||
from calibre.utils.config import OptionParser, prefs
|
from calibre.utils.config import OptionParser, prefs
|
||||||
try:
|
try:
|
||||||
from calibre.utils.single_qt_application import send_message
|
from calibre.utils.single_qt_application import send_message
|
||||||
|
send_message
|
||||||
except:
|
except:
|
||||||
send_message = None
|
send_message = None
|
||||||
from calibre.ebooks.metadata.meta import get_metadata
|
from calibre.ebooks.metadata.meta import get_metadata
|
||||||
|
@ -31,6 +31,21 @@ from calibre.customize.ui import run_plugins_on_import
|
|||||||
from calibre import sanitize_file_name
|
from calibre import sanitize_file_name
|
||||||
from calibre.ebooks import BOOK_EXTENSIONS
|
from calibre.ebooks import BOOK_EXTENSIONS
|
||||||
|
|
||||||
|
if iswindows:
|
||||||
|
import calibre.utils.winshell as winshell
|
||||||
|
|
||||||
|
def delete_file(path):
    """Delete the file at ``path``, preferring the Windows shell delete.

    ``winshell.delete_file`` is tried first (silent, no confirmation
    prompt). If that attempt fails for any ordinary reason, the file is
    removed directly with ``os.remove``.

    Raises whatever ``os.remove`` raises if the fallback fails as well.
    """
    try:
        winshell.delete_file(path, silent=True, no_confirm=True)
    except Exception:
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt/SystemExit are not turned into a delete.
        # NOTE(review): winshell appears to be imported only on Windows;
        # elsewhere the failed name lookup is what lands here - confirm.
        os.remove(path)
|
||||||
|
|
||||||
|
def delete_tree(path):
    """Delete the directory tree at ``path``, preferring the Windows shell.

    ``winshell.delete_file`` is tried first (silent, no confirmation
    prompt). If that attempt fails for any ordinary reason, the tree is
    removed directly with ``shutil.rmtree``.

    Raises whatever ``shutil.rmtree`` raises if the fallback fails too.
    """
    try:
        winshell.delete_file(path, silent=True, no_confirm=True)
    except Exception:
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt/SystemExit are not turned into a delete.
        # NOTE(review): winshell appears to be imported only on Windows;
        # elsewhere the failed name lookup is what lands here - confirm.
        shutil.rmtree(path)
|
||||||
|
|
||||||
copyfile = os.link if hasattr(os, 'link') else shutil.copyfile
|
copyfile = os.link if hasattr(os, 'link') else shutil.copyfile
|
||||||
|
|
||||||
FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'publisher':3, 'rating':4, 'timestamp':5,
|
FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'publisher':3, 'rating':4, 'timestamp':5,
|
||||||
@ -499,7 +514,7 @@ class LibraryDatabase2(LibraryDatabase):
|
|||||||
|
|
||||||
def rmtree(self, path):
|
def rmtree(self, path):
|
||||||
if not self.normpath(self.library_path).startswith(self.normpath(path)):
|
if not self.normpath(self.library_path).startswith(self.normpath(path)):
|
||||||
shutil.rmtree(path)
|
delete_tree(path)
|
||||||
|
|
||||||
def normpath(self, path):
|
def normpath(self, path):
|
||||||
path = os.path.abspath(os.path.realpath(path))
|
path = os.path.abspath(os.path.realpath(path))
|
||||||
@ -745,6 +760,9 @@ class LibraryDatabase2(LibraryDatabase):
|
|||||||
path = os.path.join(self.library_path, self.path(id, index_is_id=True))
|
path = os.path.join(self.library_path, self.path(id, index_is_id=True))
|
||||||
self.data.remove(id)
|
self.data.remove(id)
|
||||||
if os.path.exists(path):
|
if os.path.exists(path):
|
||||||
|
if iswindows:
|
||||||
|
winshell.delete_file(path, no_confirm=True, silent=True)
|
||||||
|
else:
|
||||||
self.rmtree(path)
|
self.rmtree(path)
|
||||||
parent = os.path.dirname(path)
|
parent = os.path.dirname(path)
|
||||||
if len(os.listdir(parent)) == 0:
|
if len(os.listdir(parent)) == 0:
|
||||||
@ -764,7 +782,7 @@ class LibraryDatabase2(LibraryDatabase):
|
|||||||
ext = ('.' + format.lower()) if format else ''
|
ext = ('.' + format.lower()) if format else ''
|
||||||
path = os.path.join(path, name+ext)
|
path = os.path.join(path, name+ext)
|
||||||
try:
|
try:
|
||||||
os.remove(path)
|
delete_file(path)
|
||||||
except:
|
except:
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
self.conn.execute('DELETE FROM data WHERE book=? AND format=?', (id, format.upper()))
|
self.conn.execute('DELETE FROM data WHERE book=? AND format=?', (id, format.upper()))
|
||||||
|
@ -105,7 +105,7 @@ Device Integration
|
|||||||
|
|
||||||
What devices does |app| support?
|
What devices does |app| support?
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
At the moment |app| has full support for the SONY PRS 500/505/700, Cybook Gen 3, Amazon Kindle 1 and 2 as well as the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk.
|
At the moment |app| has full support for the SONY PRS 500/505/700, Cybook Gen 3, Amazon Kindle 1/2, Netronix EB600 and the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk.
|
||||||
|
|
||||||
I used |app| to transfer some books to my reader, and now the SONY software hangs every time I connect the reader?
|
I used |app| to transfer some books to my reader, and now the SONY software hangs every time I connect the reader?
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
@ -32,7 +32,8 @@ if not _run_once:
|
|||||||
lang = prefs['language']
|
lang = prefs['language']
|
||||||
if lang is not None:
|
if lang is not None:
|
||||||
return lang
|
return lang
|
||||||
lang = locale.getdefaultlocale()[0]
|
lang = locale.getdefaultlocale(['LANGUAGE', 'LC_ALL', 'LC_CTYPE',
|
||||||
|
'LC_MESSAGES', 'LANG'])[0]
|
||||||
if lang is None and os.environ.has_key('LANG'): # Needed for OS X
|
if lang is None and os.environ.has_key('LANG'): # Needed for OS X
|
||||||
try:
|
try:
|
||||||
lang = os.environ['LANG']
|
lang = os.environ['LANG']
|
||||||
|
@ -38,6 +38,7 @@ def get_linux_data(version='1.0.0'):
|
|||||||
('exherbo', 'Exherbo'),
|
('exherbo', 'Exherbo'),
|
||||||
('foresight', 'Foresight 2.1'),
|
('foresight', 'Foresight 2.1'),
|
||||||
('ubuntu', 'Ubuntu Jaunty Jackalope'),
|
('ubuntu', 'Ubuntu Jaunty Jackalope'),
|
||||||
|
('linux_mint', 'Linux Mint Gloria'),
|
||||||
]:
|
]:
|
||||||
data['supported'].append(CoolDistro(name, title,
|
data['supported'].append(CoolDistro(name, title,
|
||||||
prefix='http://calibre.kovidgoyal.net'))
|
prefix='http://calibre.kovidgoyal.net'))
|
||||||
|
BIN
src/calibre/trac/plugins/htdocs/images/linux_mint_logo.png
Normal file
BIN
src/calibre/trac/plugins/htdocs/images/linux_mint_logo.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 8.9 KiB |
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
400
src/calibre/utils/winshell.py
Normal file
400
src/calibre/utils/winshell.py
Normal file
@ -0,0 +1,400 @@
|
|||||||
|
"""winshell - convenience functions to access Windows shell functionality
|
||||||
|
|
||||||
|
Certain aspects of the Windows user interface are grouped by
|
||||||
|
Microsoft as Shell functions. These include the Desktop, shortcut
|
||||||
|
icons, special folders (such as My Documents) and a few other things.
|
||||||
|
|
||||||
|
These are mostly available via the shell module of the win32all
|
||||||
|
extensions, but whenever I need to use them, I've forgotten the
|
||||||
|
various constants and so on.
|
||||||
|
|
||||||
|
Several of the shell items have two variants: personal and common,
|
||||||
|
or User and All Users. These refer to systems with profiles in use:
|
||||||
|
anything from NT upwards, and 9x with Profiles turned on. Where
|
||||||
|
relevant, the Personal/User version refers to that owned by the
|
||||||
|
logged-on user and visible only to that user; the Common/All Users
|
||||||
|
version refers to that maintained by an Administrator and visible
|
||||||
|
to all users of the system.
|
||||||
|
|
||||||
|
(c) Tim Golden <winshell@timgolden.me.uk> 25th November 2003
|
||||||
|
Licensed under the (GPL-compatible) MIT License:
|
||||||
|
http://www.opensource.org/licenses/mit-license.php
|
||||||
|
|
||||||
|
9th Nov 2005 0.2 . License changed to MIT
|
||||||
|
. Added functionality using SHFileOperation
|
||||||
|
25th Nov 2003 0.1 . Initial release by Tim Golden
|
||||||
|
"""
|
||||||
|
|
||||||
|
__VERSION__ = "0.2"
|
||||||
|
|
||||||
|
import os
|
||||||
|
from win32com import storagecon
|
||||||
|
from win32com.shell import shell, shellcon
|
||||||
|
import pythoncom
|
||||||
|
|
||||||
|
class x_winshell(Exception):
    """Error raised by this module when a shell file operation fails."""
|
||||||
|
|
||||||
|
#
|
||||||
|
# Although this can be done in one call, Win9x didn't
|
||||||
|
# support it, so I added this workaround.
|
||||||
|
#
|
||||||
|
def get_path(folder_id):
    """Return the path of the special folder identified by ``folder_id``.

    ``folder_id`` is passed to ``SHGetSpecialFolderLocation`` and the
    resulting item-id list is converted with ``SHGetPathFromIDList``.
    """
    id_list = shell.SHGetSpecialFolderLocation(0, folder_id)
    return shell.SHGetPathFromIDList(id_list)
|
||||||
|
|
||||||
|
def desktop(common=0):
    """Return the desktop folder: the user's (0) or the common one (1)."""
    # ``common`` indexes the pair: 0 -> personal, 1 -> all users.
    folder_ids = (
        shellcon.CSIDL_DESKTOP,
        shellcon.CSIDL_COMMON_DESKTOPDIRECTORY,
    )
    return get_path(folder_ids[common])
|
||||||
|
|
||||||
|
def common_desktop():
    """Return the common (all users) desktop folder.

    Kept only because existing code already calls it; it is the same as
    ``desktop(common=1)``.
    """
    return desktop(common=1)
|
||||||
|
|
||||||
|
def application_data(common=0):
    """Return the application-data folder: the user's (0) or common (1)."""
    # ``common`` indexes the pair: 0 -> personal, 1 -> all users.
    folder_ids = (shellcon.CSIDL_APPDATA, shellcon.CSIDL_COMMON_APPDATA)
    return get_path(folder_ids[common])
|
||||||
|
|
||||||
|
def favourites(common=0):
    """Return the Explorer favourites folder: the user's (0) or common (1)."""
    # ``common`` indexes the pair: 0 -> personal, 1 -> all users.
    folder_ids = (shellcon.CSIDL_FAVORITES, shellcon.CSIDL_COMMON_FAVORITES)
    return get_path(folder_ids[common])
|
||||||
|
bookmarks = favourites
|
||||||
|
|
||||||
|
def start_menu(common=0):
    """Return the Start Menu shortcuts folder: the user's (0) or common (1)."""
    # ``common`` indexes the pair: 0 -> personal, 1 -> all users.
    folder_ids = (shellcon.CSIDL_STARTMENU, shellcon.CSIDL_COMMON_STARTMENU)
    return get_path(folder_ids[common])
|
||||||
|
|
||||||
|
def programs(common=0):
    """Return the Start Menu Programs folder: the user's (0) or common (1)."""
    # ``common`` indexes the pair: 0 -> personal, 1 -> all users.
    folder_ids = (shellcon.CSIDL_PROGRAMS, shellcon.CSIDL_COMMON_PROGRAMS)
    return get_path(folder_ids[common])
|
||||||
|
|
||||||
|
def startup(common=0):
    """Return the Start Menu Startup folder: the user's (0) or common (1)."""
    # ``common`` indexes the pair: 0 -> personal, 1 -> all users.
    folder_ids = (shellcon.CSIDL_STARTUP, shellcon.CSIDL_COMMON_STARTUP)
    return get_path(folder_ids[common])
|
||||||
|
|
||||||
|
def personal_folder():
    """Return the folder that holds the My Documents files."""
    folder_id = shellcon.CSIDL_PERSONAL
    return get_path(folder_id)
|
||||||
|
my_documents = personal_folder
|
||||||
|
|
||||||
|
def recent():
    """Return the folder holding the Start Menu's Documents shortcuts."""
    folder_id = shellcon.CSIDL_RECENT
    return get_path(folder_id)
|
||||||
|
|
||||||
|
def sendto():
    """Return the folder holding the context menu's SendTo shortcuts."""
    folder_id = shellcon.CSIDL_SENDTO
    return get_path(folder_id)
|
||||||
|
|
||||||
|
#
|
||||||
|
# Internally abstracted function to handle one
|
||||||
|
# of several shell-based file manipulation
|
||||||
|
# routines. Not all the possible parameters
|
||||||
|
# are covered which might be passed to the
|
||||||
|
# underlying SHFileOperation API call, but
|
||||||
|
# only those which seemed useful to me at
|
||||||
|
# the time.
|
||||||
|
#
|
||||||
|
def _file_operation(
    operation,
    source_path,
    target_path=None,
    allow_undo=True,
    no_confirm=False,
    rename_on_collision=True,
    silent=False,
    hWnd=None
):
    """Run one ``shell.SHFileOperation`` call and raise on failure.

    operation -- the ``shellcon.FO_*`` code to perform.
    source_path, target_path -- a path string, or an iterable of path
        strings; each path is made absolute before the call.
    allow_undo, no_confirm, rename_on_collision, silent -- when true,
        add FOF_ALLOWUNDO, FOF_NOCONFIRMATION, FOF_RENAMEONCOLLISION and
        FOF_SILENT respectively to the flags passed to the shell.
    hWnd -- owner window handle; 0 is passed when it is not given.

    Raises ``x_winshell`` carrying the result code when the shell call
    returns non-zero, or carrying a message when the call reports that
    operations were aborted.
    """
    #
    # At present the Python wrapper around SHFileOperation doesn't
    # allow lists of files. Hopefully it will at some point, so
    # take account of it here.
    # If you pass this shell function a "/"-separated path with
    # a wildcard, eg c:/temp/*.tmp, it gets confused. It's ok
    # with a backslash, so convert here.
    #
    # NOTE(review): an empty or None path becomes "" and is then passed
    # through os.path.abspath, which resolves "" to the current working
    # directory - confirm callers never pass an empty source_path.
    source_path = source_path or ""
    if isinstance(source_path, basestring):
        source_path = os.path.abspath(source_path)
    else:
        source_path = [os.path.abspath(i) for i in source_path]

    target_path = target_path or ""
    if isinstance(target_path, basestring):
        target_path = os.path.abspath(target_path)
    else:
        target_path = [os.path.abspath(i) for i in target_path]

    flags = 0
    if allow_undo:
        flags |= shellcon.FOF_ALLOWUNDO
    if no_confirm:
        flags |= shellcon.FOF_NOCONFIRMATION
    if rename_on_collision:
        flags |= shellcon.FOF_RENAMEONCOLLISION
    if silent:
        flags |= shellcon.FOF_SILENT

    result, n_aborted = shell.SHFileOperation(
        (hWnd or 0, operation, source_path, target_path, flags, None, None)
    )
    # "!=" and the call form of raise behave the same as the removed
    # "<>" and "raise X, value" forms, and are valid on Python 2 and 3.
    if result != 0:
        raise x_winshell(result)
    elif n_aborted:
        raise x_winshell("%d operations were aborted by the user" % n_aborted)
|
||||||
|
|
||||||
|
def copy_file(
    source_path,
    target_path,
    allow_undo=True,
    no_confirm=False,
    rename_on_collision=True,
    silent=False,
    hWnd=None
):
    """Copy ``source_path`` to ``target_path`` through the Windows shell.

    The work is delegated to ``_file_operation`` with ``FO_COPY``. With
    the defaults the shell is asked to allow undo and to rename on a
    name collision, while confirmation prompts and the shell's progress
    display are left enabled.
    """
    _file_operation(
        shellcon.FO_COPY,
        source_path,
        target_path,
        allow_undo=allow_undo,
        no_confirm=no_confirm,
        rename_on_collision=rename_on_collision,
        silent=silent,
        hWnd=hWnd
    )
|
||||||
|
|
||||||
|
def move_file(
    source_path,
    target_path,
    allow_undo=True,
    no_confirm=False,
    rename_on_collision=True,
    silent=False,
    hWnd=None
):
    """Move ``source_path`` to ``target_path`` through the Windows shell.

    The work is delegated to ``_file_operation`` with ``FO_MOVE``. With
    the defaults the shell is asked to allow undo and to rename on a
    name collision, while confirmation prompts and the shell's progress
    display are left enabled.
    """
    _file_operation(
        shellcon.FO_MOVE,
        source_path,
        target_path,
        allow_undo=allow_undo,
        no_confirm=no_confirm,
        rename_on_collision=rename_on_collision,
        silent=silent,
        hWnd=hWnd
    )
|
||||||
|
|
||||||
|
def rename_file(
    source_path,
    target_path,
    allow_undo=True,
    no_confirm=False,
    rename_on_collision=True,
    silent=False,
    hWnd=None
):
    """Rename ``source_path`` to ``target_path`` through the Windows shell.

    The work is delegated to ``_file_operation`` with ``FO_RENAME``.
    With the defaults the shell is asked to allow undo and to rename on
    a name collision, while confirmation prompts and the shell's
    progress display are left enabled.
    """
    _file_operation(
        shellcon.FO_RENAME,
        source_path,
        target_path,
        allow_undo=allow_undo,
        no_confirm=no_confirm,
        rename_on_collision=rename_on_collision,
        silent=silent,
        hWnd=hWnd
    )
|
||||||
|
|
||||||
|
def delete_file(
    source_path,
    allow_undo=True,
    no_confirm=False,
    rename_on_collision=True,
    silent=False,
    hWnd=None
):
    """Delete ``source_path`` through the Windows shell.

    The work is delegated to ``_file_operation`` with ``FO_DELETE`` and
    no target path. With the default ``allow_undo`` the shell is asked
    to keep the delete undoable (described by the original author as
    using the system recycle bin); confirmation prompts and the shell's
    progress display stay enabled unless ``no_confirm`` / ``silent``
    are set.
    """
    _file_operation(
        shellcon.FO_DELETE,
        source_path,
        target_path=None,
        allow_undo=allow_undo,
        no_confirm=no_confirm,
        rename_on_collision=rename_on_collision,
        silent=silent,
        hWnd=hWnd
    )
|
||||||
|
|
||||||
|
def CreateShortcut (Path, Target, Arguments = "", StartIn = "", Icon = ("",0), Description = ""):
    r"""Create a Windows shortcut:

    Path - As what file should the shortcut be created?
    Target - What command should the desktop use?
    Arguments - What arguments should be supplied to the command?
    StartIn - What folder should the command start in?
    Icon - (filename, index) What icon should be used for the shortcut?
    Description - What description should the shortcut be given?

    eg
    CreateShortcut (
      Path=os.path.join (desktop (), "PythonI.lnk"),
      Target=r"c:\python\python.exe",
      Icon=(r"c:\python\python.exe", 0),
      Description="Python Interpreter"
    )
    """
    # Instantiate the shell-link COM object in-process.
    link = pythoncom.CoCreateInstance(
        shell.CLSID_ShellLink,
        None,
        pythoncom.CLSCTX_INPROC_SERVER,
        shell.IID_IShellLink,
    )

    # Fill in the link's properties from the arguments.
    link.SetPath(Target)
    link.SetDescription(Description)
    link.SetArguments(Arguments)
    link.SetWorkingDirectory(StartIn)
    icon_file = Icon[0]
    icon_index = Icon[1]
    link.SetIconLocation(icon_file, icon_index)

    # Saving goes through the object's IPersistFile interface.
    # NOTE(review): the 1 is presumably IPersistFile::Save's "remember"
    # flag -- confirm against the COM documentation.
    link.QueryInterface(pythoncom.IID_IPersistFile).Save(Path, 1)
|
||||||
|
|
||||||
|
#
# Constants for structured storage
#
# These come from ObjIdl.h
#
# Property-set format identifiers, written as GUID strings.
# structured_storage () opens the first of these.
# NOTE(review): Microsoft's documentation appears to associate
# {F29F85E0-...} with the SummaryInformation property set (the one the
# PIDSI_* ids below index), so the USER_DEFINED name looks like a
# misnomer -- confirm before renaming or reusing it.
FMTID_USER_DEFINED_PROPERTIES = "{F29F85E0-4FF9-1068-AB91-08002B27B3D9}"
FMTID_CUSTOM_DEFINED_PROPERTIES = "{D5CDD505-2E9C-101B-9397-08002B2CF9AE}"

# Numeric ids of the individual properties within the property set.
PIDSI_TITLE = 0x00000002
PIDSI_SUBJECT = 0x00000003
PIDSI_AUTHOR = 0x00000004
PIDSI_CREATE_DTM = 0x0000000c
PIDSI_KEYWORDS = 0x00000005
PIDSI_COMMENTS = 0x00000006
PIDSI_TEMPLATE = 0x00000007
PIDSI_LASTAUTHOR = 0x00000008
PIDSI_REVNUMBER = 0x00000009
PIDSI_EDITTIME = 0x0000000a
PIDSI_LASTPRINTED = 0x0000000b
PIDSI_LASTSAVE_DTM = 0x0000000d
PIDSI_PAGECOUNT = 0x0000000e
PIDSI_WORDCOUNT = 0x0000000f
PIDSI_CHARCOUNT = 0x00000010
PIDSI_THUMBNAIL = 0x00000011
PIDSI_APPNAME = 0x00000012
# The ids requested by structured_storage (), in the exact order in
# which that function unpacks the values it reads -- keep the two in
# step. PIDSI_REVNUMBER and PIDSI_THUMBNAIL are defined above but are
# not requested.
PROPERTIES = (
  PIDSI_TITLE,
  PIDSI_SUBJECT,
  PIDSI_AUTHOR,
  PIDSI_CREATE_DTM,
  PIDSI_KEYWORDS,
  PIDSI_COMMENTS,
  PIDSI_TEMPLATE,
  PIDSI_LASTAUTHOR,
  PIDSI_EDITTIME,
  PIDSI_LASTPRINTED,
  PIDSI_LASTSAVE_DTM,
  PIDSI_PAGECOUNT,
  PIDSI_WORDCOUNT,
  PIDSI_CHARCOUNT,
  PIDSI_APPNAME
)
|
||||||
|
|
||||||
|
#
|
||||||
|
# This was taken from someone else's example,
|
||||||
|
# but I can't find where. If you know, please
|
||||||
|
# tell me so I can give due credit.
|
||||||
|
#
|
||||||
|
def structured_storage (filename):
    """Pick out info from MS documents with embedded
    structured storage (typically MS Word docs etc.)

    Returns a dictionary of the information found. Only properties
    whose value is truthy are included; the dictionary is empty when
    the file is not a storage file or when its storage does not offer
    the property-set interface.
    """
    if not pythoncom.StgIsStorageFile(filename):
        return {}

    open_mode = storagecon.STGM_READ | storagecon.STGM_SHARE_EXCLUSIVE
    root_storage = pythoncom.StgOpenStorage(filename, None, open_mode)
    try:
        property_sets = root_storage.QueryInterface(pythoncom.IID_IPropertySetStorage)
    except pythoncom.com_error:
        return {}

    summary = property_sets.Open(FMTID_USER_DEFINED_PROPERTIES)
    try:
        values = summary.ReadMultiple(PROPERTIES)
    finally:
        # Drop the reference to the opened property set whether or not
        # the read succeeded.
        summary = None

    # One value per entry of PROPERTIES, in the same order; unpacking
    # fails loudly if the counts ever disagree.
    (title, subject, author, created_on, keywords, comments,
     template_used, updated_by, edited_on, printed_on, saved_on,
     n_pages, n_words, n_characters, application) = values

    labelled = (
        ('title', title),
        ('subject', subject),
        ('author', author),
        ('created_on', created_on),
        ('keywords', keywords),
        ('comments', comments),
        ('template_used', template_used),
        ('updated_by', updated_by),
        ('edited_on', edited_on),
        ('printed_on', printed_on),
        ('saved_on', saved_on),
        ('n_pages', n_pages),
        ('n_words', n_words),
        ('n_characters', n_characters),
        ('application', application),
    )
    # Keep only the properties that actually carry a value.
    return dict((label, value) for label, value in labelled if value)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
  # Manual smoke test, run only when this module is executed as a
  # script: print the value returned by each folder helper defined in
  # this module.
  # NOTE(review): the argument 1 presumably selects the all-users
  # ("Common") variant of a folder, as the labels suggest -- confirm
  # against the helper definitions.
  try:
    print 'Desktop =>', desktop ()
    print 'Common Desktop =>', desktop (1)
    print 'Application Data =>', application_data ()
    print 'Common Application Data =>', application_data (1)
    print 'Bookmarks =>', bookmarks ()
    print 'Common Bookmarks =>', bookmarks (1)
    print 'Start Menu =>', start_menu ()
    print 'Common Start Menu =>', start_menu (1)
    print 'Programs =>', programs ()
    print 'Common Programs =>', programs (1)
    print 'Startup =>', startup ()
    print 'Common Startup =>', startup (1)
    print 'My Documents =>', my_documents ()
    print 'Recent =>', recent ()
    print 'SendTo =>', sendto ()
  finally:
    # Wait for the user even when a lookup raised, so the output can be
    # read before the process exits.
    raw_input ("Press enter...")
|
||||||
|
|
@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
CLI for downloading feeds.
|
CLI for downloading feeds.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import sys, os, logging
|
import sys, os
|
||||||
from calibre.web.feeds.recipes import get_builtin_recipe, compile_recipe, titles
|
from calibre.web.feeds.recipes import get_builtin_recipe, compile_recipe, titles
|
||||||
from calibre.web.fetch.simple import option_parser as _option_parser
|
from calibre.web.fetch.simple import option_parser as _option_parser
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
@ -113,7 +113,7 @@ If you specify this option, any argument to %prog is ignored and a default recip
|
|||||||
class RecipeError(Exception):
|
class RecipeError(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def run_recipe(opts, recipe_arg, parser, notification=None, handler=None):
|
def run_recipe(opts, recipe_arg, parser, notification=None):
|
||||||
if notification is None:
|
if notification is None:
|
||||||
from calibre.utils.terminfo import TerminalController, ProgressBar
|
from calibre.utils.terminfo import TerminalController, ProgressBar
|
||||||
term = TerminalController(sys.stdout)
|
term = TerminalController(sys.stdout)
|
||||||
@ -137,14 +137,6 @@ def run_recipe(opts, recipe_arg, parser, notification=None, handler=None):
|
|||||||
if recipe is None:
|
if recipe is None:
|
||||||
raise RecipeError(recipe_arg+ ' is an invalid recipe')
|
raise RecipeError(recipe_arg+ ' is an invalid recipe')
|
||||||
|
|
||||||
|
|
||||||
if handler is None:
|
|
||||||
from calibre import ColoredFormatter
|
|
||||||
handler = logging.StreamHandler(sys.stdout)
|
|
||||||
handler.setLevel(logging.DEBUG if opts.debug else logging.INFO if opts.verbose else logging.WARN)
|
|
||||||
handler.setFormatter(ColoredFormatter('%(levelname)s: %(message)s\n')) # The trailing newline is need because of the progress bar
|
|
||||||
logging.getLogger('feeds2disk').addHandler(handler)
|
|
||||||
|
|
||||||
recipe = recipe(opts, parser, notification)
|
recipe = recipe(opts, parser, notification)
|
||||||
|
|
||||||
if not os.path.exists(recipe.output_dir):
|
if not os.path.exists(recipe.output_dir):
|
||||||
@ -153,7 +145,7 @@ def run_recipe(opts, recipe_arg, parser, notification=None, handler=None):
|
|||||||
|
|
||||||
return recipe
|
return recipe
|
||||||
|
|
||||||
def main(args=sys.argv, notification=None, handler=None):
|
def main(args=sys.argv, notification=None):
|
||||||
p = option_parser()
|
p = option_parser()
|
||||||
opts, args = p.parse_args(args=args[1:])
|
opts, args = p.parse_args(args=args[1:])
|
||||||
|
|
||||||
@ -161,7 +153,7 @@ def main(args=sys.argv, notification=None, handler=None):
|
|||||||
p.print_help()
|
p.print_help()
|
||||||
return 1
|
return 1
|
||||||
recipe_arg = args[0] if len(args) > 0 else None
|
recipe_arg = args[0] if len(args) > 0 else None
|
||||||
run_recipe(opts, recipe_arg, p, notification=notification, handler=handler)
|
run_recipe(opts, recipe_arg, p, notification=notification)
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@ Defines various abstract base classes that can be subclassed to create powerful
|
|||||||
__docformat__ = "restructuredtext en"
|
__docformat__ = "restructuredtext en"
|
||||||
|
|
||||||
|
|
||||||
import logging, os, cStringIO, time, traceback, re, urlparse, sys
|
import os, time, traceback, re, urlparse, sys
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from contextlib import nested, closing
|
from contextlib import nested, closing
|
||||||
@ -27,6 +27,7 @@ from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
|
|||||||
from calibre.web.fetch.simple import option_parser as web2disk_option_parser
|
from calibre.web.fetch.simple import option_parser as web2disk_option_parser
|
||||||
from calibre.web.fetch.simple import RecursiveFetcher
|
from calibre.web.fetch.simple import RecursiveFetcher
|
||||||
from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
|
from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
|
||||||
|
from calibre.utils.logging import Log
|
||||||
from calibre.ptempfile import PersistentTemporaryFile, \
|
from calibre.ptempfile import PersistentTemporaryFile, \
|
||||||
PersistentTemporaryDirectory
|
PersistentTemporaryDirectory
|
||||||
|
|
||||||
@ -423,7 +424,7 @@ class BasicNewsRecipe(object):
|
|||||||
'''
|
'''
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def get_obfuscated_article(self, url, logger):
|
def get_obfuscated_article(self, url):
|
||||||
'''
|
'''
|
||||||
If you set :member:`articles_are_obfuscated` this method is called with
|
If you set :member:`articles_are_obfuscated` this method is called with
|
||||||
every article URL. It should return the path to a file on the filesystem
|
every article URL. It should return the path to a file on the filesystem
|
||||||
@ -443,6 +444,7 @@ class BasicNewsRecipe(object):
|
|||||||
:param parser: Command line option parser. Used to intelligently merge options.
|
:param parser: Command line option parser. Used to intelligently merge options.
|
||||||
:param progress_reporter: A Callable that takes two arguments: progress (a number between 0 and 1) and a string message. The message should be optional.
|
:param progress_reporter: A Callable that takes two arguments: progress (a number between 0 and 1) and a string message. The message should be optional.
|
||||||
'''
|
'''
|
||||||
|
self.log = Log()
|
||||||
if not isinstance(self.title, unicode):
|
if not isinstance(self.title, unicode):
|
||||||
self.title = unicode(self.title, 'utf-8', 'replace')
|
self.title = unicode(self.title, 'utf-8', 'replace')
|
||||||
|
|
||||||
@ -455,7 +457,6 @@ class BasicNewsRecipe(object):
|
|||||||
|
|
||||||
|
|
||||||
if self.debug:
|
if self.debug:
|
||||||
logging.getLogger('feeds2disk').setLevel(logging.DEBUG)
|
|
||||||
self.verbose = True
|
self.verbose = True
|
||||||
self.report_progress = progress_reporter
|
self.report_progress = progress_reporter
|
||||||
|
|
||||||
@ -560,20 +561,20 @@ class BasicNewsRecipe(object):
|
|||||||
res = self.build_index()
|
res = self.build_index()
|
||||||
self.report_progress(1, _('Download finished'))
|
self.report_progress(1, _('Download finished'))
|
||||||
if self.failed_downloads:
|
if self.failed_downloads:
|
||||||
self.log_warning(_('Failed to download the following articles:'))
|
self.log.warning(_('Failed to download the following articles:'))
|
||||||
for feed, article, debug in self.failed_downloads:
|
for feed, article, debug in self.failed_downloads:
|
||||||
self.log_warning(article.title+_(' from ')+feed.title)
|
self.log.warning(article.title+_(' from ')+feed.title)
|
||||||
self.log_debug(article.url)
|
self.log.debug(article.url)
|
||||||
self.log_debug(debug)
|
self.log.debug(debug)
|
||||||
if self.partial_failures:
|
if self.partial_failures:
|
||||||
self.log_warning(_('Failed to download parts of the following articles:'))
|
self.log.warning(_('Failed to download parts of the following articles:'))
|
||||||
for feed, atitle, aurl, debug in self.partial_failures:
|
for feed, atitle, aurl, debug in self.partial_failures:
|
||||||
self.log_warning(atitle + _(' from ') + feed)
|
self.log.warning(atitle + _(' from ') + feed)
|
||||||
self.log_debug(aurl)
|
self.log.debug(aurl)
|
||||||
self.log_warning(_('\tFailed links:'))
|
self.log.warning(_('\tFailed links:'))
|
||||||
for l, tb in debug:
|
for l, tb in debug:
|
||||||
self.log_warning(l)
|
self.log.warning(l)
|
||||||
self.log_debug(tb)
|
self.log.debug(tb)
|
||||||
return res
|
return res
|
||||||
finally:
|
finally:
|
||||||
self.cleanup()
|
self.cleanup()
|
||||||
@ -636,20 +637,11 @@ class BasicNewsRecipe(object):
|
|||||||
extra_css=self.extra_css).render(doctype='xhtml')
|
extra_css=self.extra_css).render(doctype='xhtml')
|
||||||
|
|
||||||
|
|
||||||
def create_logger(self, feed_number, article_number):
|
def _fetch_article(self, url, dir, f, a, num_of_feeds):
|
||||||
logger = logging.getLogger('feeds2disk.article_%d_%d'%(feed_number, article_number))
|
|
||||||
out = cStringIO.StringIO()
|
|
||||||
handler = logging.StreamHandler(out)
|
|
||||||
handler.setFormatter(logging.Formatter('%(levelname)s: %(message)s'))
|
|
||||||
handler.setLevel(logging.INFO if self.verbose else logging.WARNING)
|
|
||||||
if self.debug:
|
|
||||||
handler.setLevel(logging.DEBUG)
|
|
||||||
logger.addHandler(handler)
|
|
||||||
return logger, out
|
|
||||||
|
|
||||||
def _fetch_article(self, url, dir, logger, f, a, num_of_feeds):
|
|
||||||
self.web2disk_options.browser = self.get_browser() if self.multithreaded_fetch else self.browser
|
self.web2disk_options.browser = self.get_browser() if self.multithreaded_fetch else self.browser
|
||||||
fetcher = RecursiveFetcher(self.web2disk_options, logger, self.image_map, self.css_map, (url, f, a, num_of_feeds))
|
fetcher = RecursiveFetcher(self.web2disk_options, self.log,
|
||||||
|
self.image_map, self.css_map,
|
||||||
|
(url, f, a, num_of_feeds))
|
||||||
fetcher.base_dir = dir
|
fetcher.base_dir = dir
|
||||||
fetcher.current_dir = dir
|
fetcher.current_dir = dir
|
||||||
fetcher.show_progress = False
|
fetcher.show_progress = False
|
||||||
@ -661,21 +653,21 @@ class BasicNewsRecipe(object):
|
|||||||
raise Exception(_('Could not fetch article. Run with --debug to see the reason'))
|
raise Exception(_('Could not fetch article. Run with --debug to see the reason'))
|
||||||
return res, path, failures
|
return res, path, failures
|
||||||
|
|
||||||
def fetch_article(self, url, dir, logger, f, a, num_of_feeds):
|
def fetch_article(self, url, dir, f, a, num_of_feeds):
|
||||||
return self._fetch_article(url, dir, logger, f, a, num_of_feeds)
|
return self._fetch_article(url, dir, f, a, num_of_feeds)
|
||||||
|
|
||||||
def fetch_obfuscated_article(self, url, dir, logger, f, a, num_of_feeds):
|
def fetch_obfuscated_article(self, url, dir, f, a, num_of_feeds):
|
||||||
path = os.path.abspath(self.get_obfuscated_article(url, logger))
|
path = os.path.abspath(self.get_obfuscated_article(url))
|
||||||
url = ('file:'+path) if iswindows else ('file://'+path)
|
url = ('file:'+path) if iswindows else ('file://'+path)
|
||||||
return self._fetch_article(url, dir, logger, f, a, num_of_feeds)
|
return self._fetch_article(url, dir, f, a, num_of_feeds)
|
||||||
|
|
||||||
def fetch_embedded_article(self, article, dir, logger, f, a, num_of_feeds):
|
def fetch_embedded_article(self, article, dir, f, a, num_of_feeds):
|
||||||
templ = templates.EmbeddedContent()
|
templ = templates.EmbeddedContent()
|
||||||
raw = templ.generate(article).render('html')
|
raw = templ.generate(article).render('html')
|
||||||
with PersistentTemporaryFile('_feeds2disk.html') as pt:
|
with PersistentTemporaryFile('_feeds2disk.html') as pt:
|
||||||
pt.write(raw)
|
pt.write(raw)
|
||||||
url = ('file:'+pt.name) if iswindows else ('file://'+pt.name)
|
url = ('file:'+pt.name) if iswindows else ('file://'+pt.name)
|
||||||
return self._fetch_article(url, dir, logger, f, a, num_of_feeds)
|
return self._fetch_article(url, dir, f, a, num_of_feeds)
|
||||||
|
|
||||||
|
|
||||||
def build_index(self):
|
def build_index(self):
|
||||||
@ -716,7 +708,6 @@ class BasicNewsRecipe(object):
|
|||||||
art_dir = os.path.join(feed_dir, 'article_%d'%a)
|
art_dir = os.path.join(feed_dir, 'article_%d'%a)
|
||||||
if not os.path.isdir(art_dir):
|
if not os.path.isdir(art_dir):
|
||||||
os.makedirs(art_dir)
|
os.makedirs(art_dir)
|
||||||
logger, stream = self.create_logger(f, a)
|
|
||||||
try:
|
try:
|
||||||
url = self.print_version(article.url)
|
url = self.print_version(article.url)
|
||||||
except NotImplementedError:
|
except NotImplementedError:
|
||||||
@ -726,10 +717,9 @@ class BasicNewsRecipe(object):
|
|||||||
func, arg = (self.fetch_embedded_article, article) if self.use_embedded_content else \
|
func, arg = (self.fetch_embedded_article, article) if self.use_embedded_content else \
|
||||||
((self.fetch_obfuscated_article if self.articles_are_obfuscated \
|
((self.fetch_obfuscated_article if self.articles_are_obfuscated \
|
||||||
else self.fetch_article), url)
|
else self.fetch_article), url)
|
||||||
req = WorkRequest(func, (arg, art_dir, logger, f, a, len(feed)),
|
req = WorkRequest(func, (arg, art_dir, f, a, len(feed)),
|
||||||
{}, (f, a), self.article_downloaded,
|
{}, (f, a), self.article_downloaded,
|
||||||
self.error_in_article_download)
|
self.error_in_article_download)
|
||||||
req.stream = stream
|
|
||||||
req.feed = feed
|
req.feed = feed
|
||||||
req.article = article
|
req.article = article
|
||||||
req.feed_dir = feed_dir
|
req.feed_dir = feed_dir
|
||||||
@ -768,8 +758,8 @@ class BasicNewsRecipe(object):
|
|||||||
cu = self.get_cover_url()
|
cu = self.get_cover_url()
|
||||||
except Exception, err:
|
except Exception, err:
|
||||||
cu = None
|
cu = None
|
||||||
self.log_error(_('Could not download cover: %s')%str(err))
|
self.log.error(_('Could not download cover: %s')%str(err))
|
||||||
self.log_debug(traceback.format_exc())
|
self.log.debug(traceback.format_exc())
|
||||||
if cu is not None:
|
if cu is not None:
|
||||||
ext = cu.rpartition('.')[-1]
|
ext = cu.rpartition('.')[-1]
|
||||||
if '?' in ext:
|
if '?' in ext:
|
||||||
@ -841,8 +831,8 @@ class BasicNewsRecipe(object):
|
|||||||
f.write(html.encode('utf-8'))
|
f.write(html.encode('utf-8'))
|
||||||
renderer = render_html(hf)
|
renderer = render_html(hf)
|
||||||
if renderer.tb is not None:
|
if renderer.tb is not None:
|
||||||
self.logger.warning('Failed to render default cover')
|
self.log.warning('Failed to render default cover')
|
||||||
self.logger.debug(renderer.tb)
|
self.log.debug(renderer.tb)
|
||||||
else:
|
else:
|
||||||
cover_file.write(renderer.data)
|
cover_file.write(renderer.data)
|
||||||
cover_file.flush()
|
cover_file.flush()
|
||||||
@ -863,7 +853,7 @@ class BasicNewsRecipe(object):
|
|||||||
manifest.append(os.path.join(dir, 'index.ncx'))
|
manifest.append(os.path.join(dir, 'index.ncx'))
|
||||||
cpath = getattr(self, 'cover_path', None)
|
cpath = getattr(self, 'cover_path', None)
|
||||||
if cpath is None:
|
if cpath is None:
|
||||||
pf = PersistentTemporaryFile('_recipe_cover.jpg')
|
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
|
||||||
self.default_cover(pf)
|
self.default_cover(pf)
|
||||||
cpath = pf.name
|
cpath = pf.name
|
||||||
if cpath is not None and os.access(cpath, os.R_OK):
|
if cpath is not None and os.access(cpath, os.R_OK):
|
||||||
@ -944,7 +934,7 @@ class BasicNewsRecipe(object):
|
|||||||
a = request.requestID[1]
|
a = request.requestID[1]
|
||||||
|
|
||||||
article = request.article
|
article = request.article
|
||||||
self.log_debug(_('\nDownloaded article %s from %s\n%s')%(article.title, article.url, request.stream.getvalue().decode('utf-8', 'ignore')))
|
self.log.debug(_('\nDownloaded article %s from %s')%(article.title, article.url))
|
||||||
article.orig_url = article.url
|
article.orig_url = article.url
|
||||||
article.url = 'article_%d/index.html'%a
|
article.url = 'article_%d/index.html'%a
|
||||||
article.downloaded = True
|
article.downloaded = True
|
||||||
@ -956,11 +946,11 @@ class BasicNewsRecipe(object):
|
|||||||
|
|
||||||
def error_in_article_download(self, request, traceback):
|
def error_in_article_download(self, request, traceback):
|
||||||
self.jobs_done += 1
|
self.jobs_done += 1
|
||||||
self.log_error(_('Failed to download article: %s from %s\n')%(request.article.title, request.article.url))
|
self.log.error(_('Failed to download article: %s from %s\n')%(request.article.title, request.article.url))
|
||||||
debug = request.stream.getvalue().decode('utf-8', 'ignore')
|
debug = request.stream.getvalue().decode('utf-8', 'ignore')
|
||||||
self.log_debug(debug)
|
self.log.debug(debug)
|
||||||
self.log_debug(traceback)
|
self.log.debug(traceback)
|
||||||
self.log_debug('\n')
|
self.log.debug('\n')
|
||||||
self.report_progress(float(self.jobs_done)/len(self.jobs), _('Article download failed: %s')%request.article.title)
|
self.report_progress(float(self.jobs_done)/len(self.jobs), _('Article download failed: %s')%request.article.title)
|
||||||
self.failed_downloads.append((request.feed, request.article, debug))
|
self.failed_downloads.append((request.feed, request.article, debug))
|
||||||
|
|
||||||
@ -990,7 +980,7 @@ class BasicNewsRecipe(object):
|
|||||||
feed.populate_from_preparsed_feed(msg, [])
|
feed.populate_from_preparsed_feed(msg, [])
|
||||||
feed.description = unicode(err)
|
feed.description = unicode(err)
|
||||||
parsed_feeds.append(feed)
|
parsed_feeds.append(feed)
|
||||||
self.log_exception(msg)
|
self.log.exception(msg)
|
||||||
|
|
||||||
|
|
||||||
return parsed_feeds
|
return parsed_feeds
|
||||||
@ -1033,6 +1023,28 @@ class BasicNewsRecipe(object):
|
|||||||
nmassage.extend(entity_replace)
|
nmassage.extend(entity_replace)
|
||||||
return BeautifulSoup(raw, markupMassage=nmassage)
|
return BeautifulSoup(raw, markupMassage=nmassage)
|
||||||
|
|
||||||
|
@classmethod
def adeify_images(cls, soup):
    '''
    If your recipe when converted to EPUB has problems with images when
    viewed in Adobe Digital Editions, call this method from within
    :method:`postprocess_html`.

    Every ``img`` tag loses its height, width, border, align and style
    attributes and is moved into a new ``div`` that takes its place in
    the parent, followed inside that ``div`` by a ``br``. The same soup
    object is returned.
    '''
    presentational = ('height', 'width', 'border', 'align', 'style')
    for image in soup.findAll('img'):
        for name in presentational:
            if image.has_key(name):
                del image[name]
        # Note the image's slot in its parent before detaching it, so
        # the wrapper can be put in exactly the same place.
        container = image.parent
        slot = container.contents.index(image)
        image.extract()
        wrapper = Tag(soup, 'div')
        line_break = Tag(soup, 'br')
        container.insert(slot, wrapper)
        wrapper.append(image)
        wrapper.append(line_break)
    return soup
|
||||||
|
|
||||||
|
|
||||||
class CustomIndexRecipe(BasicNewsRecipe):
|
class CustomIndexRecipe(BasicNewsRecipe):
|
||||||
|
|
||||||
def custom_index(self):
|
def custom_index(self):
|
||||||
@ -1057,7 +1069,7 @@ class CustomIndexRecipe(BasicNewsRecipe):
|
|||||||
index = os.path.abspath(self.custom_index())
|
index = os.path.abspath(self.custom_index())
|
||||||
url = 'file:'+index if iswindows else 'file://'+index
|
url = 'file:'+index if iswindows else 'file://'+index
|
||||||
self.web2disk_options.browser = self.browser
|
self.web2disk_options.browser = self.browser
|
||||||
fetcher = RecursiveFetcher(self.web2disk_options, self.logger)
|
fetcher = RecursiveFetcher(self.web2disk_options, self.log)
|
||||||
fetcher.base_dir = self.output_dir
|
fetcher.base_dir = self.output_dir
|
||||||
fetcher.current_dir = self.output_dir
|
fetcher.current_dir = self.output_dir
|
||||||
fetcher.show_progress = False
|
fetcher.show_progress = False
|
||||||
@ -1069,7 +1081,7 @@ class AutomaticNewsRecipe(BasicNewsRecipe):
|
|||||||
|
|
||||||
keep_only_tags = [dict(name=['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'])]
|
keep_only_tags = [dict(name=['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'])]
|
||||||
|
|
||||||
def fetch_embedded_article(self, article, dir, logger, f, a, num_of_feeds):
|
def fetch_embedded_article(self, article, dir, f, a, num_of_feeds):
|
||||||
if self.use_embedded_content:
|
if self.use_embedded_content:
|
||||||
self.web2disk_options.keep_only_tags = []
|
self.web2disk_options.keep_only_tags = []
|
||||||
return BasicNewsRecipe.fetch_embedded_article(self, article, dir, logger, f, a, num_of_feeds)
|
return BasicNewsRecipe.fetch_embedded_article(self, article, dir, f, a, num_of_feeds)
|
||||||
|
@ -8,7 +8,7 @@ recipe_modules = ['recipe_' + r for r in (
|
|||||||
'newsweek', 'atlantic', 'economist', 'portfolio', 'the_register',
|
'newsweek', 'atlantic', 'economist', 'portfolio', 'the_register',
|
||||||
'usatoday', 'outlook_india', 'bbc', 'greader', 'wsj',
|
'usatoday', 'outlook_india', 'bbc', 'greader', 'wsj',
|
||||||
'wired', 'globe_and_mail', 'smh', 'espn', 'business_week', 'miami_herald',
|
'wired', 'globe_and_mail', 'smh', 'espn', 'business_week', 'miami_herald',
|
||||||
'ars_technica', 'upi', 'new_yorker', 'irish_times', 'iht', 'lanacion',
|
'ars_technica', 'upi', 'new_yorker', 'irish_times', 'lanacion',
|
||||||
'discover_magazine', 'scientific_american', 'new_york_review_of_books',
|
'discover_magazine', 'scientific_american', 'new_york_review_of_books',
|
||||||
'daily_telegraph', 'guardian', 'el_pais', 'new_scientist', 'b92',
|
'daily_telegraph', 'guardian', 'el_pais', 'new_scientist', 'b92',
|
||||||
'politika', 'moscow_times', 'latimes', 'japan_times', 'san_fran_chronicle',
|
'politika', 'moscow_times', 'latimes', 'japan_times', 'san_fran_chronicle',
|
||||||
@ -37,7 +37,8 @@ recipe_modules = ['recipe_' + r for r in (
|
|||||||
'new_york_review_of_books_no_sub', 'politico', 'adventuregamers',
|
'new_york_review_of_books_no_sub', 'politico', 'adventuregamers',
|
||||||
'mondedurable', 'instapaper', 'dnevnik_cro', 'vecernji_list',
|
'mondedurable', 'instapaper', 'dnevnik_cro', 'vecernji_list',
|
||||||
'nacional_cro', '24sata', 'dnevni_avaz', 'glas_srpske', '24sata_rs',
|
'nacional_cro', '24sata', 'dnevni_avaz', 'glas_srpske', '24sata_rs',
|
||||||
'krstarica', 'krstarica_en', 'tanjug', 'laprensa_ni',
|
'krstarica', 'krstarica_en', 'tanjug', 'laprensa_ni', 'azstarnet',
|
||||||
|
'corriere_della_sera_it', 'corriere_della_sera_en', 'msdnmag_en',
|
||||||
)]
|
)]
|
||||||
|
|
||||||
import re, imp, inspect, time, os
|
import re, imp, inspect, time, os
|
||||||
|
@ -9,6 +9,7 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
class Ser24Sata(BasicNewsRecipe):
|
class Ser24Sata(BasicNewsRecipe):
|
||||||
title = '24 Sata - Sr'
|
title = '24 Sata - Sr'
|
||||||
@ -39,14 +40,30 @@ class Ser24Sata(BasicNewsRecipe):
|
|||||||
|
|
||||||
feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')]
|
feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')]
|
||||||
|
|
||||||
|
def cleanup_image_tags(self,soup):
    '''
    Strip the height, width, border and align attributes from every
    ``img`` tag and move each image into a new ``div`` that takes its
    place in the parent, followed inside that ``div`` by a ``br``.
    Returns the same soup object.
    '''
    presentational = ('height', 'width', 'border', 'align')
    for image in soup.findAll('img'):
        for name in presentational:
            if image.has_key(name):
                del image[name]
        # Note the image's slot in its parent before detaching it, so
        # the wrapper can be put in exactly the same place.
        container = image.parent
        slot = container.contents.index(image)
        image.extract()
        wrapper = Tag(soup, 'div')
        line_break = Tag(soup, 'br')
        container.insert(slot, wrapper)
        wrapper.append(image)
        wrapper.append(line_break)
    return soup
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['xml:lang'] = 'sr-Latn-RS'
|
soup.html['xml:lang'] = 'sr-Latn-RS'
|
||||||
soup.html['lang'] = 'sr-Latn-RS'
|
soup.html['lang'] = 'sr-Latn-RS'
|
||||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
return soup
|
return self.cleanup_image_tags(soup)
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
article, sep, rest = url.partition('#')
|
article, sep, rest = url.partition('#')
|
||||||
return article.replace('/show.php','/_print.php')
|
article_base, sep2, article_id = article.partition('id=')
|
||||||
|
return 'http://www.24sata.co.rs/_print.php?id=' + article_id
|
||||||
|
|
||||||
|
63
src/calibre/web/feeds/recipes/recipe_azstarnet.py
Normal file
63
src/calibre/web/feeds/recipes/recipe_azstarnet.py
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
www.azstarnet.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Azstarnet(BasicNewsRecipe):
    # Recipe for the Arizona Daily Star (www.azstarnet.com). The site is
    # fetched with a logged-in browser when credentials are supplied.

    # Descriptive metadata, also reused in the converter options below.
    title = 'Arizona Daily Star'
    __author__ = 'Darko Miletic'
    description = 'news from Arizona'
    publisher = 'azstarnet.com'
    category = 'news, politics, Arizona, USA'

    # Download settings.
    delay = 1
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    needs_subscription = True
    remove_javascript = True

    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    html2epub_options = (
        'publisher="' + publisher
        + '"\ncomments="' + description
        + '"\ntags="' + category + '"'
    )

    def get_browser(self):
        '''
        Return a browser object; when both a username and a password
        are set, the site's registration form is submitted first.
        '''
        br = BasicNewsRecipe.get_browser()
        if self.username is None or self.password is None:
            return br
        br.open('http://azstarnet.com/registration/retro.php')
        br.select_form(nr=1)
        br['email'] = self.username
        br['pass'] = self.password
        br.submit()
        return br

    # Only the story container is kept; embedded media, banners and
    # images are dropped from it.
    keep_only_tags = [dict(name='div', attrs={'id':'storycontent'})]

    remove_tags = [
        dict(name=['object','link','iframe','base','img']),
        dict(name='div',attrs={'class':'bannerinstory'}),
    ]

    feeds = [(u'Tucson Region', u'http://rss.azstarnet.com/index.php?site=metro')]

    def preprocess_html(self, soup):
        '''
        Mark the document as left-to-right US English, put language and
        content-type meta tags at the start of the head, and remove all
        inline ``style`` attributes. Returns the same soup object.
        '''
        soup.html['dir'] = 'ltr'
        soup.html['lang'] = 'en-US'
        meta_tags = '\n<meta http-equiv="Content-Language" content="en-US"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n'
        soup.head.insert(0, meta_tags)
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup
|
||||||
|
|
@ -8,11 +8,12 @@ blic.rs
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
class Blic(BasicNewsRecipe):
|
class Blic(BasicNewsRecipe):
|
||||||
title = u'Blic'
|
title = 'Blic'
|
||||||
__author__ = u'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = u'Blic.co.yu online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
|
description = 'Blic.co.yu online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
|
||||||
publisher = 'RINGIER d.o.o.'
|
publisher = 'RINGIER d.o.o.'
|
||||||
category = 'news, politics, Serbia'
|
category = 'news, politics, Serbia'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
@ -30,7 +31,7 @@ class Blic(BasicNewsRecipe):
|
|||||||
, '--ignore-tables'
|
, '--ignore-tables'
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} "'
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
@ -44,10 +45,26 @@ class Blic(BasicNewsRecipe):
|
|||||||
start_url, question, rest_url = url.partition('?')
|
start_url, question, rest_url = url.partition('?')
|
||||||
return u'http://www.blic.rs/_print.php?' + rest_url
|
return u'http://www.blic.rs/_print.php?' + rest_url
|
||||||
|
|
||||||
|
def cleanup_image_tags(self, soup):
    """Strip layout attributes from images and wrap each one in a <div>.

    For every ``<img>`` in *soup* the ``height``, ``width``, ``border`` and
    ``align`` attributes are removed when present.  The image is then taken
    out of its parent and replaced, at the same index, by a new ``<div>``
    that contains the image followed by a ``<br>``.  Returns *soup*.
    """
    layout_attributes = ['height', 'width', 'border', 'align']
    for image in soup.findAll('img'):
        for attribute in layout_attributes:
            if image.has_key(attribute):
                del image[attribute]
        # Remember where the image sat before detaching it, so the wrapper
        # can take over exactly that slot.
        container = image.parent
        position = container.contents.index(image)
        image.extract()
        wrapper = Tag(soup, 'div')
        line_break = Tag(soup, 'br')
        container.insert(position, wrapper)
        wrapper.append(image)
        wrapper.append(line_break)
    return soup
|
||||||
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
    """Tag the page as Serbian (Latin), drop inline styles, tidy images.

    Inserts a Content-Language ``<meta>`` string at the start of ``<head>``,
    deletes the ``style`` attribute from every element that has one, and
    returns whatever ``self.cleanup_image_tags(soup)`` returns.
    """
    language_meta = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
    soup.head.insert(0, language_meta)
    for styled in soup.findAll(style=True):
        del styled['style']
    cleaned = self.cleanup_image_tags(soup)
    return cleaned
|
||||||
|
|
@ -0,0 +1,45 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
www.corriere.it/english
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Corriere_en(BasicNewsRecipe):
    """Recipe for the English edition of Corriere della Sera.

    Source site: www.corriere.it/english.  Articles come from the single
    "Italian Life" RSS feed; only the article container is kept and
    navigation/toolbar elements and images are stripped.
    """

    # --- identification -------------------------------------------------
    title = 'Corriere della Sera in English'
    __author__ = 'Darko Miletic'
    description = 'News from Milan and Italy'
    publisher = 'Corriere della Sera'
    category = 'news, politics, Italy'
    language = _('English')

    # --- download behaviour ---------------------------------------------
    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'
    remove_javascript = True

    # --- conversion metadata --------------------------------------------
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
        '--ignore-tables',
    ]

    html2epub_options = ''.join([
        'publisher="', publisher,
        '"\ncomments="', description,
        '"\ntags="', category,
        '"\nlinearize_tables=True',
    ])

    # --- page clean-up ----------------------------------------------------
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': ['news-dettaglio article', 'article']}},
    ]

    remove_tags = [
        {'name': ['base', 'object', 'link', 'embed', 'img']},
        {'name': 'div', 'attrs': {'class': 'news-goback'}},
        {'name': 'ul', 'attrs': {'class': 'toolbar'}},
    ]

    remove_tags_after = {'name': 'p', 'attrs': {'class': 'footnotes'}}

    # --- feeds ------------------------------------------------------------
    feeds = [
        (u'Italian Life', u'http://www.corriere.it/rss/english.xml'),
    ]
|
||||||
|
|
@ -0,0 +1,55 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
www.corriere.it
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
class Corriere_it(BasicNewsRecipe):
    """Recipe for the Italian edition of Corriere della Sera (www.corriere.it).

    Pulls ten section feeds; only the article container is kept and
    navigation/toolbar elements and images are stripped.
    """

    # --- identification -------------------------------------------------
    title = 'Corriere della Sera'
    __author__ = 'Darko Miletic'
    description = 'News from Milan and Italy'
    publisher = 'Corriere della Sera'
    category = 'news, politics, Italy'
    language = _('Italian')

    # --- download behaviour ---------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'
    remove_javascript = True

    # --- conversion metadata --------------------------------------------
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
        '--ignore-tables',
    ]

    html2epub_options = ''.join([
        'publisher="', publisher,
        '"\ncomments="', description,
        '"\ntags="', category,
        '"\nlinearize_tables=True',
    ])

    # --- page clean-up ----------------------------------------------------
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': ['news-dettaglio article', 'article']}},
    ]

    remove_tags = [
        {'name': ['base', 'object', 'link', 'embed', 'img']},
        {'name': 'div', 'attrs': {'class': 'news-goback'}},
        {'name': 'ul', 'attrs': {'class': 'toolbar'}},
    ]

    remove_tags_after = {'name': 'p', 'attrs': {'class': 'footnotes'}}

    # --- feeds: (section title, RSS url) ----------------------------------
    feeds = [
        (u'Ultimora', u'http://www.corriere.it/rss/ultimora.xml'),
        (u'Cronache', u'http://www.corriere.it/rss/cronache.xml'),
        (u'Economia', u'http://www.corriere.it/rss/economia.xml'),
        (u'Editoriali', u'http://www.corriere.it/rss/editoriali.xml'),
        (u'Esteri', u'http://www.corriere.it/rss/esteri.xml'),
        (u'Politica', u'http://www.corriere.it/rss/politica.xml'),
        (u'Salute', u'http://www.corriere.it/rss/salute.xml'),
        (u'Scienze', u'http://www.corriere.it/rss/scienze.xml'),
        (u'Spettacolo', u'http://www.corriere.it/rss/spettacoli.xml'),
        (u'Sport', u'http://www.corriere.it/rss/sport.xml'),
    ]
|
||||||
|
|
61
src/calibre/web/feeds/recipes/recipe_msdnmag_en.py
Normal file
61
src/calibre/web/feeds/recipes/recipe_msdnmag_en.py
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
msdn.microsoft.com/en-us/magazine
|
||||||
|
'''
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class MSDNMagazine_en(BasicNewsRecipe):
    """Recipe for MSDN Magazine (msdn.microsoft.com/en-us/magazine).

    Articles come from the magazine's RSS feed; the cover image is looked up
    on the current-issue page, and the site's title ``<div>`` elements are
    renamed to real heading tags.
    """

    # --- identification -------------------------------------------------
    title = 'MSDN Magazine'
    __author__ = 'Darko Miletic'
    description = 'The Microsoft Journal for Developers'
    publisher = 'Microsoft Press'
    category = 'news, IT, Microsoft, programming, windows'
    language = _('English')

    # --- download behaviour ---------------------------------------------
    oldest_article = 31
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_javascript = True
    # Page that get_cover_url() scans for the cover image.
    current_issue = 'http://msdn.microsoft.com/en-us/magazine/default.aspx'

    # --- conversion metadata --------------------------------------------
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    html2epub_options = ''.join([
        'publisher="', publisher,
        '"\ncomments="', description,
        '"\ntags="', category,
        '"',
    ])

    # --- feeds and page clean-up ------------------------------------------
    feeds = [
        (u'Articles', u'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1'),
    ]

    keep_only_tags = [{'name': 'div', 'attrs': {'class': 'topic'}}]

    remove_tags = [
        {'name': ['object', 'link', 'base', 'table']},
        {'name': 'div', 'attrs': {'class': 'MTPS_CollapsibleRegion'}},
    ]

    def get_cover_url(self):
        """Return the ``src`` of the cover image on the current-issue page.

        The image is the first ``<img>`` inside the first
        ``<span class="ContentsImageSpacer">``.  Returns None when either
        element cannot be found.
        """
        issue_page = self.index_to_soup(self.current_issue)
        spacer = issue_page.find('span', attrs={'class': 'ContentsImageSpacer'})
        if not spacer:
            return None
        image = spacer.find('img')
        if not image:
            return None
        return image['src']

    def preprocess_html(self, soup):
        """Rename the site's title ``<div>`` elements to heading tags.

        Sub-titles become ``<h2>``, headlines ``<h1>`` and article-type
        titles ``<h3>`` (processed in that order).  Returns *soup*.
        """
        heading_for_class = (
            (['FeatureSmallHead', 'ColumnTypeSubTitle'], "h2"),
            (['FeatureHeadline', 'ColumnTypeTitle'], "h1"),
            ('ArticleTypeTitle', "h3"),
        )
        for css_class, heading in heading_for_class:
            for block in soup.findAll('div', attrs={'class': css_class}):
                block.name = heading
        return soup
|
||||||
|
|
@ -9,6 +9,7 @@ newyorker.com
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class NewYorker(BasicNewsRecipe):
|
class NewYorker(BasicNewsRecipe):
|
||||||
|
|
||||||
title = u'The New Yorker'
|
title = u'The New Yorker'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'The best of US journalism'
|
description = 'The best of US journalism'
|
||||||
@ -41,3 +42,12 @@ class NewYorker(BasicNewsRecipe):
|
|||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url + '?printable=true'
|
return url + '?printable=true'
|
||||||
|
|
||||||
|
def postprocess_html(self, soup, x):
    """Re-attach the ``<body>`` element under ``<html>``.

    When the document has both a ``<body>`` and an ``<html>`` element, the
    body is extracted from wherever it currently sits and inserted into
    ``<html>`` at position -1.  The second argument is not used.  Returns
    *soup* in every case.
    """
    body_tag = soup.find('body')
    if not body_tag:
        return soup
    html_tag = soup.find('html')
    if not html_tag:
        return soup
    body_tag.extract()
    html_tag.insert(-1, body_tag)
    return soup
|
||||||
|
@ -8,9 +8,10 @@ nspm.rs
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
class Nspm(BasicNewsRecipe):
|
class Nspm(BasicNewsRecipe):
|
||||||
title = u'Nova srpska politicka misao'
|
title = 'Nova srpska politicka misao'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Casopis za politicku teoriju i drustvena istrazivanja'
|
description = 'Casopis za politicku teoriju i drustvena istrazivanja'
|
||||||
publisher = 'NSPM'
|
publisher = 'NSPM'
|
||||||
@ -36,7 +37,7 @@ class Nspm(BasicNewsRecipe):
|
|||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['a','img','link','object','embed'])
|
dict(name=['link','object','embed'])
|
||||||
,dict(name='td', attrs={'class':'buttonheading'})
|
,dict(name='td', attrs={'class':'buttonheading'})
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -50,6 +51,21 @@ class Nspm(BasicNewsRecipe):
|
|||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('.html','/stampa.html')
|
return url.replace('.html','/stampa.html')
|
||||||
|
|
||||||
|
def cleanup_image_tags(self, soup):
    """Strip layout attributes from images and wrap each one in a <div>.

    For every ``<img>`` in *soup* the ``height``, ``width``, ``border`` and
    ``align`` attributes are removed when present.  The image is then taken
    out of its parent and replaced, at the same index, by a new ``<div>``
    that contains the image followed by a ``<br>``.  Returns *soup*.
    """
    layout_attributes = ['height', 'width', 'border', 'align']
    for image in soup.findAll('img'):
        for attribute in layout_attributes:
            if image.has_key(attribute):
                del image[attribute]
        # Remember where the image sat before detaching it, so the wrapper
        # can take over exactly that slot.
        container = image.parent
        position = container.contents.index(image)
        image.extract()
        wrapper = Tag(soup, 'div')
        line_break = Tag(soup, 'br')
        container.insert(position, wrapper)
        wrapper.append(image)
        wrapper.append(line_break)
    return soup
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
lng = 'sr-Latn-RS'
|
lng = 'sr-Latn-RS'
|
||||||
soup.html['xml:lang'] = lng
|
soup.html['xml:lang'] = lng
|
||||||
@ -59,4 +75,4 @@ class Nspm(BasicNewsRecipe):
|
|||||||
ftag['content'] = lng
|
ftag['content'] = lng
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return self.cleanup_image_tags(soup)
|
||||||
|
@ -1,38 +1,47 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
tomshardware.com
|
tomshardware.com/us
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
import urllib
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
class Tomshardware(BasicNewsRecipe):
|
class Tomshardware(BasicNewsRecipe):
|
||||||
|
|
||||||
title = "Tom's Hardware US"
|
title = "Tom's Hardware US"
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Hardware reviews and News'
|
description = 'Hardware reviews and News'
|
||||||
|
publisher = "Tom's Hardware"
|
||||||
|
category = 'news, IT, hardware, USA'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
language = _('English')
|
language = _('English')
|
||||||
INDEX = 'http://www.tomshardware.com'
|
INDEX = 'http://www.tomshardware.com'
|
||||||
LOGIN = 'http://www.tomshardware.com/membres/?r=%2Fus%2F#loginForm'
|
LOGIN = INDEX + '/membres/'
|
||||||
cover_url = 'http://img.bestofmedia.com/img/tomshardware/design/tomshardware.jpg'
|
remove_javascript = True
|
||||||
|
use_embedded_content= False
|
||||||
|
|
||||||
html2lrf_options = [ '--comment' , description
|
html2lrf_options = [
|
||||||
, '--category' , 'hardware,news'
|
'--comment', description
|
||||||
, '--base-font-size', '10'
|
, '--category', category
|
||||||
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
def get_browser(self):
    """Return the download browser, logged in when credentials are set.

    The US start page (``INDEX + '/us/'``) is always opened first.  When
    both ``self.username`` and ``self.password`` are not None, the login
    fields are urlencoded and passed as request data to ``self.LOGIN``.
    """
    browser = BasicNewsRecipe.get_browser()
    start_page = self.INDEX+'/us/'
    browser.open(start_page)
    if self.username is None or self.password is None:
        return browser
    login_fields = {
        'action': 'login_action',
        'r': start_page,
        'login': self.username,
        'mdp': self.password,
    }
    browser.open(self.LOGIN, urllib.urlencode(login_fields))
    return browser
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
@ -49,10 +58,22 @@ class Tomshardware(BasicNewsRecipe):
|
|||||||
main, sep, rest = url.rpartition('.html')
|
main, sep, rest = url.rpartition('.html')
|
||||||
rmain, rsep, article_id = main.rpartition(',')
|
rmain, rsep, article_id = main.rpartition(',')
|
||||||
tmain, tsep, trest = rmain.rpartition('/reviews/')
|
tmain, tsep, trest = rmain.rpartition('/reviews/')
|
||||||
|
rind = 'http://www.tomshardware.com/news_print.php?p1='
|
||||||
if tsep:
|
if tsep:
|
||||||
return 'http://www.tomshardware.com/review_print.php?p1=' + article_id
|
rind = 'http://www.tomshardware.com/review_print.php?p1='
|
||||||
return 'http://www.tomshardware.com/news_print.php?p1=' + article_id
|
return rind + article_id
|
||||||
|
|
||||||
|
def cleanup_image_tags(self, soup):
    """Remove layout attributes from every ``<img>`` in *soup*.

    The ``height``, ``width``, ``border`` and ``align`` attributes are
    deleted when present; all other attributes are left alone.  Returns
    *soup*.
    """
    layout_attributes = ['height', 'width', 'border', 'align']
    for image in soup.findAll('img'):
        present = [name for name in layout_attributes if image.has_key(name)]
        for name in present:
            del image[name]
    return soup
|
||||||
|
|
||||||
def preprocess_html(self, soup):
    """Strip scripting hooks and inline styles, then tidy the images.

    Deletes the ``onload`` attribute of ``<body>``, removes every ``style``
    attribute, renames all ``<span>`` elements to ``<div>``, and returns
    whatever ``self.cleanup_image_tags(soup)`` returns.
    """
    del soup.body['onload']
    for styled in soup.findAll(style=True):
        del styled['style']
    for span in soup.findAll('span'):
        span.name = "div"
    cleaned = self.cleanup_image_tags(soup)
    return cleaned
|
||||||
|
@ -17,7 +17,7 @@ class WashingtonPost(BasicNewsRecipe):
|
|||||||
|
|
||||||
feeds = [ ('Today\'s Highlights', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/03/24/LI2005032400102.xml'),
|
feeds = [ ('Today\'s Highlights', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/03/24/LI2005032400102.xml'),
|
||||||
('Politics', 'http://www.washingtonpost.com/wp-dyn/rss/politics/index.xml'),
|
('Politics', 'http://www.washingtonpost.com/wp-dyn/rss/politics/index.xml'),
|
||||||
('Nation', 'http://www.www.washingtonpost.com/wp-dyn/rss/nation/index.xml'),
|
('Nation', 'http://www.washingtonpost.com/wp-dyn/rss/nation/index.xml'),
|
||||||
('World', 'http://www.washingtonpost.com/wp-dyn/rss/world/index.xml'),
|
('World', 'http://www.washingtonpost.com/wp-dyn/rss/world/index.xml'),
|
||||||
('Business', 'http://www.washingtonpost.com/wp-dyn/rss/business/index.xml'),
|
('Business', 'http://www.washingtonpost.com/wp-dyn/rss/business/index.xml'),
|
||||||
('Technology', 'http://www.washingtonpost.com/wp-dyn/rss/technology/index.xml'),
|
('Technology', 'http://www.washingtonpost.com/wp-dyn/rss/technology/index.xml'),
|
||||||
|
@ -7,18 +7,19 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
Fetch a webpage and its links recursively. The webpages are saved to disk in
|
Fetch a webpage and its links recursively. The webpages are saved to disk in
|
||||||
UTF-8 encoding with any charset declarations removed.
|
UTF-8 encoding with any charset declarations removed.
|
||||||
'''
|
'''
|
||||||
import sys, socket, os, urlparse, logging, re, time, copy, urllib2, threading, traceback
|
import sys, socket, os, urlparse, re, time, copy, urllib2, threading, traceback
|
||||||
from urllib import url2pathname, quote
|
from urllib import url2pathname, quote
|
||||||
from threading import RLock
|
from threading import RLock
|
||||||
from httplib import responses
|
from httplib import responses
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
|
|
||||||
from calibre import setup_cli_handlers, browser, sanitize_file_name, \
|
from calibre import browser, sanitize_file_name, \
|
||||||
relpath, unicode_path
|
relpath, unicode_path
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
from calibre.utils.config import OptionParser
|
from calibre.utils.config import OptionParser
|
||||||
|
from calibre.utils.logging import Log
|
||||||
|
|
||||||
class FetchError(Exception):
|
class FetchError(Exception):
|
||||||
pass
|
pass
|
||||||
@ -92,10 +93,11 @@ class RecursiveFetcher(object):
|
|||||||
default_timeout = socket.getdefaulttimeout() # Needed here as it is used in __del__
|
default_timeout = socket.getdefaulttimeout() # Needed here as it is used in __del__
|
||||||
DUMMY_LOCK = DummyLock()
|
DUMMY_LOCK = DummyLock()
|
||||||
|
|
||||||
def __init__(self, options, logger, image_map={}, css_map={}, job_info=None):
|
def __init__(self, options, log, image_map={}, css_map={}, job_info=None):
|
||||||
self.base_dir = os.path.abspath(os.path.expanduser(options.dir))
|
self.base_dir = os.path.abspath(os.path.expanduser(options.dir))
|
||||||
if not os.path.exists(self.base_dir):
|
if not os.path.exists(self.base_dir):
|
||||||
os.makedirs(self.base_dir)
|
os.makedirs(self.base_dir)
|
||||||
|
self.log = log
|
||||||
self.default_timeout = socket.getdefaulttimeout()
|
self.default_timeout = socket.getdefaulttimeout()
|
||||||
socket.setdefaulttimeout(options.timeout)
|
socket.setdefaulttimeout(options.timeout)
|
||||||
self.verbose = options.verbose
|
self.verbose = options.verbose
|
||||||
@ -174,12 +176,15 @@ class RecursiveFetcher(object):
|
|||||||
|
|
||||||
def fetch_url(self, url):
|
def fetch_url(self, url):
|
||||||
data = None
|
data = None
|
||||||
self.log_debug('Fetching %s', url)
|
self.log.debug('Fetching', url)
|
||||||
delta = time.time() - self.last_fetch_at
|
delta = time.time() - self.last_fetch_at
|
||||||
if delta < self.delay:
|
if delta < self.delay:
|
||||||
time.sleep(delta)
|
time.sleep(delta)
|
||||||
if re.search(r'\s+', url) is not None:
|
if re.search(r'\s+|,', url) is not None:
|
||||||
url = quote(url)
|
purl = list(urlparse.urlparse(url))
|
||||||
|
for i in range(2, 6):
|
||||||
|
purl[i] = quote(purl[i])
|
||||||
|
url = urlparse.urlunparse(purl)
|
||||||
with self.browser_lock:
|
with self.browser_lock:
|
||||||
try:
|
try:
|
||||||
with closing(self.browser.open(url)) as f:
|
with closing(self.browser.open(url)) as f:
|
||||||
@ -190,7 +195,7 @@ class RecursiveFetcher(object):
|
|||||||
raise FetchError, responses[err.code]
|
raise FetchError, responses[err.code]
|
||||||
if getattr(err, 'reason', [0])[0] == 104 or \
|
if getattr(err, 'reason', [0])[0] == 104 or \
|
||||||
getattr(getattr(err, 'args', [None])[0], 'errno', None) == -2: # Connection reset by peer or Name or service not know
|
getattr(getattr(err, 'args', [None])[0], 'errno', None) == -2: # Connection reset by peer or Name or service not know
|
||||||
self.log_debug('Temporary error, retrying in 1 second')
|
self.log.debug('Temporary error, retrying in 1 second')
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
with closing(self.browser.open(url)) as f:
|
with closing(self.browser.open(url)) as f:
|
||||||
data = response(f.read()+f.read())
|
data = response(f.read()+f.read())
|
||||||
@ -204,9 +209,9 @@ class RecursiveFetcher(object):
|
|||||||
|
|
||||||
def start_fetch(self, url):
    """Download *url* via ``process_links`` and return its result.

    The URL is wrapped in a one-link document so that the regular link
    processing can be used for the starting page as well, at recursion
    level 0 and with an empty ``into_dir``.
    """
    seed_document = BeautifulSoup(u'<a href="'+url+'" />')
    self.log.debug('Downloading')
    saved = self.process_links(seed_document, url, 0, into_dir='')
    summary = '%s saved to %s'%( url, saved)
    self.log.debug(summary)
    return saved
|
||||||
|
|
||||||
def is_link_ok(self, url):
|
def is_link_ok(self, url):
|
||||||
@ -243,8 +248,7 @@ class RecursiveFetcher(object):
|
|||||||
try:
|
try:
|
||||||
data = self.fetch_url(iurl)
|
data = self.fetch_url(iurl)
|
||||||
except Exception, err:
|
except Exception, err:
|
||||||
self.log_debug('Could not fetch stylesheet %s', iurl)
|
self.log.exception('Could not fetch stylesheet %s'% iurl)
|
||||||
self.log_debug('Error: %s', str(err), exc_info=True)
|
|
||||||
continue
|
continue
|
||||||
stylepath = os.path.join(diskpath, 'style'+str(c)+'.css')
|
stylepath = os.path.join(diskpath, 'style'+str(c)+'.css')
|
||||||
with self.stylemap_lock:
|
with self.stylemap_lock:
|
||||||
@ -267,8 +271,7 @@ class RecursiveFetcher(object):
|
|||||||
try:
|
try:
|
||||||
data = self.fetch_url(iurl)
|
data = self.fetch_url(iurl)
|
||||||
except Exception, err:
|
except Exception, err:
|
||||||
self.log_warning('Could not fetch stylesheet %s', iurl)
|
self.log.exception('Could not fetch stylesheet %s'% iurl)
|
||||||
self.log_debug('Error: %s', str(err), exc_info=True)
|
|
||||||
continue
|
continue
|
||||||
c += 1
|
c += 1
|
||||||
stylepath = os.path.join(diskpath, 'style'+str(c)+'.css')
|
stylepath = os.path.join(diskpath, 'style'+str(c)+'.css')
|
||||||
@ -291,9 +294,6 @@ class RecursiveFetcher(object):
|
|||||||
iurl = self.image_url_processor(baseurl, iurl)
|
iurl = self.image_url_processor(baseurl, iurl)
|
||||||
ext = os.path.splitext(iurl)[1]
|
ext = os.path.splitext(iurl)[1]
|
||||||
ext = ext[:5]
|
ext = ext[:5]
|
||||||
#if not ext:
|
|
||||||
# self.log_debug('Skipping extensionless image %s', iurl)
|
|
||||||
# continue
|
|
||||||
if not urlparse.urlsplit(iurl).scheme:
|
if not urlparse.urlsplit(iurl).scheme:
|
||||||
iurl = urlparse.urljoin(baseurl, iurl, False)
|
iurl = urlparse.urljoin(baseurl, iurl, False)
|
||||||
with self.imagemap_lock:
|
with self.imagemap_lock:
|
||||||
@ -303,8 +303,7 @@ class RecursiveFetcher(object):
|
|||||||
try:
|
try:
|
||||||
data = self.fetch_url(iurl)
|
data = self.fetch_url(iurl)
|
||||||
except Exception, err:
|
except Exception, err:
|
||||||
self.log_warning('Could not fetch image %s', iurl)
|
self.log.exception('Could not fetch image %s'% iurl)
|
||||||
self.log_debug('Error: %s', str(err), exc_info=True)
|
|
||||||
continue
|
continue
|
||||||
c += 1
|
c += 1
|
||||||
fname = sanitize_file_name('img'+str(c)+ext)
|
fname = sanitize_file_name('img'+str(c)+ext)
|
||||||
@ -330,10 +329,10 @@ class RecursiveFetcher(object):
|
|||||||
if not parts.scheme:
|
if not parts.scheme:
|
||||||
iurl = urlparse.urljoin(baseurl, iurl, False)
|
iurl = urlparse.urljoin(baseurl, iurl, False)
|
||||||
if not self.is_link_ok(iurl):
|
if not self.is_link_ok(iurl):
|
||||||
self.log_debug('Skipping invalid link: %s', iurl)
|
self.log.debug('Skipping invalid link:', iurl)
|
||||||
return None
|
return None
|
||||||
if filter and not self.is_link_wanted(iurl):
|
if filter and not self.is_link_wanted(iurl):
|
||||||
self.log_debug('Filtered link: '+iurl)
|
self.log.debug('Filtered link: '+iurl)
|
||||||
return None
|
return None
|
||||||
return iurl
|
return iurl
|
||||||
|
|
||||||
@ -401,7 +400,7 @@ class RecursiveFetcher(object):
|
|||||||
base = soup.find('base', href=True)
|
base = soup.find('base', href=True)
|
||||||
if base is not None:
|
if base is not None:
|
||||||
newbaseurl = base['href']
|
newbaseurl = base['href']
|
||||||
self.log_debug('Processing images...')
|
self.log.debug('Processing images...')
|
||||||
self.process_images(soup, newbaseurl)
|
self.process_images(soup, newbaseurl)
|
||||||
if self.download_stylesheets:
|
if self.download_stylesheets:
|
||||||
self.process_stylesheets(soup, newbaseurl)
|
self.process_stylesheets(soup, newbaseurl)
|
||||||
@ -416,11 +415,11 @@ class RecursiveFetcher(object):
|
|||||||
self.downloaded_paths.append(res)
|
self.downloaded_paths.append(res)
|
||||||
self.filemap[nurl] = res
|
self.filemap[nurl] = res
|
||||||
if recursion_level < self.max_recursions:
|
if recursion_level < self.max_recursions:
|
||||||
self.log_debug('Processing links...')
|
self.log.debug('Processing links...')
|
||||||
self.process_links(soup, newbaseurl, recursion_level+1)
|
self.process_links(soup, newbaseurl, recursion_level+1)
|
||||||
else:
|
else:
|
||||||
self.process_return_links(soup, newbaseurl)
|
self.process_return_links(soup, newbaseurl)
|
||||||
self.log_debug('Recursion limit reached. Skipping links in %s', iurl)
|
self.log.debug('Recursion limit reached. Skipping links in', iurl)
|
||||||
|
|
||||||
if callable(self.postprocess_html_ext):
|
if callable(self.postprocess_html_ext):
|
||||||
soup = self.postprocess_html_ext(soup,
|
soup = self.postprocess_html_ext(soup,
|
||||||
@ -434,8 +433,7 @@ class RecursiveFetcher(object):
|
|||||||
self.localize_link(tag, 'href', res)
|
self.localize_link(tag, 'href', res)
|
||||||
except Exception, err:
|
except Exception, err:
|
||||||
self.failed_links.append((iurl, traceback.format_exc()))
|
self.failed_links.append((iurl, traceback.format_exc()))
|
||||||
self.log_warning('Could not fetch link %s', iurl)
|
self.log.exception('Could not fetch link', iurl)
|
||||||
self.log_debug('Error: %s', str(err), exc_info=True)
|
|
||||||
finally:
|
finally:
|
||||||
self.current_dir = diskpath
|
self.current_dir = diskpath
|
||||||
self.files += 1
|
self.files += 1
|
||||||
@ -478,12 +476,10 @@ def option_parser(usage=_('%prog URL\n\nWhere URL is for example http://google.c
|
|||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
def create_fetcher(options, image_map=None, log=None):
    """Build a RecursiveFetcher for *options*.

    :param options: parsed web2disk options, passed through unchanged.
    :param image_map: optional mapping handed to the fetcher as its
        ``image_map``.  When omitted, a fresh empty dict is created for
        this fetcher.
    :param log: log object for the fetcher; a new ``Log()`` is created
        when none is given.
    :return: the configured ``RecursiveFetcher``.
    """
    if log is None:
        log = Log()
    # Previously the argument was accepted but always replaced by a new {},
    # so a caller-supplied map was silently ignored.  None is used as the
    # default so that separate calls never share one dict.
    if image_map is None:
        image_map = {}
    return RecursiveFetcher(options, log, image_map=image_map)
|
|
||||||
|
|
||||||
def main(args=sys.argv):
|
def main(args=sys.argv):
|
||||||
parser = option_parser()
|
parser = option_parser()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user