diff --git a/recipes/espn.recipe b/recipes/espn.recipe
index 34c772f767..03c95d0001 100644
--- a/recipes/espn.recipe
+++ b/recipes/espn.recipe
@@ -20,7 +20,7 @@ class ESPN(BasicNewsRecipe):
use_embedded_content = False
remove_javascript = True
- needs_subscription = True
+ needs_subscription = 'optional'
encoding= 'ISO-8859-1'
remove_tags_before = dict(name='font', attrs={'class':'date'})
@@ -75,32 +75,30 @@ class ESPN(BasicNewsRecipe):
return soup
-
-
def get_browser(self):
br = BasicNewsRecipe.get_browser()
- br.set_handle_refresh(False)
- url = ('https://r.espn.go.com/members/v3_1/login')
- raw = br.open(url).read()
- raw = re.sub(r'(?s)
', '', raw)
- with TemporaryFile(suffix='.htm') as fname:
- with open(fname, 'wb') as f:
- f.write(raw)
- br.open_local_file(fname)
+ if self.username and self.password:
+ br.set_handle_refresh(False)
+ url = ('https://r.espn.go.com/members/v3_1/login')
+ raw = br.open(url).read()
+ raw = re.sub(r'(?s)', '', raw)
+ with TemporaryFile(suffix='.htm') as fname:
+ with open(fname, 'wb') as f:
+ f.write(raw)
+ br.open_local_file(fname)
- br.form = br.forms().next()
- br.form.find_control(name='username', type='text').value = self.username
- br.form['password'] = self.password
- br.submit().read()
- br.open('http://espn.go.com').read()
- br.set_handle_refresh(True)
+ br.form = br.forms().next()
+ br.form.find_control(name='username', type='text').value = self.username
+ br.form['password'] = self.password
+ br.submit().read()
+ br.open('http://espn.go.com').read()
+ br.set_handle_refresh(True)
return br
def get_article_url(self, article):
return article.get('guid', None)
def print_version(self, url):
-
if 'eticket' in url:
return url.partition('&')[0].replace('story?', 'print?')
match = re.search(r'story\?(id=\d+)', url)
diff --git a/recipes/tweakers_net.recipe b/recipes/tweakers_net.recipe
new file mode 100644
index 0000000000..f9bbe27ec9
--- /dev/null
+++ b/recipes/tweakers_net.recipe
@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__ = 'GPL v3'
+__docformat__ = 'restructuredtext en'
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Tweakers(BasicNewsRecipe):
+ '''
+ News recipe for Tweakers.net (Dutch) that keeps the reactions section
+ (the element with id "reacties") together with each article.
+ '''
+ title = u'Tweakers.net - with Reactions'
+ __author__ = 'Roedi06'
+ language = 'nl'
+ # Standard BasicNewsRecipe limits (age in days, articles per feed)
+ oldest_article = 7
+ max_articles_per_feed = 100
+ cover_url = 'http://img51.imageshack.us/img51/7470/tweakersnetebook.gif'
+
+ # Only the news column and the reactions block are kept from each page
+ keep_only_tags = [dict(name='div', attrs={'class':'columnwrapper news'}),
+ {'id':'reacties'},
+ ]
+
+ # Elements dropped from what keep_only_tags retained. Judging by their
+ # ids/classes these are navigation, ads, sidebars and buttons.
+ remove_tags = [dict(name='div', attrs={'id' : ['utracker']}),
+ {'id' : ['channelNav']},
+ {'id' : ['contentArea']},
+ {'class' : ['breadCrumb']},
+ {'class' : ['nextPrevious ellipsis']},
+ {'class' : ['advertorial']},
+ {'class' : ['sidebar']},
+ {'class' : ['filterBox']},
+ {'id' : ['toggleButtonTxt']},
+ {'id' : ['socialButtons']},
+ {'class' : ['button']},
+ {'class' : ['textadTop']},
+ {'class' : ['commentLink']},
+ {'title' : ['Reageer op deze reactie']},
+ {'class' : ['pageIndex']},
+ {'class' : ['reactieHeader collapsed']},
+ ]
+ no_stylesheets=True
+
+ # Each entry is (compiled pattern, replacement callable); every callable
+ # here returns '' so whatever matches is deleted.
+ # NOTE(review): the pattern text looks lost in this copy (empty raw strings,
+ # two literals split across lines) - restore from upstream before using.
+ preprocess_regexps = [
+ (re.compile(r'
', re.IGNORECASE | re.DOTALL), lambda match : ''),
+ (re.compile(r'', re.IGNORECASE | re.DOTALL), lambda match : ''),
+ (re.compile(r'
', re.IGNORECASE | re.DOTALL), lambda match : ''),
+ (re.compile(r''), lambda h1: ''),
+ (re.compile(r''), lambda h2: ''),
+ (re.compile(r'', re.IGNORECASE | re.DOTALL), lambda match : ''),
+ (re.compile(r'', re.IGNORECASE | re.DOTALL), lambda match : ''),
+ (re.compile(r''), lambda h1: ''),
+ ]
+
+ # Styling for the reaction header, reaction body and quoted text
+ extra_css = '.reactieHeader { color: #333333; font-size: 6px; border-bottom:solid 2px #333333; border-top:solid 1px #333333; } \
+ .reactieContent { font-family:"Times New Roman",Georgia,Serif; color: #000000; font-size: 8px; } \
+ .quote { font-family:"Times New Roman",Georgia,Serif; padding-left:2px; border-left:solid 3px #666666; color: #666666; }'
+
+
+ feeds = [(u'Tweakers.net', u'http://feeds.feedburner.com/tweakers/nieuws')]
+
+ def print_version(self, url):
+ '''
+ Return the URL to download for an article: the feed URL with
+ "?max=200" appended.
+ '''
+ # presumably raises the number of reactions shown on one page - TODO confirm
+ return url + '?max=200'
+
diff --git a/src/calibre/db/cache.py b/src/calibre/db/cache.py
index e79d496cd9..10fe0bb014 100644
--- a/src/calibre/db/cache.py
+++ b/src/calibre/db/cache.py
@@ -14,6 +14,7 @@ from functools import wraps, partial
from calibre.db.locking import create_locks, RecordLock
from calibre.db.fields import create_field
from calibre.db.tables import VirtualTable
+from calibre.db.lazy import FormatMetadata, FormatsList
from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import now
@@ -127,14 +128,8 @@ class Cache(object):
if not formats:
good_formats = None
else:
- good_formats = []
- for f in formats:
- try:
- mi.format_metadata[f] = self._format_metadata(book_id, f)
- except:
- pass
- else:
- good_formats.append(f)
+ # Both objects are lazy: per-format metadata is only looked up through
+ # format_metadata() the first time either of them is accessed.
+ mi.format_metadata = FormatMetadata(self, book_id, formats)
+ good_formats = FormatsList(formats, mi.format_metadata)
mi.formats = good_formats
mi.has_cover = _('Yes') if self._field_for('cover', book_id,
default_value=False) else ''
diff --git a/src/calibre/db/lazy.py b/src/calibre/db/lazy.py
new file mode 100644
index 0000000000..be9334c056
--- /dev/null
+++ b/src/calibre/db/lazy.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+ print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+import weakref
+from functools import wraps
+from collections import MutableMapping, MutableSequence
+
+'''
+Avoid doing stats on all files in a book when getting metadata for that book.
+Speeds up calibre startup with large libraries/libraries on a network share,
+with a composite custom column.
+'''
+
+# Lazy format metadata retrieval {{{
+def resolved(f):
+    '''
+    Decorator for methods of lazily populated containers.
+
+    On the first decorated call made on an instance, ``self._resolve()`` is
+    run and the instance is flagged with ``_must_resolve = False``; later
+    calls go straight to the wrapped method.
+    '''
+    @wraps(f)
+    def wrapper(self, *args, **kwargs):
+        # A missing attribute counts as "not resolved yet"
+        pending = getattr(self, '_must_resolve', True)
+        if pending:
+            self._resolve()
+            # Only reached when _resolve() did not raise
+            self._must_resolve = False
+        return f(self, *args, **kwargs)
+    return wrapper
+
+class MutableBase(object):
+    '''
+    Shared plumbing for the lazy containers in this module.
+
+    Every method delegates to ``self._values``. Each one is wrapped with
+    ``resolved``, so subclasses only have to provide a ``_resolve()`` method
+    that creates ``self._values``.
+    '''
+
+    # Container protocol
+
+    @resolved
+    def __len__(self):
+        return len(self._values)
+
+    @resolved
+    def __iter__(self):
+        return iter(self._values)
+
+    @resolved
+    def __contains__(self, key):
+        return key in self._values
+
+    @resolved
+    def __getitem__(self, fmt):
+        return self._values[fmt]
+
+    @resolved
+    def __setitem__(self, key, val):
+        self._values[key] = val
+
+    @resolved
+    def __delitem__(self, key):
+        del self._values[key]
+
+    # Text representations, all taken from the resolved values
+
+    @resolved
+    def __str__(self):
+        return str(self._values)
+
+    @resolved
+    def __repr__(self):
+        return repr(self._values)
+
+    @resolved
+    def __unicode__(self):
+        return unicode(self._values)
+
+
+class FormatMetadata(MutableBase, MutableMapping):
+    '''
+    Lazily populated mapping of format name to the value returned by
+    ``db.format_metadata(id_, format)`` for one book.
+
+    Nothing is looked up at construction time. The lookups happen the first
+    time the mapping is accessed through one of the ``MutableBase`` methods.
+
+    :param db: Object providing ``format_metadata(book_id, fmt)``. Only a
+               weak reference to it is stored.
+    :param id_: Id of the book, passed through to ``db.format_metadata``.
+    :param formats: Iterable of format names to look up.
+    '''
+
+    def __init__(self, db, id_, formats):
+        self._dbwref = weakref.ref(db)
+        self._id = id_
+        self._formats = formats
+
+    def _resolve(self):
+        '''
+        Build ``self._values``. Formats whose lookup fails are left out.
+        '''
+        self._values = {}
+        db = self._dbwref()
+        if db is None:
+            # The database object is gone, so there is nothing to query.
+            # The mapping stays empty, as it did before via swallowed errors.
+            return
+        for f in self._formats:
+            try:
+                self._values[f] = db.format_metadata(self._id, f)
+            except Exception:
+                # Best effort: a format that cannot be read is simply
+                # omitted. KeyboardInterrupt/SystemExit are not Exception
+                # subclasses, so they still propagate.
+                pass
+
+class FormatsList(MutableBase, MutableSequence):
+    '''
+    Lazily populated list of the entries of ``formats`` that are present
+    in ``format_metadata``, in their original order.
+
+    The membership tests are deferred until the list is first accessed.
+    '''
+
+    def __init__(self, formats, format_metadata):
+        self._formats = formats
+        self._format_metadata = format_metadata
+
+    def _resolve(self):
+        '''
+        Build ``self._values`` from the formats found in the metadata mapping.
+        '''
+        present = []
+        for fmt in self._formats:
+            if fmt in self._format_metadata:
+                present.append(fmt)
+        self._values = present
+
+    @resolved
+    def insert(self, idx, val):
+        # MutableSequence requires insert(); delegate to the resolved list
+        self._values.insert(idx, val)
+
+# }}}
+
diff --git a/src/calibre/ebooks/chm/input.py b/src/calibre/ebooks/chm/input.py
index 9aa8272ee9..f36685bd91 100644
--- a/src/calibre/ebooks/chm/input.py
+++ b/src/calibre/ebooks/chm/input.py
@@ -11,6 +11,7 @@ from calibre.customize.conversion import InputFormatPlugin
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename
+from calibre.constants import filesystem_encoding
class CHMInput(InputFormatPlugin):
@@ -36,6 +37,8 @@ class CHMInput(InputFormatPlugin):
log.debug('Processing CHM...')
with TemporaryDirectory('_chm2oeb') as tdir:
+ if not isinstance(tdir, unicode):
+ tdir = tdir.decode(filesystem_encoding)
html_input = plugin_for_input_format('html')
for opt in html_input.options:
setattr(options, opt.option.name, opt.recommended_value)
diff --git a/src/calibre/ebooks/chm/metadata.py b/src/calibre/ebooks/chm/metadata.py
index 26b09c7676..ea67947231 100644
--- a/src/calibre/ebooks/chm/metadata.py
+++ b/src/calibre/ebooks/chm/metadata.py
@@ -6,13 +6,14 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal '
__docformat__ = 'restructuredtext en'
-import re
+import re, codecs
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import string_to_authors, MetaInformation
from calibre.utils.logging import default_log
from calibre.ptempfile import TemporaryFile
+from calibre import force_unicode
def _clean(s):
return s.replace(u'\u00a0', u' ')
@@ -138,6 +139,13 @@ def get_metadata_from_reader(rdr):
resolve_entities=True)[0])
title = rdr.title
+ try:
+ x = rdr.GetEncoding()
+ codecs.lookup(x)
+ enc = x
+ except:
+ enc = 'cp1252'
+ title = force_unicode(title, enc)
authors = _get_authors(home)
mi = MetaInformation(title, authors)
publisher = _get_publisher(home)
diff --git a/src/calibre/ebooks/chm/reader.py b/src/calibre/ebooks/chm/reader.py
index 05ec388a9b..fc7d865265 100644
--- a/src/calibre/ebooks/chm/reader.py
+++ b/src/calibre/ebooks/chm/reader.py
@@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal ,' \
' and Alex Bramley .'
-import os, re
+import os, re, codecs
from calibre import guess_type as guess_mimetype
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
@@ -99,8 +99,17 @@ class CHMReader(CHMFile):
def ExtractFiles(self, output_dir=os.getcwdu(), debug_dump=False):
html_files = set([])
+ try:
+ x = self.GetEncoding()
+ codecs.lookup(x)
+ enc = x
+ except:
+ enc = 'cp1252'
for path in self.Contents():
- lpath = os.path.join(output_dir, path)
+ fpath = path
+ if not isinstance(path, unicode):
+ fpath = path.decode(enc)
+ lpath = os.path.join(output_dir, fpath)
self._ensure_dir(lpath)
try:
data = self.GetFile(path)
@@ -123,6 +132,7 @@ class CHMReader(CHMFile):
self.log.warn('%r filename too long, skipping'%path)
continue
raise
+
if debug_dump:
import shutil
shutil.copytree(output_dir, os.path.join(debug_dump, 'debug_dump'))
diff --git a/src/calibre/ebooks/oeb/parse_utils.py b/src/calibre/ebooks/oeb/parse_utils.py
index 712427d457..f0b48afb39 100644
--- a/src/calibre/ebooks/oeb/parse_utils.py
+++ b/src/calibre/ebooks/oeb/parse_utils.py
@@ -103,7 +103,7 @@ def html5_parse(data, max_nesting_depth=100):
xmlns_declaration = '{%s}'%XMLNS_NS
non_html5_namespaces = {}
seen_namespaces = set()
- for elem in tuple(data.iter()):
+ for elem in tuple(data.iter(tag=etree.Element)):
elem.attrib.pop('xmlns', None)
namespaces = {}
for x in tuple(elem.attrib):
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index 5fe010f829..00ca0e39a2 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -7,8 +7,8 @@ __docformat__ = 'restructuredtext en'
The database used to store ebook metadata
'''
import os, sys, shutil, cStringIO, glob, time, functools, traceback, re, \
- json, uuid, hashlib, copy, weakref
-from collections import defaultdict, MutableMapping, MutableSequence
+ json, uuid, hashlib, copy
+from collections import defaultdict
import threading, random
from itertools import repeat
from math import ceil
@@ -40,6 +40,7 @@ from calibre.utils.magick.draw import save_cover_data_to
from calibre.utils.recycle_bin import delete_file, delete_tree
from calibre.utils.formatter_functions import load_user_template_functions
from calibre.db.errors import NoSuchFormat
+from calibre.db.lazy import FormatMetadata, FormatsList
from calibre.utils.localization import (canonicalize_lang,
calibre_langcode_to_name)
@@ -81,90 +82,6 @@ class Tag(object):
def __repr__(self):
return str(self)
-class FormatMetadata(MutableMapping): # {{{
-
- def __init__(self, db, id_, formats):
- self.dbwref = weakref.ref(db)
- self.id_ = id_
- self.formats = formats
- self._must_do = True
- self.values = {}
-
- def _resolve(self):
- if self._must_do:
- for f in self.formats:
- try:
- self.values[f] = self.dbwref().format_metadata(self.id_, f)
- except:
- pass
- self._must_do = False
-
- def __getitem__(self, fmt):
- self._resolve()
- return self.values[fmt]
-
- def __setitem__(self, key, val):
- self._resolve()
- self.values[key] = val
-
- def __delitem__(self, key):
- self._resolve()
- self.values.__delitem__(key)
-
- def __len__(self):
- self._resolve()
- return len(self.values)
-
- def __iter__(self):
- self._resolve()
- return self.values.__iter__()
-
-class FormatsList(MutableSequence):
-
- def __init__(self, formats, format_metadata):
- self.formats = formats
- self.format_metadata = format_metadata
- self._must_do = True
- self.values = []
-
- def _resolve(self):
- if self._must_do:
- for f in self.formats:
- try:
- if f in self.format_metadata:
- self.values.append(f)
- except:
- pass
- self._must_do = False
-
- def __getitem__(self, dex):
- self._resolve()
- return self.values[dex]
-
- def __setitem__(self, key, dex):
- self._resolve()
- self.values[key] = dex
-
- def __delitem__(self, dex):
- self._resolve()
- self.values.__delitem__(dex)
-
- def __len__(self):
- self._resolve()
- return len(self.values)
-
- def __iter__(self):
- self._resolve()
- return self.values.__iter__()
-
- def insert(self, idx, val):
- self._resolve()
- self.values.insert(idx, val)
-
-# }}}
-
-
-
class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
'''
An ebook metadata database that stores references to ebook files on disk.
@@ -253,6 +170,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
except:
traceback.print_exc()
self.field_metadata = FieldMetadata()
+ self.format_filename_cache = defaultdict(dict)
self._library_id_ = None
# Create the lock to be used to guard access to the metadata writer
# queues. This must be an RLock, not a Lock
@@ -393,6 +311,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
if not self.is_second_db:
load_user_template_functions(self.prefs.get('user_template_functions', []))
+ # Load the format filename cache
+ self.format_filename_cache = defaultdict(dict)
+ for book_id, fmt, name in self.conn.get(
+ 'SELECT book,format,name FROM data'):
+ self.format_filename_cache[book_id][fmt.upper() if fmt else ''] = name
+
self.conn.executescript('''
DROP TRIGGER IF EXISTS author_insert_trg;
CREATE TEMP TRIGGER author_insert_trg
@@ -682,7 +606,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
fname = self.construct_file_name(id)
changed = False
for format in formats:
- name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
+ name = self.format_filename_cache[id].get(format.upper(), None)
if name and name != fname:
changed = True
break
@@ -1222,12 +1146,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
def format_files(self, index, index_is_id=False):
id = index if index_is_id else self.id(index)
- try:
- formats = self.conn.get('SELECT name,format FROM data WHERE book=?', (id,))
- formats = map(lambda x:(x[0], x[1]), formats)
- return formats
- except:
- return []
+ return [(v, k) for k, v in self.format_filename_cache[id].iteritems()]
def formats(self, index, index_is_id=False, verify_formats=True):
''' Return available formats as a comma separated list or None if there are no available formats '''
@@ -1313,7 +1232,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
'''
id = index if index_is_id else self.id(index)
try:
- name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
+ name = self.format_filename_cache[id][format.upper()]
except:
return None
if name:
@@ -1410,11 +1329,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
def add_format(self, index, format, stream, index_is_id=False, path=None,
notify=True, replace=True):
id = index if index_is_id else self.id(index)
- if format:
- self.format_metadata_cache[id].pop(format.upper(), None)
+ if not format: format = ''
+ self.format_metadata_cache[id].pop(format.upper(), None)
+ name = self.format_filename_cache[id].get(format.upper(), None)
if path is None:
path = os.path.join(self.library_path, self.path(id, index_is_id=True))
- name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
if name and not replace:
return False
name = self.construct_file_name(id)
@@ -1432,6 +1351,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.conn.execute('INSERT OR REPLACE INTO data (book,format,uncompressed_size,name) VALUES (?,?,?,?)',
(id, format.upper(), size, name))
self.conn.commit()
+ self.format_filename_cache[id][format.upper()] = name
self.refresh_ids([id])
if notify:
self.notify('metadata', [id])
@@ -1479,9 +1399,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
def remove_format(self, index, format, index_is_id=False, notify=True,
commit=True, db_only=False):
id = index if index_is_id else self.id(index)
- if format:
- self.format_metadata_cache[id].pop(format.upper(), None)
- name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
+ if not format: format = ''
+ self.format_metadata_cache[id].pop(format.upper(), None)
+ name = self.format_filename_cache[id].pop(format.upper(), None)
if name:
if not db_only:
try:
diff --git a/src/calibre/manual/images/bookmark.png b/src/calibre/manual/images/bookmark.png
index c6671a2541..4ba83fb49c 100644
Binary files a/src/calibre/manual/images/bookmark.png and b/src/calibre/manual/images/bookmark.png differ
diff --git a/src/calibre/manual/images/pref_button.png b/src/calibre/manual/images/pref_button.png
index f43f2d7627..52d9bae6e0 100644
Binary files a/src/calibre/manual/images/pref_button.png and b/src/calibre/manual/images/pref_button.png differ
diff --git a/src/calibre/manual/images/ref_mode_button.png b/src/calibre/manual/images/ref_mode_button.png
index efed1af26b..3ec70e91ab 100644
Binary files a/src/calibre/manual/images/ref_mode_button.png and b/src/calibre/manual/images/ref_mode_button.png differ