Merge from trunk

2026-03-30 22:02:40 -04:00 · 2012-01-18 11:23:48 +01:00 · 2012-01-18 11:23:48 +01:00 · 5162eb7c6d
commit 5162eb7c6d
parent 420edc45e0 a19fdb42cd
12 changed files with 229 additions and 130 deletions
--- a/recipes/espn.recipe
+++ b/recipes/espn.recipe
@ -20,7 +20,7 @@ class ESPN(BasicNewsRecipe):

    use_embedded_content = False
    remove_javascript     = True
-    needs_subscription = True
+    needs_subscription = 'optional'
    encoding= 'ISO-8859-1'

    remove_tags_before = dict(name='font', attrs={'class':'date'})
@ -75,32 +75,30 @@ class ESPN(BasicNewsRecipe):

        return soup

-
-
    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
-        br.set_handle_refresh(False)
-        url = ('https://r.espn.go.com/members/v3_1/login')
-        raw = br.open(url).read()
-        raw = re.sub(r'(?s)<form>.*?id="regsigninbtn".*?</form>', '', raw)
-        with TemporaryFile(suffix='.htm') as fname:
-            with open(fname, 'wb') as f:
-                f.write(raw)
-            br.open_local_file(fname)
+        if self.username and self.password:  # sign in only when both credentials are set; otherwise the plain browser is returned
+            br.set_handle_refresh(False)  # refresh handling stays off during login and is re-enabled below
+            url = ('https://r.espn.go.com/members/v3_1/login')
+            raw = br.open(url).read()
+            raw = re.sub(r'(?s)<form>.*?id="regsigninbtn".*?</form>', '', raw)  # cut the <form>...</form> span that contains id="regsigninbtn"
+            with TemporaryFile(suffix='.htm') as fname:  # the edited page is saved to a temp file so the browser can load it
+                with open(fname, 'wb') as f:
+                    f.write(raw)
+                br.open_local_file(fname)

-        br.form = br.forms().next()
-        br.form.find_control(name='username', type='text').value = self.username
-        br.form['password'] = self.password
-        br.submit().read()
-        br.open('http://espn.go.com').read()
-        br.set_handle_refresh(True)
+            br.form = br.forms().next()  # use the first form left on the locally opened page
+            br.form.find_control(name='username', type='text').value = self.username
+            br.form['password'] = self.password
+            br.submit().read()
+            br.open('http://espn.go.com').read()
+            br.set_handle_refresh(True)
        return br

    def get_article_url(self, article):
        return article.get('guid',  None)

    def print_version(self, url):
-
        if 'eticket' in url:
            return url.partition('&')[0].replace('story?', 'print?')
        match = re.search(r'story\?(id=\d+)', url)
--- a/recipes/tweakers_net.recipe
+++ b/recipes/tweakers_net.recipe
@ -0,0 +1,66 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__docformat__ = 'restructuredtext en'
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Tweakers(BasicNewsRecipe):  # news recipe for the Tweakers.net feed; keeps the reader reactions ('reacties') block
+     title          = u'Tweakers.net - with Reactions'
+     __author__     = 'Roedi06'
+     language       = 'nl'
+     oldest_article = 7
+     max_articles_per_feed = 100
+     cover_url       = 'http://img51.imageshack.us/img51/7470/tweakersnetebook.gif'
+
+     keep_only_tags = [dict(name='div', attrs={'class':'columnwrapper news'}),  # article body ...
+    {'id':'reacties'},  # ... plus the reactions container
+      ]
+
+     remove_tags    = [dict(name='div', attrs={'id' : ['utracker']}),  # elements dropped from the kept content
+                        {'id' : ['channelNav']},
+                        {'id' : ['contentArea']},
+                        {'class' : ['breadCrumb']},
+                        {'class' : ['nextPrevious ellipsis']},
+                        {'class' : ['advertorial']},
+                        {'class' : ['sidebar']},
+                        {'class' : ['filterBox']},
+                        {'id' : ['toggleButtonTxt']},
+                        {'id' : ['socialButtons']},
+                        {'class' : ['button']},
+                        {'class' : ['textadTop']},
+                        {'class' : ['commentLink']},
+                        {'title' : ['Reageer op deze reactie']},
+                        {'class' : ['pageIndex']},
+        {'class' : ['reactieHeader collapsed']},
+                      ]
+     no_stylesheets=True
+
+     preprocess_regexps = [  # (pattern, replacement) pairs, listed in the order they are meant to run
+        (re.compile(r'<hr\b[^>]*>', re.IGNORECASE | re.DOTALL), lambda match : ''),  # strip every <hr> tag, including <hr /> and <hr attr=...>
+        (re.compile(r'<p>', re.IGNORECASE | re.DOTALL), lambda match : ''),
+        (re.compile(r'</p>', re.IGNORECASE | re.DOTALL), lambda match : ''),
+        (re.compile(r'<a.*?>'), lambda h1: '<b><u>'),  # links are rendered as bold underlined text
+        (re.compile(r'</a>'), lambda h2: '</u></b>'),
+        (re.compile(r'<span class="new">', re.IGNORECASE | re.DOTALL), lambda match : ''),
+        (re.compile(r'</span>', re.IGNORECASE | re.DOTALL), lambda match : ''),
+        (re.compile(r'<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_0'), lambda match : ' - moderated 0<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_0'),  # put the score in as text before the image div is removed below
+        (re.compile(r'<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_1'), lambda match : ' - moderated +1<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_1'),
+        (re.compile(r'<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_2'), lambda match : ' - moderated +2<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_2'),
+        (re.compile(r'<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_3'), lambda match : ' - moderated +3<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_3'),
+        (re.compile(r'<div class="moderation">.*?</div>'), lambda h1: ''),  # must stay last: removes the moderation div matched by the four rules above
+     ]
+
+     extra_css = '.reactieHeader { color: #333333; font-size: 6px; border-bottom:solid 2px #333333; border-top:solid 1px #333333; } \
+       .reactieContent { font-family:"Times New Roman",Georgia,Serif; color: #000000; font-size: 8px; } \
+       .quote { font-family:"Times New Roman",Georgia,Serif; padding-left:2px; border-left:solid 3px #666666; color: #666666; }'
+
+
+     feeds          = [(u'Tweakers.net', u'http://feeds.feedburner.com/tweakers/nieuws')]
+
+     def print_version(self, url):  # returns the article URL with '?max=200' appended
+        return url + '?max=200'  # presumably raises the number of reactions shown per page - TODO confirm
+
--- a/src/calibre/db/cache.py
+++ b/src/calibre/db/cache.py
@ -14,6 +14,7 @@ from functools import wraps, partial
 from calibre.db.locking import create_locks, RecordLock
 from calibre.db.fields import create_field
 from calibre.db.tables import VirtualTable
+from calibre.db.lazy import FormatMetadata, FormatsList
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.utils.date import now

@ -127,14 +128,8 @@ class Cache(object):
        if not formats:
            good_formats = None
        else:
-            good_formats = []
-            for f in formats:
-                try:
-                    mi.format_metadata[f] = self._format_metadata(book_id, f)
-                except:
-                    pass
-                else:
-                    good_formats.append(f)
+            mi.format_metadata = FormatMetadata(self, book_id, formats)  # key the lazy lookup on this book's id, not the ``id`` builtin
+            good_formats = FormatsList(formats, mi.format_metadata)  # lazy list built from formats and the lazy metadata mapping
        mi.formats = good_formats
        mi.has_cover = _('Yes') if self._field_for('cover', book_id,
                default_value=False) else ''
--- a/src/calibre/db/lazy.py
+++ b/src/calibre/db/lazy.py
@ -0,0 +1,99 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import weakref
+from functools import wraps
+from collections import MutableMapping, MutableSequence
+
+'''
+Avoid doing stats on all files in a book when getting metadata for that book.
+Speeds up calibre startup with large libraries/libraries on a network share,
+with a composite custom column.
+'''
+
+# Lazy format metadata retrieval {{{
+def resolved(f):  # decorator: call self._resolve() once, on first use, before running the wrapped method
+    @wraps(f)
+    def wrapper(self, *args, **kwargs):
+        if getattr(self, '_must_resolve', True):  # a missing flag counts as "not resolved yet"
+            self._resolve()
+            self._must_resolve = False  # set only after _resolve() returns, so a raising _resolve() runs again next time
+        return f(self, *args, **kwargs)
+    return wrapper
+
+class MutableBase(object):  # mixin: container methods forwarded to self._values; each call goes through @resolved first
+
+    @resolved
+    def __str__(self):
+        return str(self._values)
+
+    @resolved
+    def __repr__(self):
+        return repr(self._values)
+
+    @resolved
+    def __unicode__(self):  # uses the Python 2 ``unicode`` builtin
+        return unicode(self._values)
+
+    @resolved
+    def __len__(self):
+        return len(self._values)
+
+    @resolved
+    def __iter__(self):
+        return iter(self._values)
+
+    @resolved
+    def __contains__(self, key):
+        return key in self._values
+
+    @resolved
+    def __getitem__(self, fmt):  # fmt is passed straight to self._values[...]
+        return self._values[fmt]
+
+    @resolved
+    def __setitem__(self, key, val):
+        self._values[key] = val
+
+    @resolved
+    def __delitem__(self, key):
+        del self._values[key]
+
+
+class FormatMetadata(MutableBase, MutableMapping):  # lazy mapping of format -> db.format_metadata(id, format) for one book
+
+    def __init__(self, db, id_, formats):  # db: object providing format_metadata(id, fmt); id_: book id; formats: formats to look up
+        self._dbwref = weakref.ref(db)  # weak reference, so this mapping does not keep db alive
+        self._id = id_
+        self._formats = formats
+
+    def _resolve(self):  # fills self._values; run by @resolved on first access
+        db = self._dbwref()  # None once db has been collected; each lookup below then fails and is skipped
+        self._values = {}
+        for f in self._formats:
+            try:
+                self._values[f] = db.format_metadata(self._id, f)
+            except Exception:  # best effort: unreadable formats are left out, but KeyboardInterrupt/SystemExit still propagate
+                pass
+
+class FormatsList(MutableBase, MutableSequence):  # lazy list of the formats that are present in format_metadata
+
+    def __init__(self, formats, format_metadata):  # formats: candidate formats; format_metadata: container tested with ``in``
+        self._formats = formats
+        self._format_metadata = format_metadata
+
+    def _resolve(self):  # fills self._values; run by @resolved on first access
+        self._values = [f for f in self._formats if f in self._format_metadata]  # keeps the order of self._formats
+
+    @resolved
+    def insert(self, idx, val):
+        self._values.insert(idx, val)
+
+# }}}
+
--- a/src/calibre/ebooks/chm/input.py
+++ b/src/calibre/ebooks/chm/input.py
@ -11,6 +11,7 @@ from calibre.customize.conversion import InputFormatPlugin
 from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.localization import get_lang
 from calibre.utils.filenames import ascii_filename
+from calibre.constants import filesystem_encoding

 class CHMInput(InputFormatPlugin):

@ -36,6 +37,8 @@ class CHMInput(InputFormatPlugin):

        log.debug('Processing CHM...')
        with TemporaryDirectory('_chm2oeb') as tdir:
+            if not isinstance(tdir, unicode):
+                tdir = tdir.decode(filesystem_encoding)
            html_input = plugin_for_input_format('html')
            for opt in html_input.options:
                setattr(options, opt.option.name, opt.recommended_value)
--- a/src/calibre/ebooks/chm/metadata.py
+++ b/src/calibre/ebooks/chm/metadata.py
@ -6,13 +6,14 @@ __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import re
+import re, codecs

 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.metadata import string_to_authors, MetaInformation
 from calibre.utils.logging import default_log
 from calibre.ptempfile import TemporaryFile
+from calibre import force_unicode

 def _clean(s):
    return s.replace(u'\u00a0', u' ')
@ -138,6 +139,13 @@ def get_metadata_from_reader(rdr):
        resolve_entities=True)[0])

    title = rdr.title
+    try:
+        x = rdr.GetEncoding()
+        codecs.lookup(x)
+        enc = x
+    except:
+        enc = 'cp1252'
+    title = force_unicode(title, enc)
    authors = _get_authors(home)
    mi = MetaInformation(title, authors)
    publisher = _get_publisher(home)
--- a/src/calibre/ebooks/chm/reader.py
+++ b/src/calibre/ebooks/chm/reader.py
@ -4,7 +4,7 @@ __license__ = 'GPL v3'
 __copyright__  = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
                 ' and Alex Bramley <a.bramley at gmail.com>.'

-import os, re
+import os, re, codecs

 from calibre import guess_type as guess_mimetype
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
@ -99,8 +99,17 @@ class CHMReader(CHMFile):

    def ExtractFiles(self, output_dir=os.getcwdu(), debug_dump=False):
        html_files = set([])
+        try:
+            x = self.GetEncoding()
+            codecs.lookup(x)
+            enc = x
+        except:
+            enc = 'cp1252'
        for path in self.Contents():
-            lpath = os.path.join(output_dir, path)
+            fpath = path
+            if not isinstance(path, unicode):
+                fpath = path.decode(enc)
+            lpath = os.path.join(output_dir, fpath)
            self._ensure_dir(lpath)
            try:
                data = self.GetFile(path)
@ -123,6 +132,7 @@ class CHMReader(CHMFile):
                    self.log.warn('%r filename too long, skipping'%path)
                    continue
                raise
+
        if debug_dump:
            import shutil
            shutil.copytree(output_dir, os.path.join(debug_dump, 'debug_dump'))
--- a/src/calibre/ebooks/oeb/parse_utils.py
+++ b/src/calibre/ebooks/oeb/parse_utils.py
@ -103,7 +103,7 @@ def html5_parse(data, max_nesting_depth=100):
    xmlns_declaration = '{%s}'%XMLNS_NS
    non_html5_namespaces = {}
    seen_namespaces = set()
-    for elem in tuple(data.iter()):
+    for elem in tuple(data.iter(tag=etree.Element)):
        elem.attrib.pop('xmlns', None)
        namespaces = {}
        for x in tuple(elem.attrib):
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@ -7,8 +7,8 @@ __docformat__ = 'restructuredtext en'
 The database used to store ebook metadata
 '''
 import os, sys, shutil, cStringIO, glob, time, functools, traceback, re, \
-        json, uuid, hashlib, copy, weakref
-from collections import defaultdict, MutableMapping, MutableSequence
+        json, uuid, hashlib, copy
+from collections import defaultdict
 import threading, random
 from itertools import repeat
 from math import ceil
@ -40,6 +40,7 @@ from calibre.utils.magick.draw import save_cover_data_to
 from calibre.utils.recycle_bin import delete_file, delete_tree
 from calibre.utils.formatter_functions import load_user_template_functions
 from calibre.db.errors import NoSuchFormat
+from calibre.db.lazy import FormatMetadata, FormatsList
 from calibre.utils.localization import (canonicalize_lang,
        calibre_langcode_to_name)

@ -81,90 +82,6 @@ class Tag(object):
    def __repr__(self):
        return str(self)

-class FormatMetadata(MutableMapping): # {{{
-
-    def __init__(self, db, id_, formats):
-        self.dbwref = weakref.ref(db)
-        self.id_ = id_
-        self.formats = formats
-        self._must_do = True
-        self.values = {}
-
-    def _resolve(self):
-        if self._must_do:
-            for f in self.formats:
-                try:
-                    self.values[f] = self.dbwref().format_metadata(self.id_, f)
-                except:
-                    pass
-            self._must_do = False
-
-    def __getitem__(self, fmt):
-        self._resolve()
-        return self.values[fmt]
-
-    def __setitem__(self, key, val):
-        self._resolve()
-        self.values[key] = val
-
-    def __delitem__(self, key):
-        self._resolve()
-        self.values.__delitem__(key)
-
-    def __len__(self):
-        self._resolve()
-        return len(self.values)
-
-    def __iter__(self):
-        self._resolve()
-        return self.values.__iter__()
-
-class FormatsList(MutableSequence):
-
-    def __init__(self, formats, format_metadata):
-        self.formats = formats
-        self.format_metadata = format_metadata
-        self._must_do = True
-        self.values = []
-
-    def _resolve(self):
-        if self._must_do:
-            for f in self.formats:
-                try:
-                    if f in self.format_metadata:
-                        self.values.append(f)
-                except:
-                    pass
-            self._must_do = False
-
-    def __getitem__(self, dex):
-        self._resolve()
-        return self.values[dex]
-
-    def __setitem__(self, key, dex):
-        self._resolve()
-        self.values[key] = dex
-
-    def __delitem__(self, dex):
-        self._resolve()
-        self.values.__delitem__(dex)
-
-    def __len__(self):
-        self._resolve()
-        return len(self.values)
-
-    def __iter__(self):
-        self._resolve()
-        return self.values.__iter__()
-
-    def insert(self, idx, val):
-        self._resolve()
-        self.values.insert(idx, val)
-
-# }}}
-
-
-
 class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
    '''
    An ebook metadata database that stores references to ebook files on disk.
@ -253,6 +170,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        except:
            traceback.print_exc()
        self.field_metadata = FieldMetadata()
+        self.format_filename_cache = defaultdict(dict)
        self._library_id_ = None
        # Create the lock to be used to guard access to the metadata writer
        # queues. This must be an RLock, not a Lock
@ -393,6 +311,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        if not self.is_second_db:
            load_user_template_functions(self.prefs.get('user_template_functions', []))

+        # Load the format filename cache
+        self.format_filename_cache = defaultdict(dict)
+        for book_id, fmt, name in self.conn.get(
+                'SELECT book,format,name FROM data'):
+            self.format_filename_cache[book_id][fmt.upper() if fmt else ''] = name
+
        self.conn.executescript('''
        DROP TRIGGER IF EXISTS author_insert_trg;
        CREATE TEMP TRIGGER author_insert_trg
@ -682,7 +606,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        fname = self.construct_file_name(id)
        changed = False
        for format in formats:
-            name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
+            name = self.format_filename_cache[id].get(format.upper(), None)
            if name and name != fname:
                changed = True
                break
@ -1222,12 +1146,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):

    def format_files(self, index, index_is_id=False):
        id = index if index_is_id else self.id(index)
-        try:
-            formats = self.conn.get('SELECT name,format FROM data WHERE book=?', (id,))
-            formats = map(lambda x:(x[0], x[1]), formats)
-            return formats
-        except:
-            return []
+        return [(v, k) for k, v in self.format_filename_cache[id].iteritems()]  # (name, FORMAT) pairs from the in-memory cache instead of a query on the data table

    def formats(self, index, index_is_id=False, verify_formats=True):
        ''' Return available formats as a comma separated list or None if there are no available formats '''
@ -1313,7 +1232,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        '''
        id = index if index_is_id else self.id(index)
        try:
-            name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
+            name = self.format_filename_cache[id][format.upper()]
        except:
            return None
        if name:
@ -1410,11 +1329,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
    def add_format(self, index, format, stream, index_is_id=False, path=None,
            notify=True, replace=True):
        id = index if index_is_id else self.id(index)
-        if format:
-            self.format_metadata_cache[id].pop(format.upper(), None)
+        if not format: format = ''
+        self.format_metadata_cache[id].pop(format.upper(), None)
+        name = self.format_filename_cache[id].get(format.upper(), None)
        if path is None:
            path = os.path.join(self.library_path, self.path(id, index_is_id=True))
-        name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
        if name and not replace:
            return False
        name = self.construct_file_name(id)
@ -1432,6 +1351,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        self.conn.execute('INSERT OR REPLACE INTO data (book,format,uncompressed_size,name) VALUES (?,?,?,?)',
                          (id, format.upper(), size, name))
        self.conn.commit()
+        self.format_filename_cache[id][format.upper()] = name
        self.refresh_ids([id])
        if notify:
            self.notify('metadata', [id])
@ -1479,9 +1399,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
    def remove_format(self, index, format, index_is_id=False, notify=True,
                      commit=True, db_only=False):
        id = index if index_is_id else self.id(index)
-        if format:
-            self.format_metadata_cache[id].pop(format.upper(), None)
-        name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
+        if not format: format = ''
+        self.format_metadata_cache[id].pop(format.upper(), None)
+        name = self.format_filename_cache[id].pop(format.upper(), None)
        if name:
            if not db_only:
                try:
--- a/src/calibre/manual/images/bookmark.png
+++ b/src/calibre/manual/images/bookmark.png
--- a/src/calibre/manual/images/pref_button.png
+++ b/src/calibre/manual/images/pref_button.png
--- a/src/calibre/manual/images/ref_mode_button.png
+++ b/src/calibre/manual/images/ref_mode_button.png