From d3814a3a5a4ecd5e26270f143ff40bd67338dcc1 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 30 Oct 2010 18:50:26 -0600
Subject: [PATCH 01/19] /browse: Make the top level page more semantic and
 pentadactyl friendly

---
 resources/content_server/browse/browse.css | 2 ++
 resources/content_server/browse/browse.js  | 2 +-
 setup/server.py                            | 8 +++++++-
 src/calibre/library/server/browse.py       | 5 +++--
 4 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/resources/content_server/browse/browse.css b/resources/content_server/browse/browse.css
index 92ed4c3ce6..9a2125c0c0 100644
--- a/resources/content_server/browse/browse.css
+++ b/resources/content_server/browse/browse.css
@@ -208,6 +208,8 @@ h2.library_name {
 
 }
 
+.toplevel li a { text-decoration: none; }
+
 .toplevel li img {
     vertical-align: middle;
     margin-right: 1em;
diff --git a/resources/content_server/browse/browse.js b/resources/content_server/browse/browse.js
index 89ce679871..db4e602449 100644
--- a/resources/content_server/browse/browse.js
+++ b/resources/content_server/browse/browse.js
@@ -116,7 +116,7 @@ function toplevel() {
     $(".sort_select").hide();
 
     $(".toplevel li").click(function() {
-        var href = $(this).children("span.url").text();
+        var href = $(this).children("a").attr('href');
         window.location = href;
     });
 
diff --git a/setup/server.py b/setup/server.py
index 2103f4805a..d9c444fa55 100644
--- a/setup/server.py
+++ b/setup/server.py
@@ -24,6 +24,10 @@ class Server(Command):
         self.rebuild_monocole()
         p = subprocess.Popen(['calibre-server', '--develop'],
                 stderr=subprocess.STDOUT, stdout=log)
+        time.sleep(0.2)
+        if p.poll() is not None:
+            print 'Starting server failed'
+            raise SystemExit(1)
         return p
 
     def run(self, opts):
@@ -38,9 +42,11 @@ class Server(Command):
             try:
                 raw_input('Press Enter to kill/restart server. Ctrl+C to quit: ')
             except:
+                if p.poll() is None:
+                    p.kill()
                 break
             else:
-                while p.returncode is None:
+                while p.poll() is None:
                     p.terminate()
                     time.sleep(0.1)
                     p.kill()
diff --git a/src/calibre/library/server/browse.py b/src/calibre/library/server/browse.py
index 9c442acc11..935d472cb1 100644
--- a/src/calibre/library/server/browse.py
+++ b/src/calibre/library/server/browse.py
@@ -335,9 +335,10 @@ class BrowseServer(object):
                 icon = 'blank.png'
             cats.append((meta['name'], category, icon))
 
-        cats = [('<li title="{2} {0}"><img src="{3}{src}" alt="{0}" />'
+        cats = [('<li><a title="{2} {0}" href="/browse/category/{1}">&nbsp;</a>'
+                 '<img src="{3}{src}" alt="{0}" />'
                  '<span class="label">{0}</span>'
-                 '<span class="url">{3}/browse/category/{1}</span></li>')
+                 '</li>')
                 .format(xml(x, True), xml(quote(y)), xml(_('Browse books by')),
                     self.opts.url_prefix, src='/browse/icon/'+z)
                 for x, y, z in cats]

From 39e102e3f84a76f54a43f1ab2e476bd1089e5a64 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 30 Oct 2010 19:25:50 -0600
Subject: [PATCH 02/19] /browse: Make category listing also more semantic and
 pentadactyl friendly

---
 resources/content_server/browse/browse.css     | 11 +++++++++--
 resources/content_server/browse/browse.js      |  4 ++--
 resources/content_server/{ => read}/monocle.js |  0
 setup/server.py                                |  2 +-
 src/calibre/library/server/browse.py           | 16 +++++++++-------
 5 files changed, 21 insertions(+), 12 deletions(-)
 rename resources/content_server/{ => read}/monocle.js (100%)

diff --git a/resources/content_server/browse/browse.css b/resources/content_server/browse/browse.css
index 9a2125c0c0..1243795e55 100644
--- a/resources/content_server/browse/browse.css
+++ b/resources/content_server/browse/browse.css
@@ -263,9 +263,16 @@ h2.library_name {
 
 }
 
-.category div.category-item span.href { display: none }
+.category div.category-item a { text-decoration: none; color: inherit; }
 
-#groups span.load_href { display: none }
+#groups a.load_href { 
+    text-decoration: none;
+    color: inherit;
+    font-size: medium;
+    font-weight: normal;
+    padding: 0;
+    padding-left: 0.5em;
+}
 
 #groups h3 {
     font-weight: bold;
diff --git a/resources/content_server/browse/browse.js b/resources/content_server/browse/browse.js
index db4e602449..e0585a9afd 100644
--- a/resources/content_server/browse/browse.js
+++ b/resources/content_server/browse/browse.js
@@ -133,7 +133,7 @@ function render_error(msg) {
 // Category feed {{{
 
 function category_clicked() {
-   var href = $(this).find("span.href").html();
+   var href = $(this).find("a").attr('href');
    window.location = href;
 }
 
@@ -151,7 +151,7 @@ function category() {
 
         change: function(event, ui) {
             if (ui.newContent) {
-                var href = ui.newContent.children("span.load_href").html();
+                var href = ui.newContent.prev().children("a.load_href").attr('href');
                 ui.newContent.children(".loading").show();
                 if (href) {
                     $.ajax({
diff --git a/resources/content_server/monocle.js b/resources/content_server/read/monocle.js
similarity index 100%
rename from resources/content_server/monocle.js
rename to resources/content_server/read/monocle.js
diff --git a/setup/server.py b/setup/server.py
index d9c444fa55..443ffb7da9 100644
--- a/setup/server.py
+++ b/setup/server.py
@@ -18,7 +18,7 @@ class Server(Command):
     def rebuild_monocole(self):
         subprocess.check_call(['sprocketize', '-C', self.MONOCLE_PATH,
             '-I', 'src', 'src/monocle.js'],
-            stdout=open('resources/content_server/monocle.js', 'wb'))
+            stdout=open('resources/content_server/read/monocle.js', 'wb'))
 
     def launch_server(self, log):
         self.rebuild_monocole()
diff --git a/src/calibre/library/server/browse.py b/src/calibre/library/server/browse.py
index 935d472cb1..7131ead77f 100644
--- a/src/calibre/library/server/browse.py
+++ b/src/calibre/library/server/browse.py
@@ -123,9 +123,10 @@ def get_category_items(category, items, restriction, datatype, prefix): # {{{
 
     def item(i):
         templ = (u'<div title="{4}" class="category-item">'
-                '<div class="category-name">{0}</div><div>{1}</div>'
-                '<div>{2}'
-                '<span class="href">{5}{3}</span></div></div>')
+                '<div class="category-name">'
+                '<a href="{5}{3}" title="{4}">{0}</a></div>'
+                '<div>{1}</div>'
+                '<div>{2}</div></div>')
         rating, rstring = render_rating(i.avg_rating, prefix)
         name = xml(i.name)
         if datatype == 'rating':
@@ -142,7 +143,7 @@ def get_category_items(category, items, restriction, datatype, prefix): # {{{
             q = category
         href = '/browse/matches/%s/%s'%(quote(q), quote(id_))
         return templ.format(xml(name), rating,
-                xml(desc), xml(href), rstring, prefix)
+                xml(desc), xml(href, True), rstring, prefix)
 
     items = list(map(item, items))
     return '\n'.join(['<div class="category-container">'] + items + ['</div>'])
@@ -394,14 +395,15 @@ class BrowseServer(object):
             for x in sorted(starts):
                 category_groups[x] = len([y for y in items if
                     getter(y).upper().startswith(x)])
-            items = [(u'<h3 title="{0}">{0} <span>[{2}]</span></h3><div>'
+            items = [(u'<h3 title="{0}"><a class="load_href" title="{0}"'
+                      u' href="{4}{3}"><strong>{0}</strong> [{2}]</a></h3><div>'
                       u'<div class="loaded" style="display:none"></div>'
                       u'<div class="loading"><img alt="{1}" src="{4}/static/loading.gif" /><em>{1}</em></div>'
-                      u'<span class="load_href">{4}{3}</span></div>').format(
+                      u'</div>').format(
                         xml(s, True),
                         xml(_('Loading, please wait'))+'&hellip;',
                         unicode(c),
-                        xml(u'/browse/category_group/%s/%s'%(category, s)),
+                        xml(u'/browse/category_group/%s/%s'%(category, s), True),
                         self.opts.url_prefix)
                     for s, c in category_groups.items()]
             items = '\n\n'.join(items)

From 20e015ed748761ffc331e92aa9e998eff5d8622f Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 30 Oct 2010 22:40:13 -0600
Subject: [PATCH 03/19] Content server: Handle books with # in their
 title/authors correctly. Fixes #7354 (Having problems with calibre server)

---
 setup/server.py                       | 69 ++++++++++++++++++++++-----
 src/calibre/library/server/browse.py  |  3 +-
 src/calibre/library/server/content.py |  4 +-
 src/calibre/library/server/mobile.py  |  5 +-
 4 files changed, 63 insertions(+), 18 deletions(-)

diff --git a/setup/server.py b/setup/server.py
index 443ffb7da9..c48294ac70 100644
--- a/setup/server.py
+++ b/setup/server.py
@@ -5,10 +5,37 @@ __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-import subprocess, tempfile, os, time
+import subprocess, tempfile, os, time, sys
 
 from setup import Command
 
+try:
+    from pyinotify import WatchManager, ThreadedNotifier, EventsCodes, ProcessEvent
+except:
+    wm = None
+else:
+    wm = WatchManager()
+    flags = EventsCodes.ALL_FLAGS
+    mask = flags['IN_MODIFY']
+
+    class ProcessEvents(ProcessEvent):
+
+        def __init__(self, command):
+            ProcessEvent.__init__(self)
+            self.command = command
+
+        def process_default(self, event):
+            name = getattr(event,
+                    'name', None)
+            if name and os.path.splitext(name)[1].startswith('.py'):
+                print
+                print name, 'changed'
+                self.command.kill_server()
+                self.command.launch_server()
+                print self.command.prompt,
+                sys.stdout.flush()
+
+
 class Server(Command):
 
     description = 'Run the calibre server in development mode conveniently'
@@ -20,35 +47,51 @@ class Server(Command):
             '-I', 'src', 'src/monocle.js'],
             stdout=open('resources/content_server/read/monocle.js', 'wb'))
 
-    def launch_server(self, log):
+    def launch_server(self):
+        print 'Starting server...\n'
         self.rebuild_monocole()
         p = subprocess.Popen(['calibre-server', '--develop'],
-                stderr=subprocess.STDOUT, stdout=log)
+                stderr=subprocess.STDOUT, stdout=self.server_log)
         time.sleep(0.2)
         if p.poll() is not None:
             print 'Starting server failed'
             raise SystemExit(1)
         return p
 
+    def kill_server(self):
+        while self.server_proc.poll() is None:
+            self.server_proc.terminate()
+            time.sleep(0.1)
+            self.server_proc.kill()
+
+    def watch(self):
+        if wm is not None:
+            self.notifier = ThreadedNotifier(wm, ProcessEvents(self))
+            self.notifier.start()
+            self.wdd = wm.add_watch(os.path.abspath('src'), mask, rec=True)
+
     def run(self, opts):
         tdir = tempfile.gettempdir()
         logf = os.path.join(tdir, 'calibre-server.log')
-        log = open(logf, 'ab')
+        self.server_log = open(logf, 'ab')
+        self.prompt = 'Press Enter to kill/restart server. Ctrl+C to quit: '
         print 'Server log available at:', logf
+        print
+        self.server_proc = None
+        self.watch()
 
         while True:
-            print 'Starting server...'
-            p = self.launch_server(log)
+            self.server_proc = self.launch_server()
             try:
-                raw_input('Press Enter to kill/restart server. Ctrl+C to quit: ')
+                raw_input(self.prompt)
             except:
-                if p.poll() is None:
-                    p.kill()
+                if self.server_proc.poll() is None:
+                    self.server_proc.kill()
                 break
             else:
-                while p.poll() is None:
-                    p.terminate()
-                    time.sleep(0.1)
-                    p.kill()
+                self.kill_server()
         print
 
+        if hasattr(self, 'notifier'):
+            self.notifier.stop()
+
diff --git a/src/calibre/library/server/browse.py b/src/calibre/library/server/browse.py
index 7131ead77f..709d872ba2 100644
--- a/src/calibre/library/server/browse.py
+++ b/src/calibre/library/server/browse.py
@@ -566,7 +566,8 @@ class BrowseServer(object):
             if not val:
                 val = ''
             args[key] = xml(val, True)
-        fname = ascii_filename(args['title']) + ' - ' + ascii_filename(args['authors'])
+        fname = quote(ascii_filename(args['title']) + ' - ' +
+                ascii_filename(args['authors']))
         return args, fmt, fmts, fname
 
     @Endpoint(mimetype='application/json; charset=utf-8')
diff --git a/src/calibre/library/server/content.py b/src/calibre/library/server/content.py
index 670c31b9df..6437f02cb6 100644
--- a/src/calibre/library/server/content.py
+++ b/src/calibre/library/server/content.py
@@ -70,10 +70,10 @@ class ContentServer(object):
             id = id.rpartition('_')[-1].partition('.')[0]
             match = re.search(r'\d+', id)
             if not match:
-                raise cherrypy.HTTPError(400, 'id:%s not an integer'%id)
+                raise cherrypy.HTTPError(404, 'id:%s not an integer'%id)
             id = int(match.group())
         if not self.db.has_id(id):
-            raise cherrypy.HTTPError(400, 'id:%d does not exist in database'%id)
+            raise cherrypy.HTTPError(404, 'id:%d does not exist in database'%id)
         if what == 'thumb' or what.startswith('thumb_'):
             try:
                 width, height = map(int, what.split('_')[1:])
diff --git a/src/calibre/library/server/mobile.py b/src/calibre/library/server/mobile.py
index a889089109..d66e6d842f 100644
--- a/src/calibre/library/server/mobile.py
+++ b/src/calibre/library/server/mobile.py
@@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en'
 
 import re, os
 import __builtin__
+from urllib import quote
 
 import cherrypy
 from lxml import html
@@ -115,8 +116,8 @@ def build_index(books, num, search, sort, order, start, total, url_base, CKEYS,
 
         data = TD()
         for fmt in book['formats'].split(','):
-            a = ascii_filename(book['authors'])
-            t = ascii_filename(book['title'])
+            a = quote(ascii_filename(book['authors']))
+            t = quote(ascii_filename(book['title']))
             s = SPAN(
                 A(
                     fmt.lower(),

From b597410d84a084da3f7ca7566ec50c62ea6cb812 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 30 Oct 2010 23:25:30 -0600
Subject: [PATCH 04/19] calibre-server: Make auto reload control separate from
 --develop with a new command line option --auto-reload

---
 setup/server.py                    | 35 +++++++++++++++++-------------
 src/calibre/library/server/base.py | 21 +++++++++---------
 src/calibre/library/server/main.py |  3 +++
 3 files changed, 34 insertions(+), 25 deletions(-)

diff --git a/setup/server.py b/setup/server.py
index c48294ac70..98a9e8fa90 100644
--- a/setup/server.py
+++ b/setup/server.py
@@ -6,6 +6,7 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
 import subprocess, tempfile, os, time, sys
+from threading import RLock
 
 from setup import Command
 
@@ -27,10 +28,11 @@ else:
         def process_default(self, event):
             name = getattr(event,
                     'name', None)
-            if name and os.path.splitext(name)[1].startswith('.py'):
+            if name and os.path.splitext(name)[1] == '.py':
                 print
                 print name, 'changed'
                 self.command.kill_server()
+                time.sleep(0.1)
                 self.command.launch_server()
                 print self.command.prompt,
                 sys.stdout.flush()
@@ -49,20 +51,23 @@ class Server(Command):
 
     def launch_server(self):
         print 'Starting server...\n'
-        self.rebuild_monocole()
-        p = subprocess.Popen(['calibre-server', '--develop'],
-                stderr=subprocess.STDOUT, stdout=self.server_log)
-        time.sleep(0.2)
-        if p.poll() is not None:
-            print 'Starting server failed'
-            raise SystemExit(1)
-        return p
+        with self.lock:
+            self.rebuild_monocole()
+            p = subprocess.Popen(['calibre-server', '--develop'],
+                    stderr=subprocess.STDOUT, stdout=self.server_log)
+            time.sleep(0.2)
+            if p.poll() is not None:
+                print 'Starting server failed'
+                raise SystemExit(1)
+            return p
 
     def kill_server(self):
-        while self.server_proc.poll() is None:
-            self.server_proc.terminate()
-            time.sleep(0.1)
-            self.server_proc.kill()
+        print 'Killing server...\n'
+        with self.lock:
+            if self.server_proc.poll() is None:
+                self.server_proc.terminate()
+            while self.server_proc.poll() is None:
+                time.sleep(0.1)
 
     def watch(self):
         if wm is not None:
@@ -71,6 +76,7 @@ class Server(Command):
             self.wdd = wm.add_watch(os.path.abspath('src'), mask, rec=True)
 
     def run(self, opts):
+        self.lock = RLock()
         tdir = tempfile.gettempdir()
         logf = os.path.join(tdir, 'calibre-server.log')
         self.server_log = open(logf, 'ab')
@@ -85,8 +91,7 @@ class Server(Command):
             try:
                 raw_input(self.prompt)
             except:
-                if self.server_proc.poll() is None:
-                    self.server_proc.kill()
+                self.kill_server()
                 break
             else:
                 self.kill_server()
diff --git a/src/calibre/library/server/base.py b/src/calibre/library/server/base.py
index c9025a28f8..29636c5659 100644
--- a/src/calibre/library/server/base.py
+++ b/src/calibre/library/server/base.py
@@ -118,16 +118,17 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache,
         self.set_database(db)
 
         cherrypy.config.update({
-                                'log.screen'             : opts.develop,
-                                'engine.autoreload_on'   : opts.develop,
-                                'tools.log_headers.on'   : opts.develop,
-                                'checker.on'             : opts.develop,
-                                'request.show_tracebacks': show_tracebacks,
-                                'server.socket_host'     : listen_on,
-                                'server.socket_port'     : opts.port,
-                                'server.socket_timeout'  : opts.timeout, #seconds
-                                'server.thread_pool'     : opts.thread_pool, # number of threads
-                               })
+            'log.screen'             : opts.develop,
+            'engine.autoreload_on'   : getattr(opts,
+                                        'auto_reload', False),
+            'tools.log_headers.on'   : opts.develop,
+            'checker.on'             : opts.develop,
+            'request.show_tracebacks': show_tracebacks,
+            'server.socket_host'     : listen_on,
+            'server.socket_port'     : opts.port,
+            'server.socket_timeout'  : opts.timeout, #seconds
+            'server.thread_pool'     : opts.thread_pool, # number of threads
+        })
         if embedded or wsgi:
             cherrypy.config.update({'engine.SIGHUP'          : None,
                                     'engine.SIGTERM'         : None,})
diff --git a/src/calibre/library/server/main.py b/src/calibre/library/server/main.py
index fbd811a1ab..b7cb3ecf12 100644
--- a/src/calibre/library/server/main.py
+++ b/src/calibre/library/server/main.py
@@ -58,6 +58,9 @@ The OPDS interface is advertised via BonJour automatically.
             help=_('Specifies a restriction to be used for this invocation. '
                    'This option overrides any per-library settings specified'
                    ' in the GUI'))
+    parser.add_option('--auto-reload', default=False, action='store_true',
+            help=_('Auto reload server when source code changes. May not'
+                ' work in all environments.'))
     return parser
 
 

From 32b21d78efc8b7d582702b0376b13b707579c116 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 30 Oct 2010 23:42:35 -0600
Subject: [PATCH 05/19] Dev server: track server process in launch_server, guard kill_server

---
 setup/server.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/setup/server.py b/setup/server.py
index 98a9e8fa90..276a606fc4 100644
--- a/setup/server.py
+++ b/setup/server.py
@@ -32,7 +32,6 @@ else:
                 print
                 print name, 'changed'
                 self.command.kill_server()
-                time.sleep(0.1)
                 self.command.launch_server()
                 print self.command.prompt,
                 sys.stdout.flush()
@@ -53,7 +52,7 @@ class Server(Command):
         print 'Starting server...\n'
         with self.lock:
             self.rebuild_monocole()
-            p = subprocess.Popen(['calibre-server', '--develop'],
+            self.server_proc = p = subprocess.Popen(['calibre-server', '--develop'],
                     stderr=subprocess.STDOUT, stdout=self.server_log)
             time.sleep(0.2)
             if p.poll() is not None:
@@ -63,11 +62,12 @@ class Server(Command):
 
     def kill_server(self):
         print 'Killing server...\n'
-        with self.lock:
-            if self.server_proc.poll() is None:
-                self.server_proc.terminate()
-            while self.server_proc.poll() is None:
-                time.sleep(0.1)
+        if self.server_proc is not None:
+            with self.lock:
+                if self.server_proc.poll() is None:
+                    self.server_proc.terminate()
+                while self.server_proc.poll() is None:
+                    time.sleep(0.1)
 
     def watch(self):
         if wm is not None:
@@ -83,14 +83,14 @@ class Server(Command):
         self.prompt = 'Press Enter to kill/restart server. Ctrl+C to quit: '
         print 'Server log available at:', logf
         print
-        self.server_proc = None
         self.watch()
 
         while True:
-            self.server_proc = self.launch_server()
+            self.launch_server()
             try:
                 raw_input(self.prompt)
             except:
+                print
                 self.kill_server()
                 break
             else:

From b4c3bcf9179c0d3f3b3085be48cd8608aea05ba6 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 31 Oct 2010 09:12:40 -0600
Subject: [PATCH 06/19] Fix #7356 (Error "'unicode' object has no attribute
 'isoformat'" when copying book to iTunes)

---
 src/calibre/devices/apple/driver.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/calibre/devices/apple/driver.py b/src/calibre/devices/apple/driver.py
index 9ad3cf3e08..74fa868255 100644
--- a/src/calibre/devices/apple/driver.py
+++ b/src/calibre/devices/apple/driver.py
@@ -19,7 +19,7 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.metadata.epub import set_metadata
 from calibre.library.server.utils import strftime
 from calibre.utils.config import config_dir, prefs
-from calibre.utils.date import isoformat, now, parse_date
+from calibre.utils.date import now, parse_date
 from calibre.utils.logging import Log
 from calibre.utils.zipfile import ZipFile
 
@@ -2521,11 +2521,11 @@ class ITUNES(DriverBase):
                         metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour,
                                                    old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo)
                     else:
-                        metadata.timestamp = isoformat(now())
+                        metadata.timestamp = now()
                         if DEBUG:
                             self.log.info("   add timestamp: %s" % metadata.timestamp)
                 else:
-                    metadata.timestamp = isoformat(now())
+                    metadata.timestamp = now()
                     if DEBUG:
                         self.log.warning("   missing <metadata> block in OPF file")
                         self.log.info("   add timestamp: %s" % metadata.timestamp)

From 9c85c1b273df3226341215a1b422e4bf1fc583d7 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 31 Oct 2010 09:28:37 -0600
Subject: [PATCH 07/19] Content server: Fix bug that caused errors on systems
 that do not use UTF-8 encoding

---
 src/calibre/library/server/browse.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/calibre/library/server/browse.py b/src/calibre/library/server/browse.py
index 709d872ba2..9530a34c73 100644
--- a/src/calibre/library/server/browse.py
+++ b/src/calibre/library/server/browse.py
@@ -253,8 +253,6 @@ class BrowseServer(object):
         lp = self.db.library_path
         if isbytestring(lp):
             lp = force_unicode(lp, filesystem_encoding)
-        if isinstance(ans, unicode):
-            ans = ans.encode('utf-8')
         ans = ans.replace('{library_name}', xml(os.path.basename(lp)))
         ans = ans.replace('{library_path}', xml(lp, True))
         ans = ans.replace('{initial_search}', initial_search)

From a149cba9ebf201d010a74259d9bfc78b1953b2fb Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 31 Oct 2010 11:16:22 -0600
Subject: [PATCH 08/19] Fix #7357 (Support for Digma Q600)

---
 setup/server.py                   | 5 +++++
 src/calibre/customize/builtins.py | 3 ++-
 src/calibre/devices/misc.py       | 9 +++++++++
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/setup/server.py b/setup/server.py
index 276a606fc4..66cb6adf7b 100644
--- a/setup/server.py
+++ b/setup/server.py
@@ -85,8 +85,13 @@ class Server(Command):
         print
         self.watch()
 
+        first = True
         while True:
             self.launch_server()
+            if first:
+                pass
+            first = False
+
             try:
                 raw_input(self.prompt)
             except:
diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 2945cc6604..3cc84f248d 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -475,7 +475,7 @@ from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, \
         SOVOS, PICO
 from calibre.devices.sne.driver import SNE
 from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \
-        GEMEI, VELOCITYMICRO, PDNOVEL_KOBO
+        GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, Q600
 from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
 from calibre.devices.kobo.driver import KOBO
 
@@ -586,6 +586,7 @@ plugins += [
     AVANT,
     MENTOR,
     SWEEX,
+    Q600,
     KOGAN,
     PDNOVEL,
     SPECTRA,
diff --git a/src/calibre/devices/misc.py b/src/calibre/devices/misc.py
index 92e26d47e4..af5a77ce03 100644
--- a/src/calibre/devices/misc.py
+++ b/src/calibre/devices/misc.py
@@ -72,6 +72,15 @@ class SWEEX(USBMS):
     EBOOK_DIR_MAIN = ''
     SUPPORTS_SUB_DIRS = True
 
+class Q600(SWEEX):
+
+    name = 'Digma Q600 Device interface'
+    gui_name = 'Q600'
+    description    = _('Communicate with the Digma Q600')
+
+    BCD = [0x325]
+    FORMATS     = ['epub', 'fb2', 'mobi', 'prc', 'html', 'rtf', 'chm', 'pdf', 'txt']
+
 class KOGAN(SWEEX):
 
     name           = 'Kogan Device Interface'

From e268beaa9081a2b9afc13dbe32471e43817ad88d Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 31 Oct 2010 11:48:22 -0600
Subject: [PATCH 09/19] Fix #7362 (7.26 freezing)

---
 src/calibre/ebooks/metadata/amazon.py |  14 +-
 src/calibre/library/comments.py       |  12 +
 src/calibre/utils/html2text.py        | 451 ++++++++++++++++++++++++++
 3 files changed, 471 insertions(+), 6 deletions(-)
 create mode 100644 src/calibre/utils/html2text.py

diff --git a/src/calibre/ebooks/metadata/amazon.py b/src/calibre/ebooks/metadata/amazon.py
index a8ff0f1ad0..e61e0b2748 100644
--- a/src/calibre/ebooks/metadata/amazon.py
+++ b/src/calibre/ebooks/metadata/amazon.py
@@ -14,6 +14,7 @@ from calibre import browser
 from calibre.ebooks.metadata import check_isbn
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.chardet import xml_to_unicode
+from calibre.library.comments import sanitize_comments_html
 
 def find_asin(br, isbn):
     q = 'http://www.amazon.com/s?field-keywords='+isbn
@@ -95,25 +96,26 @@ def get_metadata(br, asin, mi):
         # remove all attributes from tags
         desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
         # Collapse whitespace
-        desc = re.sub('\n+', '\n', desc)
-        desc = re.sub(' +', ' ', desc)
+        #desc = re.sub('\n+', '\n', desc)
+        #desc = re.sub(' +', ' ', desc)
         # Remove the notice about text referring to out of print editions
         desc = re.sub(r'(?s)<em>--This text ref.*?</em>', '', desc)
         # Remove comments
         desc = re.sub(r'(?s)<!--.*?-->', '', desc)
-        mi.comments = desc
+        mi.comments = sanitize_comments_html(desc)
 
     return True
 
 
 def main(args=sys.argv):
     # Test xisbn
-    print get_social_metadata('Learning Python', None, None, '8324616489')
-    print
+    #print get_social_metadata('Learning Python', None, None, '8324616489')
+    #print
 
     # Test sophisticated comment formatting
-    print get_social_metadata('Swan Thieves', None, None, '9780316065795')
+    print get_social_metadata('Swan Thieves', None, None, '9781416580829')
     print
+    return
 
     # Random tests
     print get_social_metadata('Star Trek: Destiny: Mere Mortals', None, None, '9781416551720')
diff --git a/src/calibre/library/comments.py b/src/calibre/library/comments.py
index 670d9f2564..45d6ccaa45 100644
--- a/src/calibre/library/comments.py
+++ b/src/calibre/library/comments.py
@@ -11,11 +11,15 @@ from calibre.constants import preferred_encoding
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, \
         CData, Comment, Declaration, ProcessingInstruction
 from calibre import prepare_string_for_xml
+from calibre.utils.html2text import html2text
+from calibre.ebooks.markdown import markdown
 
 # Hackish - ignoring sentences ending or beginning in numbers to avoid
 # confusion with decimal points.
 lost_cr_pat = re.compile('([a-z])([\.\?!])([A-Z])')
 lost_cr_exception_pat = re.compile(r'(Ph\.D)|(D\.Phil)|((Dr|Mr|Mrs|Ms)\.[A-Z])')
+sanitize_pat = re.compile(r'<script|<table|<tr|<td|<th|<style|<iframe',
+        re.IGNORECASE)
 
 def comments_to_html(comments):
     '''
@@ -53,6 +57,9 @@ def comments_to_html(comments):
                 for x in comments.split('\n\n')]
         return '\n'.join(parts)
 
+    if sanitize_pat.search(comments) is not None:
+        return sanitize_comments_html(comments)
+
     # Explode lost CRs to \n\n
     comments = lost_cr_exception_pat.sub(lambda m: m.group().replace('.',
         '.\r'), comments)
@@ -115,6 +122,11 @@ def comments_to_html(comments):
 
     return result.renderContents(encoding=None)
 
+def sanitize_comments_html(html):
+    text = html2text(html)  # HTML -> Markdown-structured text
+    md = markdown.Markdown(safe_mode=True)  # NOTE(review): safe_mode presumably keeps raw HTML left in the text out of the output -- confirm against the bundled markdown
+    return md.convert(text)  # convert the Markdown text with that Markdown instance
+
 def test():
     for pat, val in [
             ('lineone\n\nlinetwo',
diff --git a/src/calibre/utils/html2text.py b/src/calibre/utils/html2text.py
new file mode 100644
index 0000000000..afe5a0aded
--- /dev/null
+++ b/src/calibre/utils/html2text.py
@@ -0,0 +1,451 @@
+#!/usr/bin/env python
+"""html2text: Turn HTML into equivalent Markdown-structured text."""
+__version__ = "2.39"
+__author__ = "Aaron Swartz (me@aaronsw.com)"
+__copyright__ = "(C) 2004-2008 Aaron Swartz. GNU GPL 3."
+__contributors__ = ["Martin 'Joey' Schulze", "Ricardo Reyes", "Kevin Jay North"]
+
+# TODO:
+#   Support decoded entities with unifiable.
+
+if not hasattr(__builtins__, 'True'): True, False = 1, 0
+import re, sys, urllib, htmlentitydefs, codecs, StringIO, types
+import sgmllib
+import urlparse
+sgmllib.charref = re.compile('&#([xX]?[0-9a-fA-F]+)[^0-9a-fA-F]')
+
+try: from textwrap import wrap
+except: pass
+
+# Use Unicode characters instead of their ascii pseudo-replacements
+UNICODE_SNOB = 0
+
+# Put the links after each paragraph instead of at the end.
+LINKS_EACH_PARAGRAPH = 0
+
+# Wrap long lines at position. 0 for no wrapping. (Requires Python 2.3.)
+BODY_WIDTH = 78
+
+# Don't show internal links (href="#local-anchor") -- corresponding link targets
+# won't be visible in the plain text file anyway.
+SKIP_INTERNAL_LINKS = False
+
+### Entity Nonsense ###
+
+def name2cp(k):
+    """Map an HTML entity name to its code point; unknown names raise KeyError."""
+    if k == 'apos': return ord("'")
+    table = getattr(htmlentitydefs, "name2codepoint", None) # requires Python 2.3
+    if table is not None: return table[k]
+    k = htmlentitydefs.entitydefs[k] # fallback: a latin-1 char or a "&#NNN;" reference
+    if k.startswith("&#") and k.endswith(";"): return int(k[2:-1]) # not in latin-1
+    return ord(codecs.latin_1_decode(k)[0])
+
+unifiable = {'rsquo':"'", 'lsquo':"'", 'rdquo':'"', 'ldquo':'"',
+'copy':'(C)', 'mdash':'--', 'nbsp':' ', 'rarr':'->', 'larr':'<-', 'middot':'*',
+'ndash':'-', 'oelig':'oe', 'aelig':'ae',
+'agrave':'a', 'aacute':'a', 'acirc':'a', 'atilde':'a', 'auml':'a', 'aring':'a',
+'egrave':'e', 'eacute':'e', 'ecirc':'e', 'euml':'e',
+'igrave':'i', 'iacute':'i', 'icirc':'i', 'iuml':'i',
+'ograve':'o', 'oacute':'o', 'ocirc':'o', 'otilde':'o', 'ouml':'o',
+'ugrave':'u', 'uacute':'u', 'ucirc':'u', 'uuml':'u'}
+
+unifiable_n = {}
+
+for k in unifiable.keys():
+    unifiable_n[name2cp(k)] = unifiable[k]
+
+def charref(name):
+    """Decode the body of a numeric character reference, e.g. '8217' or 'x2019'."""
+    is_hex = name[0] in ['x','X']
+    if is_hex: c = int(name[1:], 16)
+    else: c = int(name)
+
+    # Use the plain-text stand-in from unifiable_n unless UNICODE_SNOB is set
+    if UNICODE_SNOB or c not in unifiable_n.keys():
+        return unichr(c)
+    return unifiable_n[c]
+
+def entityref(c):
+    """Return the replacement text for the named entity c ("&" + c when unknown)."""
+    if not UNICODE_SNOB and c in unifiable.keys():
+        return unifiable[c]
+    try:
+        return unichr(name2cp(c))
+    except KeyError: return "&" + c
+
+def replaceEntities(s):
+    """re.sub callback: expand one match (group 1 is the text between '&' and ';')."""
+    body = s.group(1)
+    if body[0] != "#": return entityref(body)
+    return charref(body[1:])
+
+r_unescape = re.compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));")  # one "&...;" reference: numeric (#NN / #xHH) or a name of up to 8 word chars
+def unescape(s):
+    return r_unescape.sub(replaceEntities, s)  # expand every reference the pattern finds in s
+
+def fixattrs(attrs):
+    """Return the (name, value) pairs of attrs with entities in each value expanded."""
+    # Fix bug in sgmllib.py
+    if not attrs:
+        return attrs
+    newattrs = [(attr[0], unescape(attr[1])) for attr in attrs]
+    return newattrs
+
+### End Entity Nonsense ###
+
+def onlywhite(line):
+    """Return True if the line consists only of spaces and tabs (or is empty)."""
+    # Compare by value: `is` tests identity (never true for unicode vs a str literal)
+    for c in line:
+        if c != ' ' and c != '\t': return False
+    return True
+
+def optwrap(text):
+    """Wrap all paragraphs in the provided text to BODY_WIDTH columns (0 = leave as is)."""
+    if not BODY_WIDTH:
+        return text
+
+    assert wrap, "Requires Python 2.3."
+    result = ''
+    newlines = 0  # consecutive newlines already at the end of result (capped at 2)
+    for para in text.split("\n"):
+        if len(para) > 0:
+            if para[0] not in ' -*':  # by value: `is not` tested identity, always true for unicode
+                for line in wrap(para, BODY_WIDTH):
+                    result += line + "\n"
+                result += "\n"
+                newlines = 2
+            else:  # lines starting with ' ', '-' or '*' are copied unwrapped
+                if not onlywhite(para):
+                    result += para + "\n"
+                    newlines = 1
+        else:
+            if newlines < 2:  # collapse runs of blank lines
+                result += "\n"
+                newlines += 1
+    return result
+
+def hn(tag):
+    """Return the heading level n (1-9) when tag is 'h<n>', otherwise 0."""
+    # Always return an int: the old code fell off the end (None) for non-heading
+    # tags and indexed tag[0] before looking at the length.
+    if len(tag) == 2 and tag[0] == 'h' and tag[1] in '123456789': return int(tag[1])
+    return 0
+
+class _html2text(sgmllib.SGMLParser):  # SGML event handler that renders the HTML it is fed as Markdown text
+    def __init__(self, out=None, baseurl=''):
+        sgmllib.SGMLParser.__init__(self)
+
+        if out is None: self.out = self.outtextf  # no sink given: accumulate in self.outtext
+        else: self.out = out
+        self.outtext = u''
+        self.quiet = 0  # >0 while inside head/style/script; o() writes nothing then
+        self.p_p = 0  # newlines owed before the next chunk (set to 1 by pbr(), 2 by p())
+        self.outcount = 0  # number of chunks written by o() so far
+        self.start = 1  # when set, o() discards any owed space/newlines first
+        self.space = 0  # a single collapsed space is owed before the next chunk
+        self.a = []  # link/image attrs whose reference definitions are still to be written
+        self.astack = []  # one entry per open <a>: its attrs dict, or None when no link is emitted
+        self.acount = 0  # last link reference number handed out
+        self.list = []  # stack of open <ol>/<ul> lists
+        self.blockquote = 0  # nesting depth of open <blockquote>s
+        self.pre = 0  # 1 while inside <pre>
+        self.startpre = 0
+        self.lastWasNL = 0  # truthy when the last chunk written ended in a newline
+        self.abbr_title = None # current abbreviation definition
+        self.abbr_data = None # last inner HTML (for abbr being defined)
+        self.abbr_list = {} # stack of abbreviations to write later
+        self.baseurl = baseurl
+
+    def outtextf(self, s):
+        self.outtext += s
+
+    def close(self):
+        sgmllib.SGMLParser.close(self)
+
+        self.pbr()
+        self.o('', 0, 'end')  # force the final flush (link references, abbreviations)
+
+        return self.outtext
+
+    def handle_charref(self, c):
+        self.o(charref(c))
+
+    def handle_entityref(self, c):
+        self.o(entityref(c))
+
+    def unknown_starttag(self, tag, attrs):
+        self.handle_tag(tag, attrs, 1)
+
+    def unknown_endtag(self, tag):
+        self.handle_tag(tag, None, 0)
+
+    def previousIndex(self, attrs):
+        """ returns the index of certain set of attributes (of a link) in the
+            self.a list
+
+            If the set of attributes is not found, returns None
+        """
+        if not attrs.has_key('href'): return None
+
+        i = -1
+        for a in self.a:
+            i += 1
+            match = 0
+
+            if a.has_key('href') and a['href'] == attrs['href']:
+                if a.has_key('title') or attrs.has_key('title'):  # titles must agree when either side has one
+                        if (a.has_key('title') and attrs.has_key('title') and
+                            a['title'] == attrs['title']):
+                            match = True
+                else:
+                    match = True
+
+            if match: return i
+
+    def handle_tag(self, tag, attrs, start):  # start is 1 for an opening tag, 0 for a closing tag (attrs is None then)
+        attrs = fixattrs(attrs)
+
+        if hn(tag):
+            self.p()
+            if start: self.o(hn(tag)*"#" + ' ')
+
+        if tag in ['p', 'div']: self.p()
+
+        if tag == "br" and start: self.o("  \n")
+
+        if tag == "hr" and start:
+            self.p()
+            self.o("* * *")
+            self.p()
+
+        if tag in ["head", "style", 'script']:  # content of these is not written out
+            if start: self.quiet += 1
+            else: self.quiet -= 1
+
+        if tag in ["body"]:
+            self.quiet = 0 # sites like 9rules.com never close <head>
+
+        if tag == "blockquote":
+            if start:
+                self.p(); self.o('> ', 0, 1); self.start = 1
+                self.blockquote += 1
+            else:
+                self.blockquote -= 1
+                self.p()
+
+        if tag in ['em', 'i', 'u']: self.o("_")  # same marker on open and close
+        if tag in ['strong', 'b']: self.o("**")
+        if tag == "code" and not self.pre: self.o('`') #TODO: `` `this` ``
+        if tag == "abbr":
+            if start:
+                attrsD = {}
+                for (x, y) in attrs: attrsD[x] = y
+                attrs = attrsD
+
+                self.abbr_title = None
+                self.abbr_data = ''
+                if attrs.has_key('title'):
+                    self.abbr_title = attrs['title']
+            else:
+                if self.abbr_title != None:
+                    self.abbr_list[self.abbr_data] = self.abbr_title
+                    self.abbr_title = None
+                self.abbr_data = ''
+
+        if tag == "a":
+            if start:
+                attrsD = {}
+                for (x, y) in attrs: attrsD[x] = y
+                attrs = attrsD
+                if attrs.has_key('href') and not (SKIP_INTERNAL_LINKS and attrs['href'].startswith('#')):
+                    self.astack.append(attrs)
+                    self.o("[")
+                else:
+                    self.astack.append(None)  # placeholder so the matching </a> pops the right entry
+            else:
+                if self.astack:
+                    a = self.astack.pop()
+                    if a:
+                        i = self.previousIndex(a)
+                        if i is not None:
+                            a = self.a[i]  # reuse the reference number of an identical earlier link
+                        else:
+                            self.acount += 1
+                            a['count'] = self.acount
+                            a['outcount'] = self.outcount
+                            self.a.append(a)
+                        self.o("][" + `a['count']` + "]")
+
+        if tag == "img" and start:
+            attrsD = {}
+            for (x, y) in attrs: attrsD[x] = y
+            attrs = attrsD
+            if attrs.has_key('src'):
+                attrs['href'] = attrs['src']  # images share the link reference list, keyed on src
+                alt = attrs.get('alt', '')
+                i = self.previousIndex(attrs)
+                if i is not None:
+                    attrs = self.a[i]
+                else:
+                    self.acount += 1
+                    attrs['count'] = self.acount
+                    attrs['outcount'] = self.outcount
+                    self.a.append(attrs)
+                self.o("![")
+                self.o(alt)
+                self.o("]["+`attrs['count']`+"]")
+
+        if tag == 'dl' and start: self.p()
+        if tag == 'dt' and not start: self.pbr()
+        if tag == 'dd' and start: self.o('    ')
+        if tag == 'dd' and not start: self.pbr()
+
+        if tag in ["ol", "ul"]:
+            if start:
+                self.list.append({'name':tag, 'num':0})
+            else:
+                if self.list: self.list.pop()
+
+            self.p()
+
+        if tag == 'li':
+            if start:
+                self.pbr()
+                if self.list: li = self.list[-1]
+                else: li = {'name':'ul', 'num':0}  # <li> outside any list: treat as a bullet item
+                self.o("  "*len(self.list)) #TODO: line up <ol><li>s > 9 correctly.
+                if li['name'] == "ul": self.o("* ")
+                elif li['name'] == "ol":
+                    li['num'] += 1
+                    self.o(`li['num']`+". ")
+                self.start = 1
+            else:
+                self.pbr()
+
+        if tag in ["table", "tr"] and start: self.p()
+        if tag == 'td': self.pbr()
+
+        if tag == "pre":
+            if start:
+                self.startpre = 1
+                self.pre = 1
+            else:
+                self.pre = 0
+            self.p()
+
+    def pbr(self):  # request a line break unless newlines are already owed
+        if self.p_p == 0: self.p_p = 1
+
+    def p(self): self.p_p = 2  # request a paragraph break
+
+    def o(self, data, puredata=0, force=0):  # write one chunk; force may also be the string 'end' for the final flush
+        if self.abbr_data is not None: self.abbr_data += data
+
+        if not self.quiet:
+            if puredata and not self.pre:  # document text outside <pre>: collapse whitespace runs
+                data = re.sub('\s+', ' ', data)
+                if data and data[0] == ' ':
+                    self.space = 1
+                    data = data[1:]
+            if not data and not force: return
+
+            if self.startpre:
+                #self.out(" :") #TODO: not output when already one there
+                self.startpre = 0
+
+            bq = (">" * self.blockquote)  # prefix written after each owed newline
+            if not (force and data and data[0] == ">") and self.blockquote: bq += " "
+
+            if self.pre:
+                bq += "    "
+                data = data.replace("\n", "\n"+bq)
+
+            if self.start:
+                self.space = 0
+                self.p_p = 0
+                self.start = 0
+
+            if force == 'end':
+                # It's the end.
+                self.p_p = 0
+                self.out("\n")
+                self.space = 0
+
+
+            if self.p_p:
+                self.out(('\n'+bq)*self.p_p)
+                self.space = 0
+
+            if self.space:
+                if not self.lastWasNL: self.out(' ')
+                self.space = 0
+
+            if self.a and ((self.p_p == 2 and LINKS_EACH_PARAGRAPH) or force == "end"):  # time to write link reference definitions
+                if force == "end": self.out("\n")
+
+                newa = []  # links that stay pending
+                for link in self.a:
+                    if self.outcount > link['outcount']:
+                        self.out("   ["+`link['count']`+"]: " + urlparse.urljoin(self.baseurl, link['href']))
+                        if link.has_key('title'): self.out(" ("+link['title']+")")
+                        self.out("\n")
+                    else:
+                        newa.append(link)
+
+                if self.a != newa: self.out("\n") # Don't need an extra line when nothing was done.
+
+                self.a = newa
+
+            if self.abbr_list and force == "end":
+                for abbr, definition in self.abbr_list.items():
+                    self.out("  *[" + abbr + "]: " + definition + "\n")
+
+            self.p_p = 0
+            self.out(data)
+            self.lastWasNL = data and data[-1] == '\n'
+            self.outcount += 1
+
+    def handle_data(self, data):
+        if r'\/script>' in data: self.quiet -= 1  # NOTE(review): looks like a workaround for an escaped "<\/script>" arriving as data -- confirm
+        self.o(data, 1)
+
+    def unknown_decl(self, data): pass
+
+def wrapwrite(text): sys.stdout.write(text.encode('utf8'))  # default output sink: UTF-8 encode and write to stdout
+
+def html2text_file(html, out=wrapwrite, baseurl=''):
+    """Run html through a fresh converter writing to out; return what close() yields."""
+    converter = _html2text(out, baseurl)
+    for chunk in (html, ""): converter.feed(chunk)
+    return converter.close()
+
+def html2text(html, baseurl=''):
+    return optwrap(html2text_file(html, None, baseurl))  # out=None: the converter collects the text itself; wrap that result
+
+if __name__ == "__main__":  # command line use: html2text.py [URL | FILE [ENCODING]]; reads stdin when no argument is given
+    baseurl = ''
+    if sys.argv[1:]:
+        arg = sys.argv[1]
+        if arg.startswith('http://') or arg.startswith('https://'):
+            baseurl = arg
+            j = urllib.urlopen(baseurl)
+            try:
+                from feedparser import _getCharacterEncoding as enc
+            except ImportError:
+                   enc = lambda x, y: ('utf-8', 1)  # feedparser unavailable: assume utf-8
+            text = j.read()
+            encoding = enc(j.headers, text)[0]
+            if encoding == 'us-ascii': encoding = 'utf-8'
+            data = text.decode(encoding)
+
+        else:
+            encoding = 'utf8'
+            if len(sys.argv) > 2:
+                encoding = sys.argv[2]  # optional second argument overrides the file encoding
+            data = open(arg, 'r').read().decode(encoding)  # NOTE(review): file handle is never closed explicitly
+    else:
+        data = sys.stdin.read().decode('utf8')
+    wrapwrite(html2text(data, baseurl))
+

From 92fe7d3725f7785278c4bd2dfd5ad81e290827f5 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 31 Oct 2010 12:01:16 -0600
Subject: [PATCH 10/19] Amazon metadata download plugin: Improved parsing of
 broken HTML

---
 src/calibre/ebooks/metadata/amazon.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/metadata/amazon.py b/src/calibre/ebooks/metadata/amazon.py
index e61e0b2748..5d7d0358f0 100644
--- a/src/calibre/ebooks/metadata/amazon.py
+++ b/src/calibre/ebooks/metadata/amazon.py
@@ -9,6 +9,7 @@ Fetch metadata using Amazon AWS
 import sys, re
 
 from lxml import html
+from lxml.html import soupparser
 
 from calibre import browser
 from calibre.ebooks.metadata import check_isbn
@@ -71,7 +72,7 @@ def get_metadata(br, asin, mi):
         return False
     raw = xml_to_unicode(raw, strip_encoding_pats=True,
             resolve_entities=True)[0]
-    root = html.fromstring(raw)
+    root = soupparser.fromstring(raw)
     ratings = root.xpath('//form[@id="handleBuy"]/descendant::*[@class="asinReviewsSummary"]')
     if ratings:
         pat = re.compile(r'([0-9.]+) out of (\d+) stars')

From 134fad20e0fb3d4defeb791a096499df590d72dd Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 31 Oct 2010 12:45:39 -0600
Subject: [PATCH 11/19] Re-arrange send to device menu to make it harder to
 accidentally trigger the send and delete actions

---
 src/calibre/gui2/device.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py
index 78585d13b6..4e93335af6 100644
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@@ -484,17 +484,22 @@ class DeviceMenu(QMenu): # {{{
                     _('Storage Card B')),
         ]
 
+        later_menus = []
 
         for menu in (self, self.set_default_menu):
             for actions, desc in (
                     (basic_actions, ''),
+                    (specific_actions, _('Send specific format to')),
                     (delete_actions, _('Send and delete from library')),
-                    (specific_actions, _('Send specific format to'))
                     ):
                 mdest = menu
                 if actions is not basic_actions:
-                    mdest = menu.addMenu(desc)
+                    mdest = QMenu(desc)
                     self._memory.append(mdest)
+                    later_menus.append(mdest)
+                    if menu is self.set_default_menu:
+                        menu.addMenu(mdest)
+                        menu.addSeparator()
 
                 for dest, delete, specific, icon, text in actions:
                     action = DeviceAction(dest, delete, specific, icon, text, self)
@@ -507,7 +512,7 @@ class DeviceMenu(QMenu): # {{{
                         action.a_s.connect(self.action_triggered)
                         self.actions.append(action)
                     mdest.addAction(action)
-                if actions is not specific_actions:
+                if actions is basic_actions:
                     menu.addSeparator()
 
         da = config['default_send_to_device_action']
@@ -525,14 +530,21 @@ class DeviceMenu(QMenu): # {{{
         self.group.triggered.connect(self.change_default_action)
         self.addSeparator()
 
+        self.addMenu(later_menus[0])
+        self.addSeparator()
+
         mitem = self.addAction(QIcon(I('eject.png')), _('Eject device'))
         mitem.setEnabled(False)
         mitem.triggered.connect(lambda x : self.disconnect_mounted_device.emit())
         self.disconnect_mounted_device_action = mitem
-
         self.addSeparator()
+
         self.addMenu(self.set_default_menu)
         self.addSeparator()
+
+        self.addMenu(later_menus[1])
+        self.addSeparator()
+
         annot = self.addAction(_('Fetch annotations (experimental)'))
         annot.setEnabled(False)
         annot.triggered.connect(lambda x :

From 965826bef98dd750ac7056456478862bc2ff6a3d Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 31 Oct 2010 12:47:30 -0600
Subject: [PATCH 12/19] Correio da Manha by jmst

---
 resources/recipes/cm_journal.recipe | 44 +++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 resources/recipes/cm_journal.recipe

diff --git a/resources/recipes/cm_journal.recipe b/resources/recipes/cm_journal.recipe
new file mode 100644
index 0000000000..c47fb35775
--- /dev/null
+++ b/resources/recipes/cm_journal.recipe
@@ -0,0 +1,44 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class CMJornal_pt(BasicNewsRecipe):  # news recipe for the Correio da Manha RSS channels listed in feeds
+    title                 = 'Correio da Manha - Portugal'
+    __author__            = 'jmst'
+    description           = 'As noticias de Portugal e do Mundo'
+    publisher             = 'Cofina Media'
+    category              = ''
+    oldest_article        = 1  # NOTE(review): presumably a number of days -- confirm against BasicNewsRecipe
+    no_stylesheets        = True
+    encoding              = 'utf-8'
+    use_embedded_content  = False
+    language              = 'pt'
+    extra_css             = ' .publish{font-style: italic; line-height: 1.2em; border-bottom: 1px dotted; padding: 5px 0} .entity{line-height: 1.2em} .overview{line-height:1.2em} '
+
+    conversion_options = {  # reuses the attributes defined above
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
+
+    keep_only_tags = [
+                        dict(name=['h2','h1'])
+                      , dict(name='div', attrs={'class': ['news']})
+                     ]
+
+    remove_tags = [
+                    dict(name=['object','embed','iframe'])
+                   ,dict(name='a',attrs={'href':['#']})
+                  ]
+
+    feeds = [  # (section title, RSS URL) pairs
+              (u'Actualidade' , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000009-0000-0000-0000-000000000009' )
+             ,(u'Portugal'    , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000010-0000-0000-0000-000000000010'    )
+             ,(u'Economia' , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000011-0000-0000-0000-000000000011' )
+             ,(u'Mundo' , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000091-0000-0000-0000-000000000091' )
+             ,(u'Desporto' , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000012-0000-0000-0000-000000000012'  )
+             ,(u'TV & Media', u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000092-0000-0000-0000-000000000092')
+            ]
+
+    def print_version(self, url):
+        return url.replace('noticia.aspx', 'Imprimir.aspx')  # rewrite the article URL to its 'Imprimir.aspx' variant
+

From b8240c99b9df56a5000201d5d880c8ff3cc8046d Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 31 Oct 2010 13:00:46 -0600
Subject: [PATCH 13/19] Clic_RBS by arvoredo

---
 resources/recipes/clic_rbs.recipe     | 50 +++++++++++++++++++++++++++
 src/calibre/ebooks/metadata/amazon.py |  2 +-
 2 files changed, 51 insertions(+), 1 deletion(-)
 create mode 100644 resources/recipes/clic_rbs.recipe

diff --git a/resources/recipes/clic_rbs.recipe b/resources/recipes/clic_rbs.recipe
new file mode 100644
index 0000000000..559dfa2000
--- /dev/null
+++ b/resources/recipes/clic_rbs.recipe
@@ -0,0 +1,50 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ClicRBS(BasicNewsRecipe):  # news recipe for the clicRBS / globo RSS feeds listed in feeds
+    title          = u'ClicRBS'
+    language = 'pt'
+    __author__ = 'arvoredo'
+    oldest_article = 3
+    max_articles_per_feed = 9
+    cover_url             = 'http://www.publicidade.clicrbs.com.br/clicrbs/imgs/logo_clic.gif'
+
+    remove_tags = [
+                       dict(name='div', attrs={'class':['clic-barra-inner', 'botao-versao-mobile ']})
+                        ]
+
+    remove_tags_before = dict(name='div ', attrs={'class':'descricao'})  # NOTE(review): rebound by the next three lines, so only the last value ('coluna') takes effect; also note the trailing space in 'div '
+    remove_tags_before = dict(name='div', attrs={'id':'glb-corpo'})
+    remove_tags_before = dict(name='div', attrs={'class':'descricao'})
+    remove_tags_before = dict(name='div', attrs={'class':'coluna'})
+    remove_tags_after = dict(name='div', attrs={'class':'extra'})  # NOTE(review): same pattern -- only the last assignment ('lista') survives
+    remove_tags_after = dict(name='div', attrs={'id':'links-patrocinados'})
+    remove_tags_after = dict(name='h4', attrs={'class':'tipo-c comente'})
+    remove_tags_after = dict(name='ul', attrs={'class':'lista'})
+
+    feeds = [  # (section title, RSS URL) pairs
+               (u'zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=1&local=1&channel=13')
+             , (u'diariocatarinense.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=2&local=18&channel=67')
+             , (u'Concursos e Emprego', u'http://g1.globo.com/Rss2/0,,AS0-9654,00.xml')
+             , (u'Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?channel=87&uf=1&local=1')
+             , (u'Economia, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=801&uf=1&local=1&channel=13')
+             , (u'Esportes, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=802&uf=1&local=1&channel=13')
+             , (u'Economia, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1180&channel=87&uf=1&local=1')
+             , (u'Política, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1185&channel=87&uf=1&local=1')
+             , (u'Mundo, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1184&channel=87&uf=1&local=1')
+             , (u'Catarinense, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=&theme=371&uf=2&channel=2')
+             , (u'Geral, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1183&channel=87&uf=1&local=1')
+             , (u'Estilo de Vida, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=805&uf=1&local=1&channel=13')
+             , (u'Corrida, Corrida, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1313&theme=15704&uf=1&channel=2')
+             , (u'Jornal de Santa Catarina, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?espid=159&uf=2&local=18')
+             , (u'Grêmio, Futebol, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=11&theme=65&uf=1&channel=2')
+             , (u'Velocidade, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1314&theme=2655&uf=1&channel=2')
+            ]
+
+    extra_css = '''
+                    cite{color:#007BB5; font-size:xx-small; font-style:italic;}
+                    body{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
+                    h3{font-size:large; color:#082963; font-weight:bold;}
+                    #ident{color:#0179B4; font-size:xx-small;}
+                    p{color:#000000;font-weight:normal;}
+                    .commentario p{color:#007BB5; font-style:italic;}
+                '''
diff --git a/src/calibre/ebooks/metadata/amazon.py b/src/calibre/ebooks/metadata/amazon.py
index 5d7d0358f0..9c89016e8b 100644
--- a/src/calibre/ebooks/metadata/amazon.py
+++ b/src/calibre/ebooks/metadata/amazon.py
@@ -114,7 +114,7 @@ def main(args=sys.argv):
     #print
 
     # Test sophisticated comment formatting
-    print get_social_metadata('Swan Thieves', None, None, '9781416580829')
+    print get_social_metadata('Angels & Demons', None, None, '9781416580829')
     print
     return
 

From 803e9eb32069367839fc95e170b3024ac54b649b Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 31 Oct 2010 13:20:26 -0600
Subject: [PATCH 14/19] Revert bundled version of BeautifulSoup in windows
 build to 3.0.8 to improve parsing of broken HTML

---
 setup/installer/windows/notes.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/setup/installer/windows/notes.rst b/setup/installer/windows/notes.rst
index a8ba41e8ff..545070f7ff 100644
--- a/setup/installer/windows/notes.rst
+++ b/setup/installer/windows/notes.rst
@@ -28,7 +28,9 @@ If there are no windows binaries already compiled for the version of python you
 
 Run the following command to install python dependencies::
 
-    easy_install --always-unzip -U ipython mechanize BeautifulSoup pyreadline python-dateutil dnspython
+    easy_install --always-unzip -U ipython mechanize pyreadline python-dateutil dnspython
+
+Install BeautifulSoup 3.0.x manually into site-packages (3.1.x parses broken HTML very poorly)
 
 Qt
 --------

From a42f927f791ce682a72a98904bd4569b9ed1e9d5 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 31 Oct 2010 18:29:35 -0600
Subject: [PATCH 15/19] ...

---
 setup/server.py                 | 29 +++++++++++++++++++++++++----
 src/calibre/library/comments.py |  8 +++++++-
 2 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/setup/server.py b/setup/server.py
index 66cb6adf7b..0fea4ec733 100644
--- a/setup/server.py
+++ b/setup/server.py
@@ -5,7 +5,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-import subprocess, tempfile, os, time, sys
+import subprocess, tempfile, os, time, sys, telnetlib
 from threading import RLock
 
 from setup import Command
@@ -28,7 +28,12 @@ else:
         def process_default(self, event):
             name = getattr(event,
                     'name', None)
-            if name and os.path.splitext(name)[1] == '.py':
+            if not name:
+                return
+            ext = os.path.splitext(name)[1]
+            reload = False
+            if ext == '.py':
+                reload = True
                 print
                 print name, 'changed'
                 self.command.kill_server()
@@ -36,6 +41,9 @@ else:
                 print self.command.prompt,
                 sys.stdout.flush()
 
+            if reload:
+                self.command.reload_browser(delay=1)
+
 
 class Server(Command):
 
@@ -75,6 +83,19 @@ class Server(Command):
             self.notifier.start()
             self.wdd = wm.add_watch(os.path.abspath('src'), mask, rec=True)
 
+    def reload_browser(self, delay=0.1):  # best effort: wait `delay` seconds, then send BrowserReload() to the REPL on localhost:4242
+        time.sleep(delay)
+        try:
+            t = telnetlib.Telnet('localhost', 4242)
+            try:
+                t.read_until("repl>"); t.write('BrowserReload();')  # wait for the prompt, then issue the reload
+                print t.read_until("repl>")
+            finally: t.close()  # do not leak the socket when the exchange fails part-way
+        except Exception:  # not a bare except: let KeyboardInterrupt/SystemExit propagate
+            print 'Failed to reload browser'
+            import traceback
+            traceback.print_exc()
+
     def run(self, opts):
         self.lock = RLock()
         tdir = tempfile.gettempdir()
@@ -88,8 +109,8 @@ class Server(Command):
         first = True
         while True:
             self.launch_server()
-            if first:
-                pass
+            if not first:
+                self.reload_browser()
             first = False
 
             try:
diff --git a/src/calibre/library/comments.py b/src/calibre/library/comments.py
index 45d6ccaa45..83eec89abe 100644
--- a/src/calibre/library/comments.py
+++ b/src/calibre/library/comments.py
@@ -58,7 +58,13 @@ def comments_to_html(comments):
         return '\n'.join(parts)
 
     if sanitize_pat.search(comments) is not None:
-        return sanitize_comments_html(comments)
+        try:
+            return sanitize_comments_html(comments)
+        except:
+            import traceback
+            traceback.print_exc()
+            return u'<p></p>'
+
 
     # Explode lost CRs to \n\n
     comments = lost_cr_exception_pat.sub(lambda m: m.group().replace('.',

From bdf2cd48ddff2edb5b23bfbc971716ded8130994 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 31 Oct 2010 19:24:01 -0600
Subject: [PATCH 16/19] ...

---
 resources/recipes/ming_pao.recipe | 51 +++++++++++++++----------------
 1 file changed, 24 insertions(+), 27 deletions(-)

diff --git a/resources/recipes/ming_pao.recipe b/resources/recipes/ming_pao.recipe
index 6a61405698..162a3c774e 100644
--- a/resources/recipes/ming_pao.recipe
+++ b/resources/recipes/ming_pao.recipe
@@ -1,7 +1,9 @@
-cense__   = 'GPL v3'
+__license__   = 'GPL v3'
 __copyright__ = '2010, Eddie Lau'
 '''
 modified from Singtao Toronto calibre recipe by rty
+Change Log:
+2010/10/31: skip repeated articles in section pages
 '''
 
 import datetime
@@ -23,42 +25,44 @@ class AdvancedUserRecipe1278063072(BasicNewsRecipe):
     recursions = 0
     conversion_options = {'linearize_tables':True}
     masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
-
     keep_only_tags = [dict(name='h1'),
                       dict(attrs={'id':['newscontent01','newscontent02']})]
 
     def get_fetchdate(self):
+        '''Return the issue date, formatted YYYYMMDD, that is embedded in the site URLs.'''
         dt_utc = datetime.datetime.utcnow()
-        # convert UTC to local hk time
-        dt_local = dt_utc - datetime.timedelta(-8.0/24)
+        # HKT is UTC+8, but all news is only available at around 05:30 HKT, so
+        # shift UTC by just +2.5h: the date then rolls over at 05:30 HKT
+        dt_local = dt_utc - datetime.timedelta(-2.5/24)
         return dt_local.strftime("%Y%m%d")
 
     def parse_index(self):
-            feeds = []
-            dateStr = self.get_fetchdate()
-            for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'), (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'), (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'), (u'\u793e\u8a55\u2027\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'), (u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'), (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'), (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'), ('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'), (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'), (u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),]:
-                articles = self.parse_section(url)
-                if articles:
-                    feeds.append((title, articles))
-            return feeds
+        '''Return a list of (section title, articles) pairs, omitting sections with no articles.'''
+        feeds = []
+        dateStr = self.get_fetchdate()
+        for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'), (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'), (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'), (u'\u793e\u8a55\u2027\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'), (u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'), (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'), (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'), ('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'), (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'), (u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),]:
+            articles = self.parse_section(url)
+            if articles:
+                feeds.append((title, articles))
+        return feeds
 
     def parse_section(self, url):
-            dateStr = self.get_fetchdate()
-            soup = self.index_to_soup(url)
-            divs = soup.findAll(attrs={'class': ['bullet']})
-            current_articles = []
-            for i in divs:
-                a = i.find('a', href = True)
-                title = self.tag_to_string(a)
-                url = a.get('href', False)
-                url = 'http://news.mingpao.com/' + dateStr + '/' +url
+        '''Return article dicts for the "bullet" links on one section page, each URL at most once.'''
+        dateStr = self.get_fetchdate()
+        soup = self.index_to_soup(url)
+        divs = soup.findAll(attrs={'class': ['bullet']})
+        current_articles = []
+        included_urls = set()
+        for i in divs:
+            a = i.find('a', href = True)
+            if a is None:
+                # bullet element without a link: there is nothing to fetch
+                continue
+            title = self.tag_to_string(a)
+            url = a.get('href', False)
+            url = 'http://news.mingpao.com/' + dateStr + '/' +url
+            if url not in included_urls:
                 current_articles.append({'title': title, 'url': url, 'description':''})
-            return current_articles
-
-    def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
-           del item['style']
-        for item in soup.findAll(width=True):
-           del item['width']
-        return soup
+                included_urls.add(url)
+        return current_articles
 

From 21731b3c046da70cdc63fa348f164b9d5f4218cc Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 31 Oct 2010 21:00:38 -0600
Subject: [PATCH 17/19] ...

---
 src/calibre/utils/html2text.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/calibre/utils/html2text.py b/src/calibre/utils/html2text.py
index afe5a0aded..0eb84a3d38 100644
--- a/src/calibre/utils/html2text.py
+++ b/src/calibre/utils/html2text.py
@@ -9,7 +9,7 @@ __contributors__ = ["Martin 'Joey' Schulze", "Ricardo Reyes", "Kevin Jay North"]
 #   Support decoded entities with unifiable.
 
 if not hasattr(__builtins__, 'True'): True, False = 1, 0
-import re, sys, urllib, htmlentitydefs, codecs, StringIO, types
+import re, sys, urllib, htmlentitydefs, codecs
 import sgmllib
 import urlparse
 sgmllib.charref = re.compile('&#([xX]?[0-9a-fA-F]+)[^0-9a-fA-F]')
@@ -18,17 +18,17 @@ try: from textwrap import wrap
 except: pass
 
 # Use Unicode characters instead of their ascii psuedo-replacements
-UNICODE_SNOB = 0
+UNICODE_SNOB = 1
 
 # Put the links after each paragraph instead of at the end.
 LINKS_EACH_PARAGRAPH = 0
 
 # Wrap long lines at position. 0 for no wrapping. (Requires Python 2.3.)
-BODY_WIDTH = 78
+BODY_WIDTH = 0
 
 # Don't show internal links (href="#local-anchor") -- corresponding link targets
 # won't be visible in the plain text file anyway.
-SKIP_INTERNAL_LINKS = False
+SKIP_INTERNAL_LINKS = True
 
 ### Entity Nonsense ###
 
@@ -433,8 +433,9 @@ if __name__ == "__main__":
             j = urllib.urlopen(baseurl)
             try:
                 from feedparser import _getCharacterEncoding as enc
+                enc
             except ImportError:
-                   enc = lambda x, y: ('utf-8', 1)
+                enc = lambda x, y: ('utf-8', 1)
             text = j.read()
             encoding = enc(j.headers, text)[0]
             if encoding == 'us-ascii': encoding = 'utf-8'

From 073bf833712d7827ebe2ecfcb0b36478ea75d878 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 31 Oct 2010 22:22:49 -0600
Subject: [PATCH 18/19] El Faro de Vigo by Jefferson Frantz. Fixes #405 (New
 news feed)

---
 resources/recipes/el_faro.recipe | 77 ++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 resources/recipes/el_faro.recipe

diff --git a/resources/recipes/el_faro.recipe b/resources/recipes/el_faro.recipe
new file mode 100644
index 0000000000..ec1b74b5cb
--- /dev/null
+++ b/resources/recipes/el_faro.recipe
@@ -0,0 +1,67 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+# News recipe for El Faro de Vigo, built from the farodevigo.es RSS feeds
+class ElFaroDeVigo(BasicNewsRecipe):
+    title          = u'El Faro de Vigo'
+    oldest_article = 1
+    max_articles_per_feed = 100
+    __author__  = 'Jefferson Frantz'
+    description = 'Noticias de Vigo'
+    timefmt = ' [%d %b, %Y]'
+    language = 'es'
+    encoding               = 'cp1252'
+    no_stylesheets = True
+    remove_javascript = True
+
+    feeds          = [
+##                        (u'Vigo', u'http://www.farodevigo.es/elementosInt/rss/1'),
+##                        (u'Gran Vigo', u'http://www.farodevigo.es/elementosInt/rss/2'),
+                        (u'Galicia', u'http://www.farodevigo.es/elementosInt/rss/4'),
+                        (u'España', u'http://www.farodevigo.es/elementosInt/rss/6'),
+                        (u'Mundo', u'http://www.farodevigo.es/elementosInt/rss/7'),
+##                        (u'Opinión', u'http://www.farodevigo.es/elementosInt/rss/5'),
+                        (u'Economía', u'http://www.farodevigo.es/elementosInt/rss/10'),
+                        (u'Sociedad y Cultura', u'http://www.farodevigo.es/elementosInt/rss/8'),
+                        (u'Sucesos', u'http://www.farodevigo.es/elementosInt/rss/9'),
+                        (u'Deportes', u'http://www.farodevigo.es/elementosInt/rss/11'),
+                        (u'Agenda', u'http://www.farodevigo.es/elementosInt/rss/21'),
+                        (u'Gente', u'http://www.farodevigo.es/elementosInt/rss/24'),
+                        (u'Televisión', u'http://www.farodevigo.es/elementosInt/rss/25'),
+                        (u'Ciencia y Tecnología', u'http://www.farodevigo.es/elementosInt/rss/26')]
+
+    extra_css              = '''.noticia_texto{ font-family: sans-serif; font-size: medium; text-align: justify }
+                                h1{font-family: serif; font-size: x-large; font-weight: bold; color: #000000; text-align: center}
+                                h2{font-family: serif; font-size: medium; font-weight: bold; color: #000000; text-align: left}
+                                .enlacenegrita10{font-family: serif; font-size: small; font-weight: bold; color: #000000; text-align: left}
+                                .noticia_titular{font-family: serif; font-size: x-large; font-weight: bold; color: #000000; text-align: center}'''
+
+    # Only the div with class "noticias" is kept from each downloaded page
+    keep_only_tags = [dict(name='div', attrs={'class':['noticias']})]
+
+    # Embedded media, scripts, lists and the div classes listed here are removed
+    remove_tags        = [
+                             dict(name=['object','link','script','ul','iframe','ol'])
+                            ,dict(name='div', attrs={'class':['noticiadd2', 'cintillo2', 'noticiadd']})
+                            ,dict(name='div', attrs={'class':['imagen_derecha', 'noticiadd3', 'extraHTML']})
+                         ]
+
+    def preprocess_html(self, soup):
+        '''Drop inline styles and the barrapunto icon's parent, then return self.adeify_images(soup).'''
+        for item in soup.findAll(style=True):
+            del item['style']
+
+        # The barrapunto.gif icon may be served from any of three static hosts;
+        # remove the parent element of whichever copies are present
+        for host in ('estaticos00', 'estaticos01', 'estaticos02'):
+            url = 'http://%s.farodevigo.es//elementosWeb/mediaweb/images/compartir/barrapunto.gif' % host
+            fitem = soup.find('img', src=url)
+            if fitem:
+                fitem.parent.extract()
+
+        return self.adeify_images(soup)
+
+    def postprocess_html(self, soup, first_fetch):
+        '''Set align="left" on every element whose class is enlacenegrita10 and return the soup.'''
+        for div in soup.findAll(True, {'class':'enlacenegrita10'}):
+            div['align'] = 'left'
+        return soup

From 0c8684fa2191d1329860de55c364718c991db469 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 31 Oct 2010 22:43:24 -0600
Subject: [PATCH 19/19] Fix #7369 (0.7.26)

---
 src/calibre/ebooks/metadata/amazon.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/metadata/amazon.py b/src/calibre/ebooks/metadata/amazon.py
index 9c89016e8b..81d996c6a7 100644
--- a/src/calibre/ebooks/metadata/amazon.py
+++ b/src/calibre/ebooks/metadata/amazon.py
@@ -72,7 +72,10 @@ def get_metadata(br, asin, mi):
         return False
     raw = xml_to_unicode(raw, strip_encoding_pats=True,
             resolve_entities=True)[0]
-    root = soupparser.fromstring(raw)
+    try:
+        root = soupparser.fromstring(raw)
+    except:
+        return False
     ratings = root.xpath('//form[@id="handleBuy"]/descendant::*[@class="asinReviewsSummary"]')
     if ratings:
         pat = re.compile(r'([0-9.]+) out of (\d+) stars')