Merge from trunk

2025-07-09 03:04:10 -04:00 · 2010-11-01 01:31:39 +01:00 · 2010-11-01 01:31:39 +01:00 · 679136f6a7
commit 679136f6a7
parent 9aefafc745 803e9eb320
21 changed files with 429 additions and 142 deletions
--- a/resources/content_server/browse/browse.css
+++ b/resources/content_server/browse/browse.css
@ -208,6 +208,8 @@ h2.library_name {

 }

+.toplevel li a { text-decoration: none; }
+
 .toplevel li img {
    vertical-align: middle;
    margin-right: 1em;
@ -261,9 +263,16 @@ h2.library_name {

 }

-.category div.category-item span.href { display: none }
+.category div.category-item a { text-decoration: none; color: inherit; }

-#groups span.load_href { display: none }
+#groups a.load_href { 
+    text-decoration: none;
+    color: inherit;
+    font-size: medium;
+    font-weight: normal;
+    padding: 0;
+    padding-left: 0.5em;
+}

 #groups h3 {
    font-weight: bold;
--- a/resources/content_server/browse/browse.js
+++ b/resources/content_server/browse/browse.js
@ -116,7 +116,7 @@ function toplevel() {
    $(".sort_select").hide();

    $(".toplevel li").click(function() {
-        var href = $(this).children("span.url").text();
+        var href = $(this).children("a").attr('href');
        window.location = href;
    });

@ -133,7 +133,7 @@ function render_error(msg) {
 // Category feed {{{

 function category_clicked() {
-   var href = $(this).find("span.href").html();
+   var href = $(this).find("a").attr('href');
   window.location = href;
 }

@ -151,7 +151,7 @@ function category() {

        change: function(event, ui) {
            if (ui.newContent) {
-                var href = ui.newContent.children("span.load_href").html();
+                var href = ui.newContent.prev().children("a.load_href").attr('href');
                ui.newContent.children(".loading").show();
                if (href) {
                    $.ajax({
--- a/resources/content_server/read/monocle.js
+++ b/resources/content_server/read/monocle.js
--- a/resources/recipes/clic_rbs.recipe
+++ b/resources/recipes/clic_rbs.recipe
@ -0,0 +1,50 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ClicRBS(BasicNewsRecipe):
+    title          = u'ClicRBS'
+    language = 'pt'
+    __author__ = 'arvoredo'
+    oldest_article = 3
+    max_articles_per_feed = 9
+    cover_url             = 'http://www.publicidade.clicrbs.com.br/clicrbs/imgs/logo_clic.gif'
+
+    remove_tags = [
+                       dict(name='div', attrs={'class':['clic-barra-inner', 'botao-versao-mobile ']})
+                        ]
+
+    remove_tags_before = dict(name='div ', attrs={'class':'descricao'})
+    remove_tags_before = dict(name='div', attrs={'id':'glb-corpo'})
+    remove_tags_before = dict(name='div', attrs={'class':'descricao'})
+    remove_tags_before = dict(name='div', attrs={'class':'coluna'})
+    remove_tags_after = dict(name='div', attrs={'class':'extra'})
+    remove_tags_after = dict(name='div', attrs={'id':'links-patrocinados'})
+    remove_tags_after = dict(name='h4', attrs={'class':'tipo-c comente'})
+    remove_tags_after = dict(name='ul', attrs={'class':'lista'})
+
+    feeds = [
+               (u'zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=1&local=1&channel=13')
+             , (u'diariocatarinense.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=2&local=18&channel=67')
+             , (u'Concursos e Emprego', u'http://g1.globo.com/Rss2/0,,AS0-9654,00.xml')
+             , (u'Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?channel=87&uf=1&local=1')
+             , (u'Economia, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=801&uf=1&local=1&channel=13')
+             , (u'Esportes, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=802&uf=1&local=1&channel=13')
+             , (u'Economia, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1180&channel=87&uf=1&local=1')
+             , (u'Política, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1185&channel=87&uf=1&local=1')
+             , (u'Mundo, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1184&channel=87&uf=1&local=1')
+             , (u'Catarinense, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=&theme=371&uf=2&channel=2')
+             , (u'Geral, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1183&channel=87&uf=1&local=1')
+             , (u'Estilo de Vida, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=805&uf=1&local=1&channel=13')
+             , (u'Corrida, Corrida, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1313&theme=15704&uf=1&channel=2')
+             , (u'Jornal de Santa Catarina, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?espid=159&uf=2&local=18')
+             , (u'Grêmio, Futebol, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=11&theme=65&uf=1&channel=2')
+             , (u'Velocidade, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1314&theme=2655&uf=1&channel=2')
+            ]
+
+    extra_css = '''
+                    cite{color:#007BB5; font-size:xx-small; font-style:italic;}
+                    body{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
+                    h3{font-size:large; color:#082963; font-weight:bold;}
+                    #ident{color:#0179B4; font-size:xx-small;}
+                    p{color:#000000;font-weight:normal;}
+                    .commentario p{color:#007BB5; font-style:italic;}
+                '''
--- a/resources/recipes/cm_journal.recipe
+++ b/resources/recipes/cm_journal.recipe
@ -0,0 +1,44 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class CMJornal_pt(BasicNewsRecipe):
+    title                 = 'Correio da Manha - Portugal'
+    __author__            = 'jmst'
+    description           = 'As noticias de Portugal e do Mundo'
+    publisher             = 'Cofina Media'
+    category              = ''
+    oldest_article        = 1
+    no_stylesheets        = True
+    encoding              = 'utf-8'
+    use_embedded_content  = False
+    language              = 'pt'
+    extra_css             = ' .publish{font-style: italic; line-height: 1.2em; border-bottom: 1px dotted; padding: 5px 0} .entity{line-height: 1.2em} .overview{line-height:1.2em} '
+
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
+
+    keep_only_tags = [
+                        dict(name=['h2','h1'])
+                      , dict(name='div', attrs={'class': ['news']})
+                     ]
+
+    remove_tags = [
+                    dict(name=['object','embed','iframe'])
+                   ,dict(name='a',attrs={'href':['#']})
+                  ]
+
+    feeds = [
+              (u'Actualidade' , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000009-0000-0000-0000-000000000009' )
+             ,(u'Portugal'    , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000010-0000-0000-0000-000000000010'    )
+             ,(u'Economia' , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000011-0000-0000-0000-000000000011' )
+             ,(u'Mundo' , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000091-0000-0000-0000-000000000091' )
+             ,(u'Desporto' , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000012-0000-0000-0000-000000000012'  )
+             ,(u'TV & Media', u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000092-0000-0000-0000-000000000092')
+            ]
+
+    def print_version(self, url):
+        return url.replace('noticia.aspx', 'Imprimir.aspx')
+
--- a/resources/recipes/mediapart.recipe
+++ b/resources/recipes/mediapart.recipe
@ -1,53 +1,79 @@
-#!/usr/bin/env  python
-
 __license__   = 'GPL v3'
-__copyright__ = '2009, Mathieu Godlewski <mathieu at godlewski.fr>'
+__copyright__ = '2009, Mathieu Godlewski <mathieu at godlewski.fr>; 2010, Louis Gesbert <meta at antislash dot info>'
 '''
 Mediapart
 '''

-import re, string
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
+from calibre.ebooks.BeautifulSoup import Tag
 from calibre.web.feeds.news import BasicNewsRecipe

 class Mediapart(BasicNewsRecipe):
    title          = 'Mediapart'
-    __author__ = 'Mathieu Godlewski <mathieu at godlewski.fr>'
+    __author__ = 'Mathieu Godlewski'
    description = 'Global news in french from online newspapers'
    oldest_article = 7
    language = 'fr'
+    needs_subscription = True

    max_articles_per_feed = 50
    no_stylesheets = True

-    html2lrf_options = ['--base-font-size', '10']
+    cover_url = 'http://www.mediapart.fr/sites/all/themes/mediapart/mediapart/images/annonce.jpg'

    feeds =  [
        ('Les articles', 'http://www.mediapart.fr/articles/feed'),
    ]

-    preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
-        [
-            (r'<div class="print-title">([^>]+)</div>', lambda match : '<h2>'+match.group(1)+'</h2>'),
-            (r'<p>Mediapart\.fr</p>', lambda match : ''),
-            (r'<p[^>]*>[\s]*</p>', lambda match : ''),
-            (r'<p><a href="[^\.]+\.pdf">[^>]*</a></p>', lambda match : ''),
+# -- print-version has poor quality on this website, better do the conversion ourselves
+#
+#     preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
+#         [
+#             (r'<div class="print-title">([^>]+)</div>', lambda match : '<h2>'+match.group(1)+'</h2>'),
+#             (r'<span class=\'auteur_staff\'>[^>]+<a title=\'[^\']*\'[^>]*>([^<]*)</a>[^<]*</span>',
+#              lambda match : '<i>'+match.group(1)+'</i>'),
+#             (r'\'', lambda match: '&rsquo;'),
+#         ]
+#      ]
+#
+#     remove_tags    = [ dict(name='div', attrs={'class':'print-source_url'}),
+#                        dict(name='div', attrs={'class':'print-links'}),
+#                        dict(name='img', attrs={'src':'entete_article.png'}),
+#                        dict(name='br') ]
+#
+#     def print_version(self, url):
+#         raw = self.browser.open(url).read()
+#         soup = BeautifulSoup(raw.decode('utf8', 'replace'))
+#         div = soup.find('div', {'id':re.compile('node-\d+')})
+#         if div is None:
+#             return None
+#         article_id = string.replace(div['id'], 'node-', '')
+#         if article_id is None:
+#             return None
+#         return 'http://www.mediapart.fr/print/'+article_id
+
+# -- Non-print version [dict(name='div', attrs={'class':'advert'})]
+
+    keep_only_tags = [
+        dict(name='h1', attrs={'class':'title'}),
+        dict(name='div', attrs={'class':'page_papier_detail'}),
        ]
-    ]

-    remove_tags    = [ dict(name='div', attrs={'class':'print-source_url'}),
-                                  dict(name='div', attrs={'class':'print-links'}),
-                                  dict(name='img', attrs={'src':'entete_article.png'}),
-    ]
+    def preprocess_html(self,soup):
+        for title in soup.findAll('div', {'class':'titre'}):
+            tag = Tag(soup, 'h3')
+            title.replaceWith(tag)
+            tag.insert(0,title)
+        return soup

+# -- Handle login
+
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser()
+        if self.username is not None and self.password is not None:
+            br.open('http://www.mediapart.fr/')
+            br.select_form(nr=1)
+            br['name'] = self.username
+            br['pass'] = self.password
+            br.submit()
+        return br

-    def print_version(self, url):
-        raw = self.browser.open(url).read()
-        soup = BeautifulSoup(raw.decode('utf8', 'replace'))
-        div = soup.find('div', {'class':'node node-type-article'})
-        if div is None:
-            return None
-        article_id = string.replace(div['id'], 'node-', '')
-        if article_id is None:
-            return None
-        return 'http://www.mediapart.fr/print/'+article_id
--- a/resources/recipes/rue89.recipe
+++ b/resources/recipes/rue89.recipe
@ -0,0 +1,53 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Louis Gesbert <meta at antislash dot info>'
+'''
+Rue89
+'''
+
+__author__ = '2010, Louis Gesbert <meta at antislash dot info>'
+
+import re
+from calibre.ebooks.BeautifulSoup import Tag
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Rue89(BasicNewsRecipe):
+    title = 'Rue89'
+    __author__ = 'Louis Gesbert'
+    description = 'Popular free french news website'
+    title = u'Rue89'
+    language = 'fr'
+    oldest_article = 7
+    max_articles_per_feed = 50
+
+    feeds = [(u'La Une', u'http://www.rue89.com/homepage/feed')]
+
+    no_stylesheets = True
+
+    preprocess_regexps = [
+        (re.compile(r'<(/?)h2>', re.IGNORECASE|re.DOTALL),
+         lambda match : '<'+match.group(1)+'h3>'),
+        (re.compile(r'<div class="print-title">([^>]+)</div>', re.IGNORECASE|re.DOTALL),
+         lambda match : '<h2>'+match.group(1)+'</h2>'),
+        (re.compile(r'<img[^>]+src="[^"]*/numeros/(\d+)[^0-9.">]*.gif"[^>]*/>', re.IGNORECASE|re.DOTALL),
+         lambda match : '<span style="font-family: Sans-serif; color: red; font-size:24pt; padding=2pt;">'+match.group(1)+'</span>'),
+        (re.compile(r'\''), lambda match: '&rsquo;'),
+        ]
+
+    def preprocess_html(self,soup):
+        body = Tag(soup, 'body')
+        title = soup.find('h1', {'class':'title'})
+        content = soup.find('div', {'class':'content'})
+        soup.body.replaceWith(body)
+        body.insert(0, title)
+        body.insert(1, content)
+        return soup
+
+    remove_tags = [ #dict(name='div', attrs={'class':'print-source_url'}),
+                    #dict(name='div', attrs={'class':'print-links'}),
+                    #dict(name='img', attrs={'class':'print-logo'}),
+                    dict(name='div', attrs={'class':'content_top'}),
+                    dict(name='div', attrs={'id':'sidebar-left'}), ]
+
+# -- print-version has poor quality on this website, better do the conversion ourselves
+#    def print_version(self, url):
+#        return re.sub('^.*-([0-9]+)$', 'http://www.rue89.com/print/\\1',url)
--- a/setup/installer/windows/notes.rst
+++ b/setup/installer/windows/notes.rst
@ -28,7 +28,9 @@ If there are no windows binaries already compiled for the version of python you

 Run the following command to install python dependencies::

-    easy_install --always-unzip -U ipython mechanize BeautifulSoup pyreadline python-dateutil dnspython
+    easy_install --always-unzip -U ipython mechanize pyreadline python-dateutil dnspython
+
+Install BeautifulSoup 3.0.x manually into site-packages (3.1.x parses broken HTML very poorly)

 Qt
 --------
--- a/setup/server.py
+++ b/setup/server.py
@ -5,10 +5,38 @@ __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import subprocess, tempfile, os, time
+import subprocess, tempfile, os, time, sys
+from threading import RLock

 from setup import Command

+try:
+    from pyinotify import WatchManager, ThreadedNotifier, EventsCodes, ProcessEvent
+except:
+    wm = None
+else:
+    wm = WatchManager()
+    flags = EventsCodes.ALL_FLAGS
+    mask = flags['IN_MODIFY']
+
+    class ProcessEvents(ProcessEvent):
+
+        def __init__(self, command):
+            ProcessEvent.__init__(self)
+            self.command = command
+
+        def process_default(self, event):
+            name = getattr(event,
+                    'name', None)
+            if name and os.path.splitext(name)[1] == '.py':
+                print
+                print name, 'changed'
+                self.command.kill_server()
+                self.command.launch_server()
+                print self.command.prompt,
+                sys.stdout.flush()
+
+
 class Server(Command):

    description = 'Run the calibre server in development mode conveniently'
@ -18,31 +46,62 @@ class Server(Command):
    def rebuild_monocole(self):
        subprocess.check_call(['sprocketize', '-C', self.MONOCLE_PATH,
            '-I', 'src', 'src/monocle.js'],
-            stdout=open('resources/content_server/monocle.js', 'wb'))
+            stdout=open('resources/content_server/read/monocle.js', 'wb'))

-    def launch_server(self, log):
-        self.rebuild_monocole()
-        p = subprocess.Popen(['calibre-server', '--develop'],
-                stderr=subprocess.STDOUT, stdout=log)
-        return p
+    def launch_server(self):
+        print 'Starting server...\n'
+        with self.lock:
+            self.rebuild_monocole()
+            self.server_proc = p = subprocess.Popen(['calibre-server', '--develop'],
+                    stderr=subprocess.STDOUT, stdout=self.server_log)
+            time.sleep(0.2)
+            if p.poll() is not None:
+                print 'Starting server failed'
+                raise SystemExit(1)
+            return p
+
+    def kill_server(self):
+        print 'Killing server...\n'
+        if self.server_proc is not None:
+            with self.lock:
+                if self.server_proc.poll() is None:
+                    self.server_proc.terminate()
+                while self.server_proc.poll() is None:
+                    time.sleep(0.1)
+
+    def watch(self):
+        if wm is not None:
+            self.notifier = ThreadedNotifier(wm, ProcessEvents(self))
+            self.notifier.start()
+            self.wdd = wm.add_watch(os.path.abspath('src'), mask, rec=True)

    def run(self, opts):
+        self.lock = RLock()
        tdir = tempfile.gettempdir()
        logf = os.path.join(tdir, 'calibre-server.log')
-        log = open(logf, 'ab')
+        self.server_log = open(logf, 'ab')
+        self.prompt = 'Press Enter to kill/restart server. Ctrl+C to quit: '
        print 'Server log available at:', logf
+        print
+        self.watch()

+        first = True
        while True:
-            print 'Starting server...'
-            p = self.launch_server(log)
+            self.launch_server()
+            if first:
+                pass
+            first = False
+
            try:
-                raw_input('Press Enter to kill/restart server. Ctrl+C to quit: ')
+                raw_input(self.prompt)
            except:
+                print
+                self.kill_server()
                break
            else:
-                while p.returncode is None:
-                    p.terminate()
-                    time.sleep(0.1)
-                    p.kill()
+                self.kill_server()
        print

+        if hasattr(self, 'notifier'):
+            self.notifier.stop()
+
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -475,7 +475,7 @@ from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, \
        SOVOS, PICO
 from calibre.devices.sne.driver import SNE
 from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \
-        GEMEI, VELOCITYMICRO, PDNOVEL_KOBO
+        GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, Q600
 from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
 from calibre.devices.kobo.driver import KOBO

@ -586,6 +586,7 @@ plugins += [
    AVANT,
    MENTOR,
    SWEEX,
+    Q600,
    KOGAN,
    PDNOVEL,
    SPECTRA,
--- a/src/calibre/devices/apple/driver.py
+++ b/src/calibre/devices/apple/driver.py
@ -19,7 +19,7 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.metadata.epub import set_metadata
 from calibre.library.server.utils import strftime
 from calibre.utils.config import config_dir, prefs
-from calibre.utils.date import isoformat, now, parse_date
+from calibre.utils.date import now, parse_date
 from calibre.utils.logging import Log
 from calibre.utils.zipfile import ZipFile

@ -2521,11 +2521,11 @@ class ITUNES(DriverBase):
                        metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour,
                                                   old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo)
                    else:
-                        metadata.timestamp = isoformat(now())
+                        metadata.timestamp = now()
                        if DEBUG:
                            self.log.info("   add timestamp: %s" % metadata.timestamp)
                else:
-                    metadata.timestamp = isoformat(now())
+                    metadata.timestamp = now()
                    if DEBUG:
                        self.log.warning("   missing <metadata> block in OPF file")
                        self.log.info("   add timestamp: %s" % metadata.timestamp)
--- a/src/calibre/devices/misc.py
+++ b/src/calibre/devices/misc.py
@ -72,6 +72,15 @@ class SWEEX(USBMS):
    EBOOK_DIR_MAIN = ''
    SUPPORTS_SUB_DIRS = True

+class Q600(SWEEX):
+
+    name = 'Digma Q600 Device interface'
+    gui_name = 'Q600'
+    description    = _('Communicate with the Digma Q600')
+
+    BCD = [0x325]
+    FORMATS     = ['epub', 'fb2', 'mobi', 'prc', 'html', 'rtf', 'chm', 'pdf', 'txt']
+
 class KOGAN(SWEEX):

    name           = 'Kogan Device Interface'
--- a/src/calibre/ebooks/metadata/amazon.py
+++ b/src/calibre/ebooks/metadata/amazon.py
@ -9,11 +9,13 @@ Fetch metadata using Amazon AWS
 import sys, re

 from lxml import html
+from lxml.html import soupparser

 from calibre import browser
 from calibre.ebooks.metadata import check_isbn
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.chardet import xml_to_unicode
+from calibre.library.comments import sanitize_comments_html

 def find_asin(br, isbn):
    q = 'http://www.amazon.com/s?field-keywords='+isbn
@ -70,7 +72,7 @@ def get_metadata(br, asin, mi):
        return False
    raw = xml_to_unicode(raw, strip_encoding_pats=True,
            resolve_entities=True)[0]
-    root = html.fromstring(raw)
+    root = soupparser.fromstring(raw)
    ratings = root.xpath('//form[@id="handleBuy"]/descendant::*[@class="asinReviewsSummary"]')
    if ratings:
        pat = re.compile(r'([0-9.]+) out of (\d+) stars')
@ -95,25 +97,26 @@ def get_metadata(br, asin, mi):
        # remove all attributes from tags
        desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
        # Collapse whitespace
-        desc = re.sub('\n+', '\n', desc)
-        desc = re.sub(' +', ' ', desc)
+        #desc = re.sub('\n+', '\n', desc)
+        #desc = re.sub(' +', ' ', desc)
        # Remove the notice about text referring to out of print editions
        desc = re.sub(r'(?s)<em>--This text ref.*?</em>', '', desc)
        # Remove comments
        desc = re.sub(r'(?s)<!--.*?-->', '', desc)
-        mi.comments = desc
+        mi.comments = sanitize_comments_html(desc)

    return True


 def main(args=sys.argv):
    # Test xisbn
-    print get_social_metadata('Learning Python', None, None, '8324616489')
-    print
+    #print get_social_metadata('Learning Python', None, None, '8324616489')
+    #print

    # Test sophisticated comment formatting
-    print get_social_metadata('Swan Thieves', None, None, '9780316065795')
+    print get_social_metadata('Angels & Demons', None, None, '9781416580829')
    print
+    return

    # Random tests
    print get_social_metadata('Star Trek: Destiny: Mere Mortals', None, None, '9781416551720')
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@ -484,17 +484,22 @@ class DeviceMenu(QMenu): # {{{
                    _('Storage Card B')),
        ]

+        later_menus = []

        for menu in (self, self.set_default_menu):
            for actions, desc in (
                    (basic_actions, ''),
+                    (specific_actions, _('Send specific format to')),
                    (delete_actions, _('Send and delete from library')),
-                    (specific_actions, _('Send specific format to'))
                    ):
                mdest = menu
                if actions is not basic_actions:
-                    mdest = menu.addMenu(desc)
+                    mdest = QMenu(desc)
                    self._memory.append(mdest)
+                    later_menus.append(mdest)
+                    if menu is self.set_default_menu:
+                        menu.addMenu(mdest)
+                        menu.addSeparator()

                for dest, delete, specific, icon, text in actions:
                    action = DeviceAction(dest, delete, specific, icon, text, self)
@ -507,7 +512,7 @@ class DeviceMenu(QMenu): # {{{
                        action.a_s.connect(self.action_triggered)
                        self.actions.append(action)
                    mdest.addAction(action)
-                if actions is not specific_actions:
+                if actions is basic_actions:
                    menu.addSeparator()

        da = config['default_send_to_device_action']
@ -525,14 +530,21 @@ class DeviceMenu(QMenu): # {{{
        self.group.triggered.connect(self.change_default_action)
        self.addSeparator()

+        self.addMenu(later_menus[0])
+        self.addSeparator()
+
        mitem = self.addAction(QIcon(I('eject.png')), _('Eject device'))
        mitem.setEnabled(False)
        mitem.triggered.connect(lambda x : self.disconnect_mounted_device.emit())
        self.disconnect_mounted_device_action = mitem
-
        self.addSeparator()
+
        self.addMenu(self.set_default_menu)
        self.addSeparator()
+
+        self.addMenu(later_menus[1])
+        self.addSeparator()
+
        annot = self.addAction(_('Fetch annotations (experimental)'))
        annot.setEnabled(False)
        annot.triggered.connect(lambda x :
--- a/src/calibre/library/comments.py
+++ b/src/calibre/library/comments.py
@ -11,11 +11,15 @@ from calibre.constants import preferred_encoding
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, \
        CData, Comment, Declaration, ProcessingInstruction
 from calibre import prepare_string_for_xml
+from calibre.utils.html2text import html2text
+from calibre.ebooks.markdown import markdown

 # Hackish - ignoring sentences ending or beginning in numbers to avoid
 # confusion with decimal points.
 lost_cr_pat = re.compile('([a-z])([\.\?!])([A-Z])')
 lost_cr_exception_pat = re.compile(r'(Ph\.D)|(D\.Phil)|((Dr|Mr|Mrs|Ms)\.[A-Z])')
+sanitize_pat = re.compile(r'<script|<table|<tr|<td|<th|<style|<iframe',
+        re.IGNORECASE)

 def comments_to_html(comments):
    '''
@ -53,6 +57,9 @@ def comments_to_html(comments):
                for x in comments.split('\n\n')]
        return '\n'.join(parts)

+    if sanitize_pat.search(comments) is not None:
+        return sanitize_comments_html(comments)
+
    # Explode lost CRs to \n\n
    comments = lost_cr_exception_pat.sub(lambda m: m.group().replace('.',
        '.\r'), comments)
@ -115,6 +122,11 @@ def comments_to_html(comments):

    return result.renderContents(encoding=None)

+def sanitize_comments_html(html):
+    text = html2text(html)
+    md = markdown.Markdown(safe_mode=True)
+    return md.convert(text)
+
 def test():
    for pat, val in [
            ('lineone\n\nlinetwo',
--- a/src/calibre/library/server/base.py
+++ b/src/calibre/library/server/base.py
@ -118,16 +118,17 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache,
        self.set_database(db)

        cherrypy.config.update({
-                                'log.screen'             : opts.develop,
-                                'engine.autoreload_on'   : opts.develop,
-                                'tools.log_headers.on'   : opts.develop,
-                                'checker.on'             : opts.develop,
-                                'request.show_tracebacks': show_tracebacks,
-                                'server.socket_host'     : listen_on,
-                                'server.socket_port'     : opts.port,
-                                'server.socket_timeout'  : opts.timeout, #seconds
-                                'server.thread_pool'     : opts.thread_pool, # number of threads
-                               })
+            'log.screen'             : opts.develop,
+            'engine.autoreload_on'   : getattr(opts,
+                                        'auto_reload', False),
+            'tools.log_headers.on'   : opts.develop,
+            'checker.on'             : opts.develop,
+            'request.show_tracebacks': show_tracebacks,
+            'server.socket_host'     : listen_on,
+            'server.socket_port'     : opts.port,
+            'server.socket_timeout'  : opts.timeout, #seconds
+            'server.thread_pool'     : opts.thread_pool, # number of threads
+        })
        if embedded or wsgi:
            cherrypy.config.update({'engine.SIGHUP'          : None,
                                    'engine.SIGTERM'         : None,})
--- a/src/calibre/library/server/browse.py
+++ b/src/calibre/library/server/browse.py
@ -123,9 +123,10 @@ def get_category_items(category, items, restriction, datatype, prefix): # {{{

    def item(i):
        templ = (u'<div title="{4}" class="category-item">'
-                '<div class="category-name">{0}</div><div>{1}</div>'
-                '<div>{2}'
-                '<span class="href">{5}{3}</span></div></div>')
+                '<div class="category-name">'
+                '<a href="{5}{3}" title="{4}">{0}</a></div>'
+                '<div>{1}</div>'
+                '<div>{2}</div></div>')
        rating, rstring = render_rating(i.avg_rating, prefix)
        name = xml(i.name)
        if datatype == 'rating':
@ -142,7 +143,7 @@ def get_category_items(category, items, restriction, datatype, prefix): # {{{
            q = category
        href = '/browse/matches/%s/%s'%(quote(q), quote(id_))
        return templ.format(xml(name), rating,
-                xml(desc), xml(href), rstring, prefix)
+                xml(desc), xml(href, True), rstring, prefix)

    items = list(map(item, items))
    return '\n'.join(['<div class="category-container">'] + items + ['</div>'])
@ -252,8 +253,6 @@ class BrowseServer(object):
        lp = self.db.library_path
        if isbytestring(lp):
            lp = force_unicode(lp, filesystem_encoding)
-        if isinstance(ans, unicode):
-            ans = ans.encode('utf-8')
        ans = ans.replace('{library_name}', xml(os.path.basename(lp)))
        ans = ans.replace('{library_path}', xml(lp, True))
        ans = ans.replace('{initial_search}', initial_search)
@ -335,9 +334,10 @@ class BrowseServer(object):
                icon = 'blank.png'
            cats.append((meta['name'], category, icon))

-        cats = [('<li title="{2} {0}"><img src="{3}{src}" alt="{0}" />'
+        cats = [('<li><a title="{2} {0}" href="/browse/category/{1}">&nbsp;</a>'
+                 '<img src="{3}{src}" alt="{0}" />'
                 '<span class="label">{0}</span>'
-                 '<span class="url">{3}/browse/category/{1}</span></li>')
+                 '</li>')
                .format(xml(x, True), xml(quote(y)), xml(_('Browse books by')),
                    self.opts.url_prefix, src='/browse/icon/'+z)
                for x, y, z in cats]
@ -393,14 +393,15 @@ class BrowseServer(object):
            for x in sorted(starts):
                category_groups[x] = len([y for y in items if
                    getter(y).upper().startswith(x)])
-            items = [(u'<h3 title="{0}">{0} <span>[{2}]</span></h3><div>'
+            items = [(u'<h3 title="{0}"><a class="load_href" title="{0}"'
+                      u' href="{4}{3}"><strong>{0}</strong> [{2}]</a></h3><div>'
                      u'<div class="loaded" style="display:none"></div>'
                      u'<div class="loading"><img alt="{1}" src="{4}/static/loading.gif" /><em>{1}</em></div>'
-                      u'<span class="load_href">{4}{3}</span></div>').format(
+                      u'</div>').format(
                        xml(s, True),
                        xml(_('Loading, please wait'))+'&hellip;',
                        unicode(c),
-                        xml(u'/browse/category_group/%s/%s'%(category, s)),
+                        xml(u'/browse/category_group/%s/%s'%(category, s), True),
                        self.opts.url_prefix)
                    for s, c in category_groups.items()]
            items = '\n\n'.join(items)
@ -563,7 +564,8 @@ class BrowseServer(object):
            if not val:
                val = ''
            args[key] = xml(val, True)
-        fname = ascii_filename(args['title']) + ' - ' + ascii_filename(args['authors'])
+        fname = quote(ascii_filename(args['title']) + ' - ' +
+                ascii_filename(args['authors']))
        return args, fmt, fmts, fname

    @Endpoint(mimetype='application/json; charset=utf-8')
--- a/src/calibre/library/server/content.py
+++ b/src/calibre/library/server/content.py
@ -70,10 +70,10 @@ class ContentServer(object):
            id = id.rpartition('_')[-1].partition('.')[0]
            match = re.search(r'\d+', id)
            if not match:
-                raise cherrypy.HTTPError(400, 'id:%s not an integer'%id)
+                raise cherrypy.HTTPError(404, 'id:%s not an integer'%id)
            id = int(match.group())
        if not self.db.has_id(id):
-            raise cherrypy.HTTPError(400, 'id:%d does not exist in database'%id)
+            raise cherrypy.HTTPError(404, 'id:%d does not exist in database'%id)
        if what == 'thumb' or what.startswith('thumb_'):
            try:
                width, height = map(int, what.split('_')[1:])
--- a/src/calibre/library/server/main.py
+++ b/src/calibre/library/server/main.py
@ -58,6 +58,9 @@ The OPDS interface is advertised via BonJour automatically.
            help=_('Specifies a restriction to be used for this invocation. '
                   'This option overrides any per-library settings specified'
                   ' in the GUI'))
+    parser.add_option('--auto-reload', default=False, action='store_true',
+            help=_('Auto reload server when source code changes. May not'
+                ' work in all environments.'))
    return parser


--- a/src/calibre/library/server/mobile.py
+++ b/src/calibre/library/server/mobile.py
@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en'

 import re, os
 import __builtin__
+from urllib import quote

 import cherrypy
 from lxml import html
@ -115,8 +116,8 @@ def build_index(books, num, search, sort, order, start, total, url_base, CKEYS,

        data = TD()
        for fmt in book['formats'].split(','):
-            a = ascii_filename(book['authors'])
-            t = ascii_filename(book['title'])
+            a = quote(ascii_filename(book['authors']))
+            t = quote(ascii_filename(book['title']))
            s = SPAN(
                A(
                    fmt.lower(),
--- a/src/calibre/utils/html2text.py
+++ b/src/calibre/utils/html2text.py
@ -17,18 +17,18 @@ sgmllib.charref = re.compile('&#([xX]?[0-9a-fA-F]+)[^0-9a-fA-F]')
 try: from textwrap import wrap
 except: pass

-# Use Unicode characters instead of their ascii pseudo-replacements
+# Use Unicode characters instead of their ascii pseudo-replacements
 UNICODE_SNOB = 0

 # Put the links after each paragraph instead of at the end.
 LINKS_EACH_PARAGRAPH = 0

 # Wrap long lines at position. 0 for no wrapping. (Requires Python 2.3.)
-BODY_WIDTH = 0
+BODY_WIDTH = 78

 # Don't show internal links (href="#local-anchor") -- corresponding link targets
 # won't be visible in the plain text file anyway.
-SKIP_INTERNAL_LINKS = True
+SKIP_INTERNAL_LINKS = False

 ### Entity Nonsense ###

@ -41,13 +41,13 @@ def name2cp(k):
        if k.startswith("&#") and k.endswith(";"): return int(k[2:-1]) # not in latin-1
        return ord(codecs.latin_1_decode(k)[0])

-unifiable = {'rsquo':"'", 'lsquo':"'", 'rdquo':'"', 'ldquo':'"', 
+unifiable = {'rsquo':"'", 'lsquo':"'", 'rdquo':'"', 'ldquo':'"',
 'copy':'(C)', 'mdash':'--', 'nbsp':' ', 'rarr':'->', 'larr':'<-', 'middot':'*',
 'ndash':'-', 'oelig':'oe', 'aelig':'ae',
-'agrave':'a', 'aacute':'a', 'acirc':'a', 'atilde':'a', 'auml':'a', 'aring':'a', 
-'egrave':'e', 'eacute':'e', 'ecirc':'e', 'euml':'e', 
+'agrave':'a', 'aacute':'a', 'acirc':'a', 'atilde':'a', 'auml':'a', 'aring':'a',
+'egrave':'e', 'eacute':'e', 'ecirc':'e', 'euml':'e',
 'igrave':'i', 'iacute':'i', 'icirc':'i', 'iuml':'i',
-'ograve':'o', 'oacute':'o', 'ocirc':'o', 'otilde':'o', 'ouml':'o', 
+'ograve':'o', 'oacute':'o', 'ocirc':'o', 'otilde':'o', 'ouml':'o',
 'ugrave':'u', 'uacute':'u', 'ucirc':'u', 'uuml':'u'}

 unifiable_n = {}
@ -60,7 +60,7 @@ def charref(name):
        c = int(name[1:], 16)
    else:
        c = int(name)
-    
+
    if not UNICODE_SNOB and c in unifiable_n.keys():
        return unifiable_n[c]
    else:
@ -76,14 +76,14 @@ def entityref(c):

 def replaceEntities(s):
    s = s.group(1)
-    if s[0] == "#": 
+    if s[0] == "#":
        return charref(s[1:])
    else: return entityref(s)

 r_unescape = re.compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));")
 def unescape(s):
    return r_unescape.sub(replaceEntities, s)
-    
+
 def fixattrs(attrs):
    # Fix bug in sgmllib.py
    if not attrs: return attrs
@ -105,7 +105,7 @@ def optwrap(text):
    """Wrap all paragraphs in the provided text."""
    if not BODY_WIDTH:
        return text
-    
+
    assert wrap, "Requires Python 2.3."
    result = ''
    newlines = 0
@ -136,7 +136,7 @@ def hn(tag):
 class _html2text(sgmllib.SGMLParser):
    def __init__(self, out=None, baseurl=''):
        sgmllib.SGMLParser.__init__(self)
-        
+
        if out is None: self.out = self.outtextf
        else: self.out = out
        self.outtext = u''
@ -157,43 +157,43 @@ class _html2text(sgmllib.SGMLParser):
        self.abbr_data = None # last inner HTML (for abbr being defined)
        self.abbr_list = {} # stack of abbreviations to write later
        self.baseurl = baseurl
-    
-    def outtextf(self, s): 
+
+    def outtextf(self, s):
        self.outtext += s
-    
+
    def close(self):
        sgmllib.SGMLParser.close(self)
-        
+
        self.pbr()
        self.o('', 0, 'end')
-        
+
        return self.outtext
-        
+
    def handle_charref(self, c):
        self.o(charref(c))

    def handle_entityref(self, c):
        self.o(entityref(c))
-            
+
    def unknown_starttag(self, tag, attrs):
        self.handle_tag(tag, attrs, 1)
-    
+
    def unknown_endtag(self, tag):
        self.handle_tag(tag, None, 0)
-        
+
    def previousIndex(self, attrs):
        """ returns the index of certain set of attributes (of a link) in the
            self.a list
- 
+
            If the set of attributes is not found, returns None
        """
        if not attrs.has_key('href'): return None
-        
+
        i = -1
        for a in self.a:
            i += 1
            match = 0
-            
+
            if a.has_key('href') and a['href'] == attrs['href']:
                if a.has_key('title') or attrs.has_key('title'):
                        if (a.has_key('title') and attrs.has_key('title') and
@ -206,13 +206,13 @@ class _html2text(sgmllib.SGMLParser):

    def handle_tag(self, tag, attrs, start):
        attrs = fixattrs(attrs)
-    
+
        if hn(tag):
            self.p()
            if start: self.o(hn(tag)*"#" + ' ')

        if tag in ['p', 'div']: self.p()
-        
+
        if tag == "br" and start: self.o("  \n")

        if tag == "hr" and start:
@ -220,21 +220,21 @@ class _html2text(sgmllib.SGMLParser):
            self.o("* * *")
            self.p()

-        if tag in ["head", "style", 'script']: 
+        if tag in ["head", "style", 'script']:
            if start: self.quiet += 1
            else: self.quiet -= 1

        if tag in ["body"]:
            self.quiet = 0 # sites like 9rules.com never close <head>
-        
+
        if tag == "blockquote":
-            if start: 
+            if start:
                self.p(); self.o('> ', 0, 1); self.start = 1
                self.blockquote += 1
            else:
                self.blockquote -= 1
                self.p()
-        
+
        if tag in ['em', 'i', 'u']: self.o("_")
        if tag in ['strong', 'b']: self.o("**")
        if tag == "code" and not self.pre: self.o('`') #TODO: `` `this` ``
@ -243,7 +243,7 @@ class _html2text(sgmllib.SGMLParser):
                attrsD = {}
                for (x, y) in attrs: attrsD[x] = y
                attrs = attrsD
-                
+
                self.abbr_title = None
                self.abbr_data = ''
                if attrs.has_key('title'):
@ -253,13 +253,13 @@ class _html2text(sgmllib.SGMLParser):
                    self.abbr_list[self.abbr_data] = self.abbr_title
                    self.abbr_title = None
                self.abbr_data = ''
-        
+
        if tag == "a":
            if start:
                attrsD = {}
                for (x, y) in attrs: attrsD[x] = y
                attrs = attrsD
-                if attrs.has_key('href') and not (SKIP_INTERNAL_LINKS and attrs['href'].startswith('#')): 
+                if attrs.has_key('href') and not (SKIP_INTERNAL_LINKS and attrs['href'].startswith('#')):
                    self.astack.append(attrs)
                    self.o("[")
                else:
@ -277,7 +277,7 @@ class _html2text(sgmllib.SGMLParser):
                            a['outcount'] = self.outcount
                            self.a.append(a)
                        self.o("][" + `a['count']` + "]")
-        
+
        if tag == "img" and start:
            attrsD = {}
            for (x, y) in attrs: attrsD[x] = y
@ -296,20 +296,20 @@ class _html2text(sgmllib.SGMLParser):
                self.o("![")
                self.o(alt)
                self.o("]["+`attrs['count']`+"]")
-        
+
        if tag == 'dl' and start: self.p()
        if tag == 'dt' and not start: self.pbr()
        if tag == 'dd' and start: self.o('    ')
        if tag == 'dd' and not start: self.pbr()
-        
+
        if tag in ["ol", "ul"]:
            if start:
                self.list.append({'name':tag, 'num':0})
            else:
                if self.list: self.list.pop()
-            
+
            self.p()
-        
+
        if tag == 'li':
            if start:
                self.pbr()
@ -323,10 +323,10 @@ class _html2text(sgmllib.SGMLParser):
                self.start = 1
            else:
                self.pbr()
-        
+
        if tag in ["table", "tr"] and start: self.p()
        if tag == 'td': self.pbr()
-        
+
        if tag == "pre":
            if start:
                self.startpre = 1
@ -334,34 +334,34 @@ class _html2text(sgmllib.SGMLParser):
            else:
                self.pre = 0
            self.p()
-            
+
    def pbr(self):
        if self.p_p == 0: self.p_p = 1

    def p(self): self.p_p = 2
-    
+
    def o(self, data, puredata=0, force=0):
        if self.abbr_data is not None: self.abbr_data += data
-        
-        if not self.quiet: 
+
+        if not self.quiet:
            if puredata and not self.pre:
                data = re.sub('\s+', ' ', data)
                if data and data[0] == ' ':
                    self.space = 1
                    data = data[1:]
            if not data and not force: return
-            
+
            if self.startpre:
                #self.out(" :") #TODO: not output when already one there
                self.startpre = 0
-            
+
            bq = (">" * self.blockquote)
            if not (force and data and data[0] == ">") and self.blockquote: bq += " "
-            
+
            if self.pre:
                bq += "    "
                data = data.replace("\n", "\n"+bq)
-            
+
            if self.start:
                self.space = 0
                self.p_p = 0
@ -377,7 +377,7 @@ class _html2text(sgmllib.SGMLParser):
            if self.p_p:
                self.out(('\n'+bq)*self.p_p)
                self.space = 0
-                
+
            if self.space:
                if not self.lastWasNL: self.out(' ')
                self.space = 0
@ -388,7 +388,7 @@ class _html2text(sgmllib.SGMLParser):
                newa = []
                for link in self.a:
                    if self.outcount > link['outcount']:
-                        self.out("   ["+`link['count']`+"]: " + urlparse.urljoin(self.baseurl, link['href'])) 
+                        self.out("   ["+`link['count']`+"]: " + urlparse.urljoin(self.baseurl, link['href']))
                        if link.has_key('title'): self.out(" ("+link['title']+")")
                        self.out("\n")
                    else:
@ -397,7 +397,7 @@ class _html2text(sgmllib.SGMLParser):
                if self.a != newa: self.out("\n") # Don't need an extra line when nothing was done.

                self.a = newa
-            
+
            if self.abbr_list and force == "end":
                for abbr, definition in self.abbr_list.items():
                    self.out("  *[" + abbr + "]: " + definition + "\n")
@ -410,7 +410,7 @@ class _html2text(sgmllib.SGMLParser):
    def handle_data(self, data):
        if r'\/script>' in data: self.quiet -= 1
        self.o(data, 1)
-    
+
    def unknown_decl(self, data): pass

 def wrapwrite(text): sys.stdout.write(text.encode('utf8'))