diff --git a/resources/content_server/browse/browse.css b/resources/content_server/browse/browse.css index 92ed4c3ce6..1243795e55 100644 --- a/resources/content_server/browse/browse.css +++ b/resources/content_server/browse/browse.css @@ -208,6 +208,8 @@ h2.library_name { } +.toplevel li a { text-decoration: none; } + .toplevel li img { vertical-align: middle; margin-right: 1em; @@ -261,9 +263,16 @@ h2.library_name { } -.category div.category-item span.href { display: none } +.category div.category-item a { text-decoration: none; color: inherit; } -#groups span.load_href { display: none } +#groups a.load_href { + text-decoration: none; + color: inherit; + font-size: medium; + font-weight: normal; + padding: 0; + padding-left: 0.5em; +} #groups h3 { font-weight: bold; diff --git a/resources/content_server/browse/browse.js b/resources/content_server/browse/browse.js index 89ce679871..e0585a9afd 100644 --- a/resources/content_server/browse/browse.js +++ b/resources/content_server/browse/browse.js @@ -116,7 +116,7 @@ function toplevel() { $(".sort_select").hide(); $(".toplevel li").click(function() { - var href = $(this).children("span.url").text(); + var href = $(this).children("a").attr('href'); window.location = href; }); @@ -133,7 +133,7 @@ function render_error(msg) { // Category feed {{{ function category_clicked() { - var href = $(this).find("span.href").html(); + var href = $(this).find("a").attr('href'); window.location = href; } @@ -151,7 +151,7 @@ function category() { change: function(event, ui) { if (ui.newContent) { - var href = ui.newContent.children("span.load_href").html(); + var href = ui.newContent.prev().children("a.load_href").attr('href'); ui.newContent.children(".loading").show(); if (href) { $.ajax({ diff --git a/resources/content_server/monocle.js b/resources/content_server/read/monocle.js similarity index 100% rename from resources/content_server/monocle.js rename to resources/content_server/read/monocle.js diff --git a/resources/recipes/clic_rbs.recipe b/resources/recipes/clic_rbs.recipe new file mode 100644 index 0000000000..559dfa2000 --- /dev/null +++ b/resources/recipes/clic_rbs.recipe @@ -0,0 +1,50 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class ClicRBS(BasicNewsRecipe): + title = u'ClicRBS' + language = 'pt' + __author__ = 'arvoredo' + oldest_article = 3 + max_articles_per_feed = 9 + cover_url = 'http://www.publicidade.clicrbs.com.br/clicrbs/imgs/logo_clic.gif' + + remove_tags = [ + dict(name='div', attrs={'class':['clic-barra-inner', 'botao-versao-mobile ']}) + ] + + remove_tags_before = dict(name='div ', attrs={'class':'descricao'}) + remove_tags_before = dict(name='div', attrs={'id':'glb-corpo'}) + remove_tags_before = dict(name='div', attrs={'class':'descricao'}) + remove_tags_before = dict(name='div', attrs={'class':'coluna'}) + remove_tags_after = dict(name='div', attrs={'class':'extra'}) + remove_tags_after = dict(name='div', attrs={'id':'links-patrocinados'}) + remove_tags_after = dict(name='h4', attrs={'class':'tipo-c comente'}) + remove_tags_after = dict(name='ul', attrs={'class':'lista'}) + + feeds = [ + (u'zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=1&local=1&channel=13') + , (u'diariocatarinense.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=2&local=18&channel=67') + , (u'Concursos e Emprego', u'http://g1.globo.com/Rss2/0,,AS0-9654,00.xml') + , (u'Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?channel=87&uf=1&local=1') + , (u'Economia, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=801&uf=1&local=1&channel=13') + , (u'Esportes, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=802&uf=1&local=1&channel=13') + , (u'Economia, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1180&channel=87&uf=1&local=1') + , (u'Política, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1185&channel=87&uf=1&local=1') + , (u'Mundo, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1184&channel=87&uf=1&local=1') + , (u'Catarinense, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=&theme=371&uf=2&channel=2') + , (u'Geral, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1183&channel=87&uf=1&local=1') + , (u'Estilo de Vida, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=805&uf=1&local=1&channel=13') + , (u'Corrida, Corrida, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1313&theme=15704&uf=1&channel=2') + , (u'Jornal de Santa Catarina, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?espid=159&uf=2&local=18') + , (u'Grêmio, Futebol, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=11&theme=65&uf=1&channel=2') + , (u'Velocidade, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1314&theme=2655&uf=1&channel=2') + ] + + extra_css = ''' + cite{color:#007BB5; font-size:xx-small; font-style:italic;} + body{font-family:Arial,Helvetica,sans-serif;font-size:x-small;} + h3{font-size:large; color:#082963; font-weight:bold;} + #ident{color:#0179B4; font-size:xx-small;} + p{color:#000000;font-weight:normal;} + .commentario p{color:#007BB5; font-style:italic;} + ''' diff --git a/resources/recipes/cm_journal.recipe b/resources/recipes/cm_journal.recipe new file mode 100644 index 0000000000..c47fb35775 --- /dev/null +++ b/resources/recipes/cm_journal.recipe @@ -0,0 +1,44 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class CMJornal_pt(BasicNewsRecipe): + title = 'Correio da Manha - Portugal' + __author__ = 'jmst' + description = 'As noticias de Portugal e do Mundo' + publisher = 'Cofina Media' + category = '' + oldest_article = 1 + no_stylesheets = True + encoding = 'utf-8' + use_embedded_content = False + language = 'pt' + extra_css = ' .publish{font-style: italic; line-height: 1.2em; border-bottom: 1px dotted; padding: 5px 0} .entity{line-height: 1.2em} .overview{line-height:1.2em} ' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + keep_only_tags = [ + dict(name=['h2','h1']) + , dict(name='div', attrs={'class': ['news']}) + ] + + remove_tags = [ + dict(name=['object','embed','iframe']) + ,dict(name='a',attrs={'href':['#']}) + ] + + feeds = [ + (u'Actualidade' , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000009-0000-0000-0000-000000000009' ) + ,(u'Portugal' , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000010-0000-0000-0000-000000000010' ) + ,(u'Economia' , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000011-0000-0000-0000-000000000011' ) + ,(u'Mundo' , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000091-0000-0000-0000-000000000091' ) + ,(u'Desporto' , u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000012-0000-0000-0000-000000000012' ) + ,(u'TV & Media', u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000092-0000-0000-0000-000000000092') + ] + + def print_version(self, url): + return url.replace('noticia.aspx', 'Imprimir.aspx') + diff --git a/resources/recipes/mediapart.recipe b/resources/recipes/mediapart.recipe index ca5f787747..0cf8f21032 100644 --- a/resources/recipes/mediapart.recipe +++ b/resources/recipes/mediapart.recipe @@ -1,53 +1,79 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2009, Mathieu Godlewski ' +__copyright__ = '2009, Mathieu Godlewski ; 2010, Louis Gesbert ' ''' Mediapart ''' -import re, string -from calibre.ebooks.BeautifulSoup import BeautifulSoup +from calibre.ebooks.BeautifulSoup import Tag from calibre.web.feeds.news import BasicNewsRecipe class Mediapart(BasicNewsRecipe): title = 'Mediapart' - __author__ = 'Mathieu Godlewski ' + __author__ = 'Mathieu Godlewski' description = 'Global news in french from online newspapers' oldest_article = 7 language = 'fr' + needs_subscription = True max_articles_per_feed = 50 no_stylesheets = True - html2lrf_options = ['--base-font-size', '10'] + cover_url = 'http://www.mediapart.fr/sites/all/themes/mediapart/mediapart/images/annonce.jpg' feeds = [ ('Les articles', 'http://www.mediapart.fr/articles/feed'), ] - preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in - [ - (r'', lambda match : '

'+match.group(1)+'

'), - (r'

Mediapart\.fr

', lambda match : ''), - (r']*>[\s]*

', lambda match : ''), - (r'

[^>]*

', lambda match : ''), +# -- print-version has poor quality on this website, better do the conversion ourselves +# +# preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in +# [ +# (r'', lambda match : '

'+match.group(1)+'

'), +# (r'[^>]+]*>([^<]*)[^<]*', +# lambda match : ''+match.group(1)+''), +# (r'\'', lambda match: '’'), +# ] +# ] +# +# remove_tags = [ dict(name='div', attrs={'class':'print-source_url'}), +# dict(name='div', attrs={'class':'print-links'}), +# dict(name='img', attrs={'src':'entete_article.png'}), +# dict(name='br') ] +# +# def print_version(self, url): +# raw = self.browser.open(url).read() +# soup = BeautifulSoup(raw.decode('utf8', 'replace')) +# div = soup.find('div', {'id':re.compile('node-\d+')}) +# if div is None: +# return None +# article_id = string.replace(div['id'], 'node-', '') +# if article_id is None: +# return None +# return 'http://www.mediapart.fr/print/'+article_id + +# -- Non-print version [dict(name='div', attrs={'class':'advert'})] + + keep_only_tags = [ + dict(name='h1', attrs={'class':'title'}), + dict(name='div', attrs={'class':'page_papier_detail'}), ] - ] - remove_tags = [ dict(name='div', attrs={'class':'print-source_url'}), - dict(name='div', attrs={'class':'print-links'}), - dict(name='img', attrs={'src':'entete_article.png'}), - ] + def preprocess_html(self,soup): + for title in soup.findAll('div', {'class':'titre'}): + tag = Tag(soup, 'h3') + title.replaceWith(tag) + tag.insert(0,title) + return soup +# -- Handle login + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + if self.username is not None and self.password is not None: + br.open('http://www.mediapart.fr/') + br.select_form(nr=1) + br['name'] = self.username + br['pass'] = self.password + br.submit() + return br - def print_version(self, url): - raw = self.browser.open(url).read() - soup = BeautifulSoup(raw.decode('utf8', 'replace')) - div = soup.find('div', {'class':'node node-type-article'}) - if div is None: - return None - article_id = string.replace(div['id'], 'node-', '') - if article_id is None: - return None - return 'http://www.mediapart.fr/print/'+article_id diff --git a/resources/recipes/rue89.recipe b/resources/recipes/rue89.recipe new file mode 100644 index 0000000000..51cf8f6b98 --- /dev/null +++ b/resources/recipes/rue89.recipe @@ -0,0 +1,53 @@ +__license__ = 'GPL v3' +__copyright__ = '2010, Louis Gesbert ' +''' +Rue89 +''' + +__author__ = '2010, Louis Gesbert ' + +import re +from calibre.ebooks.BeautifulSoup import Tag +from calibre.web.feeds.news import BasicNewsRecipe + +class Rue89(BasicNewsRecipe): + title = 'Rue89' + __author__ = 'Louis Gesbert' + description = 'Popular free french news website' + title = u'Rue89' + language = 'fr' + oldest_article = 7 + max_articles_per_feed = 50 + + feeds = [(u'La Une', u'http://www.rue89.com/homepage/feed')] + + no_stylesheets = True + + preprocess_regexps = [ + (re.compile(r'<(/?)h2>', re.IGNORECASE|re.DOTALL), + lambda match : '<'+match.group(1)+'h3>'), + (re.compile(r'', re.IGNORECASE|re.DOTALL), + lambda match : '

'+match.group(1)+'

'), + (re.compile(r']+src="[^"]*/numeros/(\d+)[^0-9.">]*.gif"[^>]*/>', re.IGNORECASE|re.DOTALL), + lambda match : ''+match.group(1)+''), + (re.compile(r'\''), lambda match: '’'), + ] + + def preprocess_html(self,soup): + body = Tag(soup, 'body') + title = soup.find('h1', {'class':'title'}) + content = soup.find('div', {'class':'content'}) + soup.body.replaceWith(body) + body.insert(0, title) + body.insert(1, content) + return soup + + remove_tags = [ #dict(name='div', attrs={'class':'print-source_url'}), + #dict(name='div', attrs={'class':'print-links'}), + #dict(name='img', attrs={'class':'print-logo'}), + dict(name='div', attrs={'class':'content_top'}), + dict(name='div', attrs={'id':'sidebar-left'}), ] + +# -- print-version has poor quality on this website, better do the conversion ourselves +# def print_version(self, url): +# return re.sub('^.*-([0-9]+)$', 'http://www.rue89.com/print/\\1',url) diff --git a/setup/installer/windows/notes.rst b/setup/installer/windows/notes.rst index a8ba41e8ff..545070f7ff 100644 --- a/setup/installer/windows/notes.rst +++ b/setup/installer/windows/notes.rst @@ -28,7 +28,9 @@ If there are no windows binaries already compiled for the version of python you Run the following command to install python dependencies:: - easy_install --always-unzip -U ipython mechanize BeautifulSoup pyreadline python-dateutil dnspython + easy_install --always-unzip -U ipython mechanize pyreadline python-dateutil dnspython + +Install BeautifulSoup 3.0.x manually into site-packages (3.1.x parses broken HTML very poorly) Qt -------- diff --git a/setup/server.py b/setup/server.py index 2103f4805a..66cb6adf7b 100644 --- a/setup/server.py +++ b/setup/server.py @@ -5,10 +5,38 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import subprocess, tempfile, os, time +import subprocess, tempfile, os, time, sys +from threading import RLock from setup import Command +try: + from pyinotify import WatchManager, ThreadedNotifier, EventsCodes, ProcessEvent +except: + wm = None +else: + wm = WatchManager() + flags = EventsCodes.ALL_FLAGS + mask = flags['IN_MODIFY'] + + class ProcessEvents(ProcessEvent): + + def __init__(self, command): + ProcessEvent.__init__(self) + self.command = command + + def process_default(self, event): + name = getattr(event, + 'name', None) + if name and os.path.splitext(name)[1] == '.py': + print + print name, 'changed' + self.command.kill_server() + self.command.launch_server() + print self.command.prompt, + sys.stdout.flush() + + class Server(Command): description = 'Run the calibre server in development mode conveniently' @@ -18,31 +46,62 @@ class Server(Command): def rebuild_monocole(self): subprocess.check_call(['sprocketize', '-C', self.MONOCLE_PATH, '-I', 'src', 'src/monocle.js'], - stdout=open('resources/content_server/monocle.js', 'wb')) + stdout=open('resources/content_server/read/monocle.js', 'wb')) - def launch_server(self, log): - self.rebuild_monocole() - p = subprocess.Popen(['calibre-server', '--develop'], - stderr=subprocess.STDOUT, stdout=log) - return p + def launch_server(self): + print 'Starting server...\n' + with self.lock: + self.rebuild_monocole() + self.server_proc = p = subprocess.Popen(['calibre-server', '--develop'], + stderr=subprocess.STDOUT, stdout=self.server_log) + time.sleep(0.2) + if p.poll() is not None: + print 'Starting server failed' + raise SystemExit(1) + return p + + def kill_server(self): + print 'Killing server...\n' + if self.server_proc is not None: + with self.lock: + if self.server_proc.poll() is None: + self.server_proc.terminate() + while self.server_proc.poll() is None: + time.sleep(0.1) + + def watch(self): + if wm is not None: + self.notifier = ThreadedNotifier(wm, ProcessEvents(self)) + self.notifier.start() + self.wdd = wm.add_watch(os.path.abspath('src'), mask, rec=True) def run(self, opts): + self.lock = RLock() tdir = tempfile.gettempdir() logf = os.path.join(tdir, 'calibre-server.log') - log = open(logf, 'ab') + self.server_log = open(logf, 'ab') + self.prompt = 'Press Enter to kill/restart server. Ctrl+C to quit: ' print 'Server log available at:', logf + print + self.watch() + first = True while True: - print 'Starting server...' - p = self.launch_server(log) + self.launch_server() + if first: + pass + first = False + try: - raw_input('Press Enter to kill/restart server. Ctrl+C to quit: ') + raw_input(self.prompt) except: + print + self.kill_server() break else: - while p.returncode is None: - p.terminate() - time.sleep(0.1) - p.kill() + self.kill_server() print + if hasattr(self, 'notifier'): + self.notifier.stop() + diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 2945cc6604..3cc84f248d 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -475,7 +475,7 @@ from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, \ SOVOS, PICO from calibre.devices.sne.driver import SNE from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \ - GEMEI, VELOCITYMICRO, PDNOVEL_KOBO + GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, Q600 from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG from calibre.devices.kobo.driver import KOBO @@ -586,6 +586,7 @@ plugins += [ AVANT, MENTOR, SWEEX, + Q600, KOGAN, PDNOVEL, SPECTRA, diff --git a/src/calibre/devices/apple/driver.py b/src/calibre/devices/apple/driver.py index 9ad3cf3e08..74fa868255 100644 --- a/src/calibre/devices/apple/driver.py +++ b/src/calibre/devices/apple/driver.py @@ -19,7 +19,7 @@ from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.epub import set_metadata from calibre.library.server.utils import strftime from calibre.utils.config import config_dir, prefs -from calibre.utils.date import isoformat, now, parse_date +from calibre.utils.date import now, parse_date from calibre.utils.logging import Log from calibre.utils.zipfile import ZipFile @@ -2521,11 +2521,11 @@ class ITUNES(DriverBase): metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour, old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo) else: - metadata.timestamp = isoformat(now()) + metadata.timestamp = now() if DEBUG: self.log.info(" add timestamp: %s" % metadata.timestamp) else: - metadata.timestamp = isoformat(now()) + metadata.timestamp = now() if DEBUG: self.log.warning(" missing block in OPF file") self.log.info(" add timestamp: %s" % metadata.timestamp) diff --git a/src/calibre/devices/misc.py b/src/calibre/devices/misc.py index 92e26d47e4..af5a77ce03 100644 --- a/src/calibre/devices/misc.py +++ b/src/calibre/devices/misc.py @@ -72,6 +72,15 @@ class SWEEX(USBMS): EBOOK_DIR_MAIN = '' SUPPORTS_SUB_DIRS = True +class Q600(SWEEX): + + name = 'Digma Q600 Device interface' + gui_name = 'Q600' + description = _('Communicate with the Digma Q600') + + BCD = [0x325] + FORMATS = ['epub', 'fb2', 'mobi', 'prc', 'html', 'rtf', 'chm', 'pdf', 'txt'] + class KOGAN(SWEEX): name = 'Kogan Device Interface' diff --git a/src/calibre/ebooks/metadata/amazon.py b/src/calibre/ebooks/metadata/amazon.py index a8ff0f1ad0..9c89016e8b 100644 --- a/src/calibre/ebooks/metadata/amazon.py +++ b/src/calibre/ebooks/metadata/amazon.py @@ -9,11 +9,13 @@ Fetch metadata using Amazon AWS import sys, re from lxml import html +from lxml.html import soupparser from calibre import browser from calibre.ebooks.metadata import check_isbn from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.chardet import xml_to_unicode +from calibre.library.comments import sanitize_comments_html def find_asin(br, isbn): q = 'http://www.amazon.com/s?field-keywords='+isbn @@ -70,7 +72,7 @@ def get_metadata(br, asin, mi): return False raw = xml_to_unicode(raw, strip_encoding_pats=True, resolve_entities=True)[0] - root = html.fromstring(raw) + root = soupparser.fromstring(raw) ratings = root.xpath('//form[@id="handleBuy"]/descendant::*[@class="asinReviewsSummary"]') if ratings: pat = re.compile(r'([0-9.]+) out of (\d+) stars') @@ -95,25 +97,26 @@ def get_metadata(br, asin, mi): # remove all attributes from tags desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc) # Collapse whitespace - desc = re.sub('\n+', '\n', desc) - desc = re.sub(' +', ' ', desc) + #desc = re.sub('\n+', '\n', desc) + #desc = re.sub(' +', ' ', desc) # Remove the notice about text referring to out of print editions desc = re.sub(r'(?s)--This text ref.*?', '', desc) # Remove comments desc = re.sub(r'(?s)', '', desc) - mi.comments = desc + mi.comments = sanitize_comments_html(desc) return True def main(args=sys.argv): # Test xisbn - print get_social_metadata('Learning Python', None, None, '8324616489') - print + #print get_social_metadata('Learning Python', None, None, '8324616489') + #print # Test sophisticated comment formatting - print get_social_metadata('Swan Thieves', None, None, '9780316065795') + print get_social_metadata('Angels & Demons', None, None, '9781416580829') print + return # Random tests print get_social_metadata('Star Trek: Destiny: Mere Mortals', None, None, '9781416551720') diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index 78585d13b6..4e93335af6 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -484,17 +484,22 @@ class DeviceMenu(QMenu): # {{{ _('Storage Card B')), ] + later_menus = [] for menu in (self, self.set_default_menu): for actions, desc in ( (basic_actions, ''), + (specific_actions, _('Send specific format to')), (delete_actions, _('Send and delete from library')), - (specific_actions, _('Send specific format to')) ): mdest = menu if actions is not basic_actions: - mdest = menu.addMenu(desc) + mdest = QMenu(desc) self._memory.append(mdest) + later_menus.append(mdest) + if menu is self.set_default_menu: + menu.addMenu(mdest) + menu.addSeparator() for dest, delete, specific, icon, text in actions: action = DeviceAction(dest, delete, specific, icon, text, self) @@ -507,7 +512,7 @@ class DeviceMenu(QMenu): # {{{ action.a_s.connect(self.action_triggered) self.actions.append(action) mdest.addAction(action) - if actions is not specific_actions: + if actions is basic_actions: menu.addSeparator() da = config['default_send_to_device_action'] @@ -525,14 +530,21 @@ class DeviceMenu(QMenu): # {{{ self.group.triggered.connect(self.change_default_action) self.addSeparator() + self.addMenu(later_menus[0]) + self.addSeparator() + mitem = self.addAction(QIcon(I('eject.png')), _('Eject device')) mitem.setEnabled(False) mitem.triggered.connect(lambda x : self.disconnect_mounted_device.emit()) self.disconnect_mounted_device_action = mitem - self.addSeparator() + self.addMenu(self.set_default_menu) self.addSeparator() + + self.addMenu(later_menus[1]) + self.addSeparator() + annot = self.addAction(_('Fetch annotations (experimental)')) annot.setEnabled(False) annot.triggered.connect(lambda x : diff --git a/src/calibre/library/comments.py b/src/calibre/library/comments.py index 670d9f2564..45d6ccaa45 100644 --- a/src/calibre/library/comments.py +++ b/src/calibre/library/comments.py @@ -11,11 +11,15 @@ from calibre.constants import preferred_encoding from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, \ CData, Comment, Declaration, ProcessingInstruction from calibre import prepare_string_for_xml +from calibre.utils.html2text import html2text +from calibre.ebooks.markdown import markdown # Hackish - ignoring sentences ending or beginning in numbers to avoid # confusion with decimal points. lost_cr_pat = re.compile('([a-z])([\.\?!])([A-Z])') lost_cr_exception_pat = re.compile(r'(Ph\.D)|(D\.Phil)|((Dr|Mr|Mrs|Ms)\.[A-Z])') +sanitize_pat = re.compile(r'' - '
{0}
{1}
' - '
{2}' - '{5}{3}
') + '
' + '{0}
' + '
{1}
' + '
{2}
') rating, rstring = render_rating(i.avg_rating, prefix) name = xml(i.name) if datatype == 'rating': @@ -142,7 +143,7 @@ def get_category_items(category, items, restriction, datatype, prefix): # {{{ q = category href = '/browse/matches/%s/%s'%(quote(q), quote(id_)) return templ.format(xml(name), rating, - xml(desc), xml(href), rstring, prefix) + xml(desc), xml(href, True), rstring, prefix) items = list(map(item, items)) return '\n'.join(['
'] + items + ['
']) @@ -252,8 +253,6 @@ class BrowseServer(object): lp = self.db.library_path if isbytestring(lp): lp = force_unicode(lp, filesystem_encoding) - if isinstance(ans, unicode): - ans = ans.encode('utf-8') ans = ans.replace('{library_name}', xml(os.path.basename(lp))) ans = ans.replace('{library_path}', xml(lp, True)) ans = ans.replace('{initial_search}', initial_search) @@ -335,9 +334,10 @@ class BrowseServer(object): icon = 'blank.png' cats.append((meta['name'], category, icon)) - cats = [('
  • {0}' + cats = [('
  •  ' + '{0}' '{0}' - '{3}/browse/category/{1}
  • ') + '') .format(xml(x, True), xml(quote(y)), xml(_('Browse books by')), self.opts.url_prefix, src='/browse/icon/'+z) for x, y, z in cats] @@ -393,14 +393,15 @@ class BrowseServer(object): for x in sorted(starts): category_groups[x] = len([y for y in items if getter(y).upper().startswith(x)]) - items = [(u'

    {0} [{2}]

    ' + items = [(u'

    {0} [{2}]

    ' u'' u'
    {1}{1}
    ' - u'{4}{3}
    ').format( + u'
    ').format( xml(s, True), xml(_('Loading, please wait'))+'…', unicode(c), - xml(u'/browse/category_group/%s/%s'%(category, s)), + xml(u'/browse/category_group/%s/%s'%(category, s), True), self.opts.url_prefix) for s, c in category_groups.items()] items = '\n\n'.join(items) @@ -563,7 +564,8 @@ class BrowseServer(object): if not val: val = '' args[key] = xml(val, True) - fname = ascii_filename(args['title']) + ' - ' + ascii_filename(args['authors']) + fname = quote(ascii_filename(args['title']) + ' - ' + + ascii_filename(args['authors'])) return args, fmt, fmts, fname @Endpoint(mimetype='application/json; charset=utf-8') diff --git a/src/calibre/library/server/content.py b/src/calibre/library/server/content.py index 670c31b9df..6437f02cb6 100644 --- a/src/calibre/library/server/content.py +++ b/src/calibre/library/server/content.py @@ -70,10 +70,10 @@ class ContentServer(object): id = id.rpartition('_')[-1].partition('.')[0] match = re.search(r'\d+', id) if not match: - raise cherrypy.HTTPError(400, 'id:%s not an integer'%id) + raise cherrypy.HTTPError(404, 'id:%s not an integer'%id) id = int(match.group()) if not self.db.has_id(id): - raise cherrypy.HTTPError(400, 'id:%d does not exist in database'%id) + raise cherrypy.HTTPError(404, 'id:%d does not exist in database'%id) if what == 'thumb' or what.startswith('thumb_'): try: width, height = map(int, what.split('_')[1:]) diff --git a/src/calibre/library/server/main.py b/src/calibre/library/server/main.py index fbd811a1ab..b7cb3ecf12 100644 --- a/src/calibre/library/server/main.py +++ b/src/calibre/library/server/main.py @@ -58,6 +58,9 @@ The OPDS interface is advertised via BonJour automatically. help=_('Specifies a restriction to be used for this invocation. ' 'This option overrides any per-library settings specified' ' in the GUI')) + parser.add_option('--auto-reload', default=False, action='store_true', + help=_('Auto reload server when source code changes. May not' + ' work in all environments.')) return parser diff --git a/src/calibre/library/server/mobile.py b/src/calibre/library/server/mobile.py index a889089109..d66e6d842f 100644 --- a/src/calibre/library/server/mobile.py +++ b/src/calibre/library/server/mobile.py @@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en' import re, os import __builtin__ +from urllib import quote import cherrypy from lxml import html @@ -115,8 +116,8 @@ def build_index(books, num, search, sort, order, start, total, url_base, CKEYS, data = TD() for fmt in book['formats'].split(','): - a = ascii_filename(book['authors']) - t = ascii_filename(book['title']) + a = quote(ascii_filename(book['authors'])) + t = quote(ascii_filename(book['title'])) s = SPAN( A( fmt.lower(), diff --git a/src/calibre/utils/html2text.py b/src/calibre/utils/html2text.py index b271def4bb..afe5a0aded 100644 --- a/src/calibre/utils/html2text.py +++ b/src/calibre/utils/html2text.py @@ -17,18 +17,18 @@ sgmllib.charref = re.compile('&#([xX]?[0-9a-fA-F]+)[^0-9a-fA-F]') try: from textwrap import wrap except: pass -# Use Unicode characters instead of their ascii pseudo-replacements +# Use Unicode characters instead of their ascii psuedo-replacements UNICODE_SNOB = 0 # Put the links after each paragraph instead of at the end. LINKS_EACH_PARAGRAPH = 0 # Wrap long lines at position. 0 for no wrapping. (Requires Python 2.3.) -BODY_WIDTH = 0 +BODY_WIDTH = 78 # Don't show internal links (href="#local-anchor") -- corresponding link targets # won't be visible in the plain text file anyway. -SKIP_INTERNAL_LINKS = True +SKIP_INTERNAL_LINKS = False ### Entity Nonsense ### @@ -41,13 +41,13 @@ def name2cp(k): if k.startswith("&#") and k.endswith(";"): return int(k[2:-1]) # not in latin-1 return ord(codecs.latin_1_decode(k)[0]) -unifiable = {'rsquo':"'", 'lsquo':"'", 'rdquo':'"', 'ldquo':'"', +unifiable = {'rsquo':"'", 'lsquo':"'", 'rdquo':'"', 'ldquo':'"', 'copy':'(C)', 'mdash':'--', 'nbsp':' ', 'rarr':'->', 'larr':'<-', 'middot':'*', 'ndash':'-', 'oelig':'oe', 'aelig':'ae', -'agrave':'a', 'aacute':'a', 'acirc':'a', 'atilde':'a', 'auml':'a', 'aring':'a', -'egrave':'e', 'eacute':'e', 'ecirc':'e', 'euml':'e', +'agrave':'a', 'aacute':'a', 'acirc':'a', 'atilde':'a', 'auml':'a', 'aring':'a', +'egrave':'e', 'eacute':'e', 'ecirc':'e', 'euml':'e', 'igrave':'i', 'iacute':'i', 'icirc':'i', 'iuml':'i', -'ograve':'o', 'oacute':'o', 'ocirc':'o', 'otilde':'o', 'ouml':'o', +'ograve':'o', 'oacute':'o', 'ocirc':'o', 'otilde':'o', 'ouml':'o', 'ugrave':'u', 'uacute':'u', 'ucirc':'u', 'uuml':'u'} unifiable_n = {} @@ -60,7 +60,7 @@ def charref(name): c = int(name[1:], 16) else: c = int(name) - + if not UNICODE_SNOB and c in unifiable_n.keys(): return unifiable_n[c] else: @@ -76,14 +76,14 @@ def entityref(c): def replaceEntities(s): s = s.group(1) - if s[0] == "#": + if s[0] == "#": return charref(s[1:]) else: return entityref(s) r_unescape = re.compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));") def unescape(s): return r_unescape.sub(replaceEntities, s) - + def fixattrs(attrs): # Fix bug in sgmllib.py if not attrs: return attrs @@ -105,7 +105,7 @@ def optwrap(text): """Wrap all paragraphs in the provided text.""" if not BODY_WIDTH: return text - + assert wrap, "Requires Python 2.3." result = '' newlines = 0 @@ -136,7 +136,7 @@ def hn(tag): class _html2text(sgmllib.SGMLParser): def __init__(self, out=None, baseurl=''): sgmllib.SGMLParser.__init__(self) - + if out is None: self.out = self.outtextf else: self.out = out self.outtext = u'' @@ -157,43 +157,43 @@ class _html2text(sgmllib.SGMLParser): self.abbr_data = None # last inner HTML (for abbr being defined) self.abbr_list = {} # stack of abbreviations to write later self.baseurl = baseurl - - def outtextf(self, s): + + def outtextf(self, s): self.outtext += s - + def close(self): sgmllib.SGMLParser.close(self) - + self.pbr() self.o('', 0, 'end') - + return self.outtext - + def handle_charref(self, c): self.o(charref(c)) def handle_entityref(self, c): self.o(entityref(c)) - + def unknown_starttag(self, tag, attrs): self.handle_tag(tag, attrs, 1) - + def unknown_endtag(self, tag): self.handle_tag(tag, None, 0) - + def previousIndex(self, attrs): """ returns the index of certain set of attributes (of a link) in the self.a list - + If the set of attributes is not found, returns None """ if not attrs.has_key('href'): return None - + i = -1 for a in self.a: i += 1 match = 0 - + if a.has_key('href') and a['href'] == attrs['href']: if a.has_key('title') or attrs.has_key('title'): if (a.has_key('title') and attrs.has_key('title') and @@ -206,13 +206,13 @@ class _html2text(sgmllib.SGMLParser): def handle_tag(self, tag, attrs, start): attrs = fixattrs(attrs) - + if hn(tag): self.p() if start: self.o(hn(tag)*"#" + ' ') if tag in ['p', 'div']: self.p() - + if tag == "br" and start: self.o(" \n") if tag == "hr" and start: @@ -220,21 +220,21 @@ class _html2text(sgmllib.SGMLParser): self.o("* * *") self.p() - if tag in ["head", "style", 'script']: + if tag in ["head", "style", 'script']: if start: self.quiet += 1 else: self.quiet -= 1 if tag in ["body"]: self.quiet = 0 # sites like 9rules.com never close - + if tag == "blockquote": - if start: + if start: self.p(); self.o('> ', 0, 1); self.start = 1 self.blockquote += 1 else: self.blockquote -= 1 self.p() - + if tag in ['em', 'i', 'u']: self.o("_") if tag in ['strong', 'b']: self.o("**") if tag == "code" and not self.pre: self.o('`') #TODO: `` `this` `` @@ -243,7 +243,7 @@ class _html2text(sgmllib.SGMLParser): attrsD = {} for (x, y) in attrs: attrsD[x] = y attrs = attrsD - + self.abbr_title = None self.abbr_data = '' if attrs.has_key('title'): @@ -253,13 +253,13 @@ class _html2text(sgmllib.SGMLParser): self.abbr_list[self.abbr_data] = self.abbr_title self.abbr_title = None self.abbr_data = '' - + if tag == "a": if start: attrsD = {} for (x, y) in attrs: attrsD[x] = y attrs = attrsD - if attrs.has_key('href') and not (SKIP_INTERNAL_LINKS and attrs['href'].startswith('#')): + if attrs.has_key('href') and not (SKIP_INTERNAL_LINKS and attrs['href'].startswith('#')): self.astack.append(attrs) self.o("[") else: @@ -277,7 +277,7 @@ class _html2text(sgmllib.SGMLParser): a['outcount'] = self.outcount self.a.append(a) self.o("][" + `a['count']` + "]") - + if tag == "img" and start: attrsD = {} for (x, y) in attrs: attrsD[x] = y @@ -296,20 +296,20 @@ class _html2text(sgmllib.SGMLParser): self.o("![") self.o(alt) self.o("]["+`attrs['count']`+"]") - + if tag == 'dl' and start: self.p() if tag == 'dt' and not start: self.pbr() if tag == 'dd' and start: self.o(' ') if tag == 'dd' and not start: self.pbr() - + if tag in ["ol", "ul"]: if start: self.list.append({'name':tag, 'num':0}) else: if self.list: self.list.pop() - + self.p() - + if tag == 'li': if start: self.pbr() @@ -323,10 +323,10 @@ class _html2text(sgmllib.SGMLParser): self.start = 1 else: self.pbr() - + if tag in ["table", "tr"] and start: self.p() if tag == 'td': self.pbr() - + if tag == "pre": if start: self.startpre = 1 @@ -334,34 +334,34 @@ class _html2text(sgmllib.SGMLParser): else: self.pre = 0 self.p() - + def pbr(self): if self.p_p == 0: self.p_p = 1 def p(self): self.p_p = 2 - + def o(self, data, puredata=0, force=0): if self.abbr_data is not None: self.abbr_data += data - - if not self.quiet: + + if not self.quiet: if puredata and not self.pre: data = re.sub('\s+', ' ', data) if data and data[0] == ' ': self.space = 1 data = data[1:] if not data and not force: return - + if self.startpre: #self.out(" :") #TODO: not output when already one there self.startpre = 0 - + bq = (">" * self.blockquote) if not (force and data and data[0] == ">") and self.blockquote: bq += " " - + if self.pre: bq += " " data = data.replace("\n", "\n"+bq) - + if self.start: self.space = 0 self.p_p = 0 @@ -377,7 +377,7 @@ class _html2text(sgmllib.SGMLParser): if self.p_p: self.out(('\n'+bq)*self.p_p) self.space = 0 - + if self.space: if not self.lastWasNL: self.out(' ') self.space = 0 @@ -388,7 +388,7 @@ class _html2text(sgmllib.SGMLParser): newa = [] for link in self.a: if self.outcount > link['outcount']: - self.out(" ["+`link['count']`+"]: " + urlparse.urljoin(self.baseurl, link['href'])) + self.out(" ["+`link['count']`+"]: " + urlparse.urljoin(self.baseurl, link['href'])) if link.has_key('title'): self.out(" ("+link['title']+")") self.out("\n") else: @@ -397,7 +397,7 @@ class _html2text(sgmllib.SGMLParser): if self.a != newa: self.out("\n") # Don't need an extra line when nothing was done. self.a = newa - + if self.abbr_list and force == "end": for abbr, definition in self.abbr_list.items(): self.out(" *[" + abbr + "]: " + definition + "\n") @@ -410,7 +410,7 @@ class _html2text(sgmllib.SGMLParser): def handle_data(self, data): if r'\/script>' in data: self.quiet -= 1 self.o(data, 1) - + def unknown_decl(self, data): pass def wrapwrite(text): sys.stdout.write(text.encode('utf8'))