Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

Commit 7116a0a744: KG updates
@@ -208,6 +208,8 @@ h2.library_name {

 }

+.toplevel li a { text-decoration: none; }
+
 .toplevel li img {
     vertical-align: middle;
     margin-right: 1em;

@@ -261,9 +263,16 @@ h2.library_name {

 }

-.category div.category-item span.href { display: none }
+.category div.category-item a { text-decoration: none; color: inherit; }

-#groups span.load_href { display: none }
+#groups a.load_href {
+    text-decoration: none;
+    color: inherit;
+    font-size: medium;
+    font-weight: normal;
+    padding: 0;
+    padding-left: 0.5em;
+}
+
 #groups h3 {
     font-weight: bold;

@@ -116,7 +116,7 @@ function toplevel() {
     $(".sort_select").hide();

     $(".toplevel li").click(function() {
-        var href = $(this).children("span.url").text();
+        var href = $(this).children("a").attr('href');
         window.location = href;
     });

@@ -133,7 +133,7 @@ function render_error(msg) {
 // Category feed {{{

 function category_clicked() {
-    var href = $(this).find("span.href").html();
+    var href = $(this).find("a").attr('href');
     window.location = href;
 }

@@ -151,7 +151,7 @@ function category() {

     change: function(event, ui) {
         if (ui.newContent) {
-            var href = ui.newContent.children("span.load_href").html();
+            var href = ui.newContent.prev().children("a.load_href").attr('href');
             ui.newContent.children(".loading").show();
             if (href) {
                 $.ajax({
resources/recipes/clic_rbs.recipe (new file, 50 lines)
@@ -0,0 +1,50 @@
from calibre.web.feeds.news import BasicNewsRecipe

class ClicRBS(BasicNewsRecipe):
    title = u'ClicRBS'
    language = 'pt'
    __author__ = 'arvoredo'
    oldest_article = 3
    max_articles_per_feed = 9
    cover_url = 'http://www.publicidade.clicrbs.com.br/clicrbs/imgs/logo_clic.gif'

    remove_tags = [
        dict(name='div', attrs={'class':['clic-barra-inner', 'botao-versao-mobile ']})
    ]

    remove_tags_before = dict(name='div ', attrs={'class':'descricao'})
    remove_tags_before = dict(name='div', attrs={'id':'glb-corpo'})
    remove_tags_before = dict(name='div', attrs={'class':'descricao'})
    remove_tags_before = dict(name='div', attrs={'class':'coluna'})
    remove_tags_after = dict(name='div', attrs={'class':'extra'})
    remove_tags_after = dict(name='div', attrs={'id':'links-patrocinados'})
    remove_tags_after = dict(name='h4', attrs={'class':'tipo-c comente'})
    remove_tags_after = dict(name='ul', attrs={'class':'lista'})

    feeds = [
        (u'zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=1&local=1&channel=13')
        , (u'diariocatarinense.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=2&local=18&channel=67')
        , (u'Concursos e Emprego', u'http://g1.globo.com/Rss2/0,,AS0-9654,00.xml')
        , (u'Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?channel=87&uf=1&local=1')
        , (u'Economia, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=801&uf=1&local=1&channel=13')
        , (u'Esportes, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=802&uf=1&local=1&channel=13')
        , (u'Economia, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1180&channel=87&uf=1&local=1')
        , (u'Política, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1185&channel=87&uf=1&local=1')
        , (u'Mundo, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1184&channel=87&uf=1&local=1')
        , (u'Catarinense, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=&theme=371&uf=2&channel=2')
        , (u'Geral, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1183&channel=87&uf=1&local=1')
        , (u'Estilo de Vida, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=805&uf=1&local=1&channel=13')
        , (u'Corrida, Corrida, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1313&theme=15704&uf=1&channel=2')
        , (u'Jornal de Santa Catarina, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?espid=159&uf=2&local=18')
        , (u'Grêmio, Futebol, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=11&theme=65&uf=1&channel=2')
        , (u'Velocidade, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1314&theme=2655&uf=1&channel=2')
    ]

    extra_css = '''
        cite{color:#007BB5; font-size:xx-small; font-style:italic;}
        body{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
        h3{font-size:large; color:#082963; font-weight:bold;}
        #ident{color:#0179B4; font-size:xx-small;}
        p{color:#000000;font-weight:normal;}
        .commentario p{color:#007BB5; font-style:italic;}
    '''
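A note on the recipe above (editor's sketch, not part of the commit): remove_tags_before and remove_tags_after are single-valued class attributes, and in plain Python each repeated assignment overwrites the previous one, so only the last pair takes effect; the earlier lines, including the 'div ' with a stray trailing space, are dead code. The effective declaration is just:

    # Only the final assignments survive the repeated rebinding above,
    # so the recipe is equivalent to declaring these two values.
    remove_tags_before = dict(name='div', attrs={'class':'coluna'})
    remove_tags_after  = dict(name='ul', attrs={'class':'lista'})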
resources/recipes/cm_journal.recipe (new file, 44 lines)
@@ -0,0 +1,44 @@
from calibre.web.feeds.news import BasicNewsRecipe

class CMJornal_pt(BasicNewsRecipe):
    title = 'Correio da Manha - Portugal'
    __author__ = 'jmst'
    description = 'As noticias de Portugal e do Mundo'
    publisher = 'Cofina Media'
    category = ''
    oldest_article = 1
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    language = 'pt'
    extra_css = ' .publish{font-style: italic; line-height: 1.2em; border-bottom: 1px dotted; padding: 5px 0} .entity{line-height: 1.2em} .overview{line-height:1.2em} '

    conversion_options = {
          'comment'   : description
        , 'tags'      : category
        , 'publisher' : publisher
        , 'language'  : language
    }

    keep_only_tags = [
          dict(name=['h2','h1'])
        , dict(name='div', attrs={'class': ['news']})
    ]

    remove_tags = [
          dict(name=['object','embed','iframe'])
        , dict(name='a', attrs={'href':['#']})
    ]

    feeds = [
          (u'Actualidade', u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000009-0000-0000-0000-000000000009')
        , (u'Portugal',    u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000010-0000-0000-0000-000000000010')
        , (u'Economia',    u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000011-0000-0000-0000-000000000011')
        , (u'Mundo',       u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000091-0000-0000-0000-000000000091')
        , (u'Desporto',    u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000012-0000-0000-0000-000000000012')
        , (u'TV & Media',  u'http://www.cmjornal.xl.pt/rss/rss.aspx?channelID=00000092-0000-0000-0000-000000000092')
    ]

    def print_version(self, url):
        return url.replace('noticia.aspx', 'Imprimir.aspx')
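The print_version hook above makes the recipe fetch printer-friendly pages instead of the article pages; here it is a pure string rewrite on the article URL. A sketch with a made-up URL (the path and id are illustrative only, not taken from the feed):

    # Editor's sketch: the substitution the recipe relies on.
    url = 'http://www.cmjornal.xl.pt/detalhe/noticia.aspx?id=1234'
    print url.replace('noticia.aspx', 'Imprimir.aspx')
    # -> http://www.cmjornal.xl.pt/detalhe/Imprimir.aspx?id=1234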
resources/recipes/el_faro.recipe (new file, 77 lines)
@@ -0,0 +1,77 @@
from calibre.web.feeds.news import BasicNewsRecipe

class ElFaroDeVigo(BasicNewsRecipe):
    title = u'El Faro de Vigo'
    oldest_article = 1
    max_articles_per_feed = 100
    __author__ = 'Jefferson Frantz'
    description = 'Noticias de Vigo'
    timefmt = ' [%d %b, %Y]'
    language = 'es'
    encoding = 'cp1252'
    no_stylesheets = True
    remove_javascript = True

    feeds = [
##        (u'Vigo', u'http://www.farodevigo.es/elementosInt/rss/1'),
##        (u'Gran Vigo', u'http://www.farodevigo.es/elementosInt/rss/2'),
        (u'Galicia', u'http://www.farodevigo.es/elementosInt/rss/4'),
        (u'España', u'http://www.farodevigo.es/elementosInt/rss/6'),
        (u'Mundo', u'http://www.farodevigo.es/elementosInt/rss/7'),
##        (u'Opinión', u'http://www.farodevigo.es/elementosInt/rss/5'),
        (u'Economía', u'http://www.farodevigo.es/elementosInt/rss/10'),
        (u'Sociedad y Cultura', u'http://www.farodevigo.es/elementosInt/rss/8'),
        (u'Sucesos', u'http://www.farodevigo.es/elementosInt/rss/9'),
        (u'Deportes', u'http://www.farodevigo.es/elementosInt/rss/11'),
        (u'Agenda', u'http://www.farodevigo.es/elementosInt/rss/21'),
        (u'Gente', u'http://www.farodevigo.es/elementosInt/rss/24'),
        (u'Televisión', u'http://www.farodevigo.es/elementosInt/rss/25'),
        (u'Ciencia y Tecnología', u'http://www.farodevigo.es/elementosInt/rss/26')]

    extra_css = '''.noticia_texto{ font-family: sans-serif; font-size: medium; text-align: justify }
        h1{font-family: serif; font-size: x-large; font-weight: bold; color: #000000; text-align: center}
        h2{font-family: serif; font-size: medium; font-weight: bold; color: #000000; text-align: left}
        .enlacenegrita10{font-family: serif; font-size: small; font-weight: bold; color: #000000; text-align: left}
        .noticia_titular{font-family: serif; font-size: x-large; font-weight: bold; color: #000000; text-align: center}'''

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']

        url = 'http://estaticos00.farodevigo.es//elementosWeb/mediaweb/images/compartir/barrapunto.gif'
        fitem = soup.find('img',src=url)
        if fitem:
            par = fitem.parent
            par.extract()
        url = 'http://estaticos01.farodevigo.es//elementosWeb/mediaweb/images/compartir/barrapunto.gif'
        fitem = soup.find('img',src=url)
        if fitem:
            par = fitem.parent
            par.extract()
        url = 'http://estaticos02.farodevigo.es//elementosWeb/mediaweb/images/compartir/barrapunto.gif'
        fitem = soup.find('img',src=url)
        if fitem:
            par = fitem.parent
            par.extract()

        return self.adeify_images(soup)

    def postprocess_html(self, soup, first_fetch):
        divs = soup.findAll(True, {'class':'enlacenegrita10'})
        for div in divs:
            div['align'] = 'left'

        return soup

    keep_only_tags = [dict(name='div', attrs={'class':['noticias']})]

    remove_tags = [
          dict(name=['object','link','script','ul','iframe','ol'])
        , dict(name='div', attrs={'class':['noticiadd2', 'cintillo2', 'noticiadd', 'noticiadd2']})
        , dict(name='div', attrs={'class':['imagen_derecha', 'noticiadd3', 'extraHTML']})
    ]
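The three near-identical blocks in preprocess_html above differ only in the estaticos00/01/02 host name. A compact equivalent (editor's sketch, same behaviour, not part of the commit):

    # Remove the 'barrapunto' share image hosted on any of the three
    # static servers, together with its parent tag.
    for n in ('00', '01', '02'):
        url = ('http://estaticos%s.farodevigo.es//elementosWeb/mediaweb/'
               'images/compartir/barrapunto.gif' % n)
        fitem = soup.find('img', src=url)
        if fitem:
            fitem.parent.extract()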
@@ -1,7 +1,9 @@
 __license__ = 'GPL v3'
 __copyright__ = '2010, Eddie Lau'
 '''
 modified from Singtao Toronto calibre recipe by rty
+Change Log:
+2010/10/31: skip repeated articles in section pages
 '''

 import datetime

@@ -23,14 +25,13 @@ class AdvancedUserRecipe1278063072(BasicNewsRecipe):
     recursions = 0
     conversion_options = {'linearize_tables':True}
     masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'

     keep_only_tags = [dict(name='h1'),
                       dict(attrs={'id':['newscontent01','newscontent02']})]

     def get_fetchdate(self):
         dt_utc = datetime.datetime.utcnow()
-        # convert UTC to local hk time
-        dt_local = dt_utc - datetime.timedelta(-8.0/24)
+        # convert UTC to local hk time - at around HKT 5.30am, all news are available
+        dt_local = dt_utc - datetime.timedelta(-2.5/24)
         return dt_local.strftime("%Y%m%d")

     def parse_index(self):

@@ -47,18 +48,14 @@ class AdvancedUserRecipe1278063072(BasicNewsRecipe):
         soup = self.index_to_soup(url)
         divs = soup.findAll(attrs={'class': ['bullet']})
         current_articles = []
+        included_urls = []
         for i in divs:
             a = i.find('a', href = True)
             title = self.tag_to_string(a)
             url = a.get('href', False)
             url = 'http://news.mingpao.com/' + dateStr + '/' +url
+            if url not in included_urls:
                 current_articles.append({'title': title, 'url': url, 'description':''})
+                included_urls.append(url)
         return current_articles

-    def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
-            del item['style']
-        for item in soup.findAll(width=True):
-            del item['width']
-        return soup
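Two details in the mingpao change above are worth spelling out. First, subtracting a negative timedelta adds time, so dt_utc - datetime.timedelta(-2.5/24) is UTC+2.5h: the fetch date now rolls over to a new day at 21:30 UTC, which is 05:30 Hong Kong time, matching the updated comment. Second, the included_urls list is a plain seen-list dedup; in isolation (editor's sketch, not part of the commit):

    # Skip URLs already collected so repeated links on a section page
    # yield a single article entry.
    current_articles, included_urls = [], []
    for title, url in [('A', '/a'), ('A again', '/a'), ('B', '/b')]:
        if url not in included_urls:
            current_articles.append({'title': title, 'url': url, 'description': ''})
            included_urls.append(url)
    # current_articles now holds one entry each for '/a' and '/b'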
@@ -28,7 +28,9 @@ If there are no windows binaries already compiled for the version of python you

 Run the following command to install python dependencies::

-    easy_install --always-unzip -U ipython mechanize BeautifulSoup pyreadline python-dateutil dnspython
+    easy_install --always-unzip -U ipython mechanize pyreadline python-dateutil dnspython
+
+Install BeautifulSoup 3.0.x manually into site-packages (3.1.x parses broken HTML very poorly)
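A quick way to confirm that the manually installed copy is the 3.0.x series (editor's sketch; assumes the BeautifulSoup 3 module exposes __version__, which the 3.0.x releases do):

    import BeautifulSoup
    print BeautifulSoup.__version__   # expect something like '3.0.8.1', not 3.1.x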

 Qt
 --------
setup/server.py
@@ -5,10 +5,46 @@ __license__ = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import subprocess, tempfile, os, time
+import subprocess, tempfile, os, time, sys, telnetlib
+from threading import RLock

 from setup import Command

+try:
+    from pyinotify import WatchManager, ThreadedNotifier, EventsCodes, ProcessEvent
+except:
+    wm = None
+else:
+    wm = WatchManager()
+    flags = EventsCodes.ALL_FLAGS
+    mask = flags['IN_MODIFY']
+
+class ProcessEvents(ProcessEvent):
+
+    def __init__(self, command):
+        ProcessEvent.__init__(self)
+        self.command = command
+
+    def process_default(self, event):
+        name = getattr(event,
+                'name', None)
+        if not name:
+            return
+        ext = os.path.splitext(name)[1]
+        reload = False
+        if ext == '.py':
+            reload = True
+            print
+            print name, 'changed'
+            self.command.kill_server()
+            self.command.launch_server()
+            print self.command.prompt,
+            sys.stdout.flush()
+
+        if reload:
+            self.command.reload_browser(delay=1)
+
 class Server(Command):

     description = 'Run the calibre server in development mode conveniently'

@@ -18,31 +54,75 @@ class Server(Command):
     def rebuild_monocole(self):
         subprocess.check_call(['sprocketize', '-C', self.MONOCLE_PATH,
             '-I', 'src', 'src/monocle.js'],
-            stdout=open('resources/content_server/monocle.js', 'wb'))
+            stdout=open('resources/content_server/read/monocle.js', 'wb'))

-    def launch_server(self, log):
+    def launch_server(self):
+        print 'Starting server...\n'
+        with self.lock:
             self.rebuild_monocole()
-            p = subprocess.Popen(['calibre-server', '--develop'],
-                stderr=subprocess.STDOUT, stdout=log)
+            self.server_proc = p = subprocess.Popen(['calibre-server', '--develop'],
+                stderr=subprocess.STDOUT, stdout=self.server_log)
+            time.sleep(0.2)
+            if p.poll() is not None:
+                print 'Starting server failed'
+                raise SystemExit(1)
             return p

+    def kill_server(self):
+        print 'Killing server...\n'
+        if self.server_proc is not None:
+            with self.lock:
+                if self.server_proc.poll() is None:
+                    self.server_proc.terminate()
+                while self.server_proc.poll() is None:
+                    time.sleep(0.1)
+
+    def watch(self):
+        if wm is not None:
+            self.notifier = ThreadedNotifier(wm, ProcessEvents(self))
+            self.notifier.start()
+            self.wdd = wm.add_watch(os.path.abspath('src'), mask, rec=True)
+
+    def reload_browser(self, delay=0.1):
+        time.sleep(delay)
+        try:
+            t = telnetlib.Telnet('localhost', 4242)
+            t.read_until("repl>")
+            t.write('BrowserReload();')
+            print t.read_until("repl>")
+            t.close()
+        except:
+            print 'Failed to reload browser'
+            import traceback
+            traceback.print_exc()
+
     def run(self, opts):
+        self.lock = RLock()
         tdir = tempfile.gettempdir()
         logf = os.path.join(tdir, 'calibre-server.log')
-        log = open(logf, 'ab')
+        self.server_log = open(logf, 'ab')
+        self.prompt = 'Press Enter to kill/restart server. Ctrl+C to quit: '
         print 'Server log available at:', logf
+        print
+        self.watch()

+        first = True
         while True:
-            print 'Starting server...'
-            p = self.launch_server(log)
+            self.launch_server()
+            if not first:
+                self.reload_browser()
+            first = False
+
             try:
-                raw_input('Press Enter to kill/restart server. Ctrl+C to quit: ')
+                raw_input(self.prompt)
             except:
+                print
+                self.kill_server()
                 break
             else:
-                while p.returncode is None:
-                    p.terminate()
-                    time.sleep(0.1)
-                p.kill()
+                self.kill_server()
         print

+        if hasattr(self, 'notifier'):
+            self.notifier.stop()

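The reload_browser method above talks to the MozRepl Firefox extension, which listens on port 4242 by default and exposes a JavaScript prompt into browser chrome, where BrowserReload() reloads the active tab. Stripped to its core (editor's sketch; assumes MozRepl is installed and listening):

    import telnetlib
    # 'repl>' is MozRepl's interactive prompt: wait for it, send the
    # chrome-level reload call, then wait for the prompt to come back.
    t = telnetlib.Telnet('localhost', 4242)
    t.read_until("repl>")
    t.write('BrowserReload();')
    t.read_until("repl>")
    t.close()

Note also that the pyinotify half degrades gracefully: if the import fails, wm stays None and watch() becomes a no-op, so the restart loop still works, just without automatic file watching.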
@@ -475,7 +475,7 @@ from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, \
         SOVOS, PICO
 from calibre.devices.sne.driver import SNE
 from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, \
-        GEMEI, VELOCITYMICRO, PDNOVEL_KOBO
+        GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, Q600
 from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
 from calibre.devices.kobo.driver import KOBO

@@ -586,6 +586,7 @@ plugins += [
     AVANT,
     MENTOR,
     SWEEX,
+    Q600,
     KOGAN,
     PDNOVEL,
     SPECTRA,
@@ -19,7 +19,7 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.metadata.epub import set_metadata
 from calibre.library.server.utils import strftime
 from calibre.utils.config import config_dir, prefs
-from calibre.utils.date import isoformat, now, parse_date
+from calibre.utils.date import now, parse_date
 from calibre.utils.logging import Log
 from calibre.utils.zipfile import ZipFile

@@ -2521,11 +2521,11 @@ class ITUNES(DriverBase):
                 metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour,
                     old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo)
             else:
-                metadata.timestamp = isoformat(now())
+                metadata.timestamp = now()
             if DEBUG:
                 self.log.info(" add timestamp: %s" % metadata.timestamp)
         else:
-            metadata.timestamp = isoformat(now())
+            metadata.timestamp = now()
             if DEBUG:
                 self.log.warning(" missing <metadata> block in OPF file")
                 self.log.info(" add timestamp: %s" % metadata.timestamp)
@@ -72,6 +72,15 @@ class SWEEX(USBMS):
     EBOOK_DIR_MAIN = ''
     SUPPORTS_SUB_DIRS = True

+class Q600(SWEEX):
+
+    name = 'Digma Q600 Device interface'
+    gui_name = 'Q600'
+    description = _('Communicate with the Digma Q600')
+
+    BCD = [0x325]
+    FORMATS = ['epub', 'fb2', 'mobi', 'prc', 'html', 'rtf', 'chm', 'pdf', 'txt']
+
 class KOGAN(SWEEX):

     name = 'Kogan Device Interface'
@@ -9,11 +9,13 @@ Fetch metadata using Amazon AWS
 import sys, re

 from lxml import html
+from lxml.html import soupparser

 from calibre import browser
 from calibre.ebooks.metadata import check_isbn
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.chardet import xml_to_unicode
+from calibre.library.comments import sanitize_comments_html

 def find_asin(br, isbn):
     q = 'http://www.amazon.com/s?field-keywords='+isbn

@@ -70,7 +72,10 @@ def get_metadata(br, asin, mi):
         return False
     raw = xml_to_unicode(raw, strip_encoding_pats=True,
             resolve_entities=True)[0]
-    root = html.fromstring(raw)
+    try:
+        root = soupparser.fromstring(raw)
+    except:
+        return False
     ratings = root.xpath('//form[@id="handleBuy"]/descendant::*[@class="asinReviewsSummary"]')
     if ratings:
         pat = re.compile(r'([0-9.]+) out of (\d+) stars')

@@ -95,25 +100,26 @@ def get_metadata(br, asin, mi):
     # remove all attributes from tags
     desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
     # Collapse whitespace
-    desc = re.sub('\n+', '\n', desc)
-    desc = re.sub(' +', ' ', desc)
+    #desc = re.sub('\n+', '\n', desc)
+    #desc = re.sub(' +', ' ', desc)
     # Remove the notice about text referring to out of print editions
     desc = re.sub(r'(?s)<em>--This text ref.*?</em>', '', desc)
     # Remove comments
     desc = re.sub(r'(?s)<!--.*?-->', '', desc)
-    mi.comments = desc
+    mi.comments = sanitize_comments_html(desc)

     return True

 def main(args=sys.argv):
     # Test xisbn
-    print get_social_metadata('Learning Python', None, None, '8324616489')
-    print
+    #print get_social_metadata('Learning Python', None, None, '8324616489')
+    #print

     # Test sophisticated comment formatting
-    print get_social_metadata('Swan Thieves', None, None, '9780316065795')
+    print get_social_metadata('Angels & Demons', None, None, '9781416580829')
     print
+    return

     # Random tests
     print get_social_metadata('Star Trek: Destiny: Mere Mortals', None, None, '9781416551720')
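The switch from html.fromstring to soupparser.fromstring above trades speed for robustness: lxml.html.soupparser builds the lxml tree through BeautifulSoup, which tolerates the tag soup Amazon pages tend to be. Editor's sketch of the behaviour the commit relies on:

    from lxml.html import soupparser
    # BeautifulSoup-backed parsing copes with unclosed/misnested markup.
    root = soupparser.fromstring('<p>unclosed <b>markup')
    print root.tag   # 'html' -- a usable tree despite the broken input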
@@ -484,17 +484,22 @@ class DeviceMenu(QMenu): # {{{
                 _('Storage Card B')),
         ]

+        later_menus = []
+
         for menu in (self, self.set_default_menu):
             for actions, desc in (
                     (basic_actions, ''),
+                    (specific_actions, _('Send specific format to')),
                     (delete_actions, _('Send and delete from library')),
-                    (specific_actions, _('Send specific format to'))
                 ):
                 mdest = menu
                 if actions is not basic_actions:
-                    mdest = menu.addMenu(desc)
+                    mdest = QMenu(desc)
                     self._memory.append(mdest)
+                    later_menus.append(mdest)
+                    if menu is self.set_default_menu:
+                        menu.addMenu(mdest)
+                        menu.addSeparator()

                 for dest, delete, specific, icon, text in actions:
                     action = DeviceAction(dest, delete, specific, icon, text, self)

@@ -507,7 +512,7 @@ class DeviceMenu(QMenu): # {{{
                     action.a_s.connect(self.action_triggered)
                     self.actions.append(action)
                     mdest.addAction(action)
-                if actions is not specific_actions:
+                if actions is basic_actions:
                     menu.addSeparator()

         da = config['default_send_to_device_action']

@@ -525,14 +530,21 @@ class DeviceMenu(QMenu): # {{{
         self.group.triggered.connect(self.change_default_action)
         self.addSeparator()

+        self.addMenu(later_menus[0])
+        self.addSeparator()
+
         mitem = self.addAction(QIcon(I('eject.png')), _('Eject device'))
         mitem.setEnabled(False)
         mitem.triggered.connect(lambda x : self.disconnect_mounted_device.emit())
         self.disconnect_mounted_device_action = mitem
+
         self.addSeparator()
+
         self.addMenu(self.set_default_menu)
         self.addSeparator()

+        self.addMenu(later_menus[1])
+        self.addSeparator()
+
         annot = self.addAction(_('Fetch annotations (experimental)'))
         annot.setEnabled(False)
         annot.triggered.connect(lambda x :
@@ -11,11 +11,15 @@ from calibre.constants import preferred_encoding
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, \
         CData, Comment, Declaration, ProcessingInstruction
 from calibre import prepare_string_for_xml
+from calibre.utils.html2text import html2text
+from calibre.ebooks.markdown import markdown

 # Hackish - ignoring sentences ending or beginning in numbers to avoid
 # confusion with decimal points.
 lost_cr_pat = re.compile('([a-z])([\.\?!])([A-Z])')
 lost_cr_exception_pat = re.compile(r'(Ph\.D)|(D\.Phil)|((Dr|Mr|Mrs|Ms)\.[A-Z])')
+sanitize_pat = re.compile(r'<script|<table|<tr|<td|<th|<style|<iframe',
+        re.IGNORECASE)

 def comments_to_html(comments):
     '''

@@ -53,6 +57,15 @@ def comments_to_html(comments):
                 for x in comments.split('\n\n')]
         return '\n'.join(parts)

+    if sanitize_pat.search(comments) is not None:
+        try:
+            return sanitize_comments_html(comments)
+        except:
+            import traceback
+            traceback.print_exc()
+            return u'<p></p>'
+
     # Explode lost CRs to \n\n
     comments = lost_cr_exception_pat.sub(lambda m: m.group().replace('.',
         '.\r'), comments)

@@ -115,6 +128,11 @@ def comments_to_html(comments):

     return result.renderContents(encoding=None)

+def sanitize_comments_html(html):
+    text = html2text(html)
+    md = markdown.Markdown(safe_mode=True)
+    return md.convert(text)
+
 def test():
     for pat, val in [
         ('lineone\n\nlinetwo',
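sanitize_comments_html above sanitises by round-tripping: html2text flattens the HTML to Markdown text, and markdown with safe_mode re-renders it, so script, style, table and iframe markup cannot survive the trip. Editor's sketch of the intended use (output shape approximate):

    from calibre.library.comments import sanitize_comments_html
    print sanitize_comments_html(u'<p>Fine <script>evil()</script> text</p>')
    # The script element is gone; only paragraph-level markup remains.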
@@ -119,7 +119,8 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache,

     cherrypy.config.update({
         'log.screen'             : opts.develop,
-        'engine.autoreload_on'   : opts.develop,
+        'engine.autoreload_on'   : getattr(opts,
+            'auto_reload', False),
         'tools.log_headers.on'   : opts.develop,
         'checker.on'             : opts.develop,
         'request.show_tracebacks': show_tracebacks,
@@ -123,9 +123,10 @@ def get_category_items(category, items, restriction, datatype, prefix): # {{{

     def item(i):
         templ = (u'<div title="{4}" class="category-item">'
-                '<div class="category-name">{0}</div><div>{1}</div>'
-                '<div>{2}'
-                '<span class="href">{5}{3}</span></div></div>')
+                '<div class="category-name">'
+                '<a href="{5}{3}" title="{4}">{0}</a></div>'
+                '<div>{1}</div>'
+                '<div>{2}</div></div>')
         rating, rstring = render_rating(i.avg_rating, prefix)
         name = xml(i.name)
         if datatype == 'rating':

@@ -142,7 +143,7 @@ def get_category_items(category, items, restriction, datatype, prefix): # {{{
         q = category
         href = '/browse/matches/%s/%s'%(quote(q), quote(id_))
         return templ.format(xml(name), rating,
-                xml(desc), xml(href), rstring, prefix)
+                xml(desc), xml(href, True), rstring, prefix)

     items = list(map(item, items))
     return '\n'.join(['<div class="category-container">'] + items + ['</div>'])
@@ -252,8 +253,6 @@ class BrowseServer(object):
         lp = self.db.library_path
         if isbytestring(lp):
             lp = force_unicode(lp, filesystem_encoding)
-        if isinstance(ans, unicode):
-            ans = ans.encode('utf-8')
         ans = ans.replace('{library_name}', xml(os.path.basename(lp)))
         ans = ans.replace('{library_path}', xml(lp, True))
         ans = ans.replace('{initial_search}', initial_search)
@@ -335,9 +334,10 @@ class BrowseServer(object):
             icon = 'blank.png'
         cats.append((meta['name'], category, icon))

-        cats = [('<li title="{2} {0}"><img src="{3}{src}" alt="{0}" />'
+        cats = [('<li><a title="{2} {0}" href="/browse/category/{1}"> </a>'
+                 '<img src="{3}{src}" alt="{0}" />'
                 '<span class="label">{0}</span>'
-                '<span class="url">{3}/browse/category/{1}</span></li>')
+                '</li>')
                .format(xml(x, True), xml(quote(y)), xml(_('Browse books by')),
                    self.opts.url_prefix, src='/browse/icon/'+z)
                for x, y, z in cats]
@@ -393,14 +393,15 @@ class BrowseServer(object):
         for x in sorted(starts):
             category_groups[x] = len([y for y in items if
                 getter(y).upper().startswith(x)])
-        items = [(u'<h3 title="{0}">{0} <span>[{2}]</span></h3><div>'
+        items = [(u'<h3 title="{0}"><a class="load_href" title="{0}"'
+                  u' href="{4}{3}"><strong>{0}</strong> [{2}]</a></h3><div>'
                 u'<div class="loaded" style="display:none"></div>'
                 u'<div class="loading"><img alt="{1}" src="{4}/static/loading.gif" /><em>{1}</em></div>'
-                u'<span class="load_href">{4}{3}</span></div>').format(
+                u'</div>').format(
                 xml(s, True),
                 xml(_('Loading, please wait'))+'…',
                 unicode(c),
-                xml(u'/browse/category_group/%s/%s'%(category, s)),
+                xml(u'/browse/category_group/%s/%s'%(category, s), True),
                 self.opts.url_prefix)
                 for s, c in category_groups.items()]
         items = '\n\n'.join(items)
@@ -563,7 +564,8 @@ class BrowseServer(object):
         if not val:
             val = ''
         args[key] = xml(val, True)
-        fname = ascii_filename(args['title']) + ' - ' + ascii_filename(args['authors'])
+        fname = quote(ascii_filename(args['title']) + ' - ' +
+                ascii_filename(args['authors']))
         return args, fmt, fmts, fname

     @Endpoint(mimetype='application/json; charset=utf-8')
@@ -70,10 +70,10 @@ class ContentServer(object):
         id = id.rpartition('_')[-1].partition('.')[0]
         match = re.search(r'\d+', id)
         if not match:
-            raise cherrypy.HTTPError(400, 'id:%s not an integer'%id)
+            raise cherrypy.HTTPError(404, 'id:%s not an integer'%id)
         id = int(match.group())
         if not self.db.has_id(id):
-            raise cherrypy.HTTPError(400, 'id:%d does not exist in database'%id)
+            raise cherrypy.HTTPError(404, 'id:%d does not exist in database'%id)
         if what == 'thumb' or what.startswith('thumb_'):
             try:
                 width, height = map(int, what.split('_')[1:])
@@ -58,6 +58,9 @@ The OPDS interface is advertised via BonJour automatically.
             help=_('Specifies a restriction to be used for this invocation. '
                    'This option overrides any per-library settings specified'
                    ' in the GUI'))
+    parser.add_option('--auto-reload', default=False, action='store_true',
+            help=_('Auto reload server when source code changes. May not'
+                   ' work in all environments.'))
     return parser
@@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en'

 import re, os
 import __builtin__
+from urllib import quote

 import cherrypy
 from lxml import html

@@ -115,8 +116,8 @@ def build_index(books, num, search, sort, order, start, total, url_base, CKEYS,

     data = TD()
     for fmt in book['formats'].split(','):
-        a = ascii_filename(book['authors'])
-        t = ascii_filename(book['title'])
+        a = quote(ascii_filename(book['authors']))
+        t = quote(ascii_filename(book['title']))
         s = SPAN(
             A(
                 fmt.lower(),
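Both quoting hunks above percent-encode the author/title strings before they are embedded in download URLs: ascii_filename output can still contain spaces and punctuation that break hrefs. In isolation (editor's sketch, not part of the commit):

    from urllib import quote
    print quote('Kovid Goyal - Some Title')
    # -> Kovid%20Goyal%20-%20Some%20Title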
src/calibre/utils/html2text.py (new file, 452 lines)
@@ -0,0 +1,452 @@
#!/usr/bin/env python
"""html2text: Turn HTML into equivalent Markdown-structured text."""
__version__ = "2.39"
__author__ = "Aaron Swartz (me@aaronsw.com)"
__copyright__ = "(C) 2004-2008 Aaron Swartz. GNU GPL 3."
__contributors__ = ["Martin 'Joey' Schulze", "Ricardo Reyes", "Kevin Jay North"]

# TODO:
# Support decoded entities with unifiable.

if not hasattr(__builtins__, 'True'): True, False = 1, 0
import re, sys, urllib, htmlentitydefs, codecs
import sgmllib
import urlparse
sgmllib.charref = re.compile('&#([xX]?[0-9a-fA-F]+)[^0-9a-fA-F]')

try: from textwrap import wrap
except: pass

# Use Unicode characters instead of their ascii psuedo-replacements
UNICODE_SNOB = 1

# Put the links after each paragraph instead of at the end.
LINKS_EACH_PARAGRAPH = 0

# Wrap long lines at position. 0 for no wrapping. (Requires Python 2.3.)
BODY_WIDTH = 0

# Don't show internal links (href="#local-anchor") -- corresponding link targets
# won't be visible in the plain text file anyway.
SKIP_INTERNAL_LINKS = True

### Entity Nonsense ###

def name2cp(k):
    if k == 'apos': return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"): # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]
    else:
        k = htmlentitydefs.entitydefs[k]
        if k.startswith("&#") and k.endswith(";"): return int(k[2:-1]) # not in latin-1
        return ord(codecs.latin_1_decode(k)[0])

unifiable = {'rsquo':"'", 'lsquo':"'", 'rdquo':'"', 'ldquo':'"',
'copy':'(C)', 'mdash':'--', 'nbsp':' ', 'rarr':'->', 'larr':'<-', 'middot':'*',
'ndash':'-', 'oelig':'oe', 'aelig':'ae',
'agrave':'a', 'aacute':'a', 'acirc':'a', 'atilde':'a', 'auml':'a', 'aring':'a',
'egrave':'e', 'eacute':'e', 'ecirc':'e', 'euml':'e',
'igrave':'i', 'iacute':'i', 'icirc':'i', 'iuml':'i',
'ograve':'o', 'oacute':'o', 'ocirc':'o', 'otilde':'o', 'ouml':'o',
'ugrave':'u', 'uacute':'u', 'ucirc':'u', 'uuml':'u'}

unifiable_n = {}

for k in unifiable.keys():
    unifiable_n[name2cp(k)] = unifiable[k]

def charref(name):
    if name[0] in ['x','X']:
        c = int(name[1:], 16)
    else:
        c = int(name)

    if not UNICODE_SNOB and c in unifiable_n.keys():
        return unifiable_n[c]
    else:
        return unichr(c)

def entityref(c):
    if not UNICODE_SNOB and c in unifiable.keys():
        return unifiable[c]
    else:
        try: name2cp(c)
        except KeyError: return "&" + c
        else: return unichr(name2cp(c))

def replaceEntities(s):
    s = s.group(1)
    if s[0] == "#":
        return charref(s[1:])
    else: return entityref(s)

r_unescape = re.compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));")
def unescape(s):
    return r_unescape.sub(replaceEntities, s)

def fixattrs(attrs):
    # Fix bug in sgmllib.py
    if not attrs: return attrs
    newattrs = []
    for attr in attrs:
        newattrs.append((attr[0], unescape(attr[1])))
    return newattrs

### End Entity Nonsense ###

def onlywhite(line):
    """Return true if the line does only consist of whitespace characters."""
    for c in line:
        if c is not ' ' and c is not ' ':
            return c is ' '
    return line

def optwrap(text):
    """Wrap all paragraphs in the provided text."""
    if not BODY_WIDTH:
        return text

    assert wrap, "Requires Python 2.3."
    result = ''
    newlines = 0
    for para in text.split("\n"):
        if len(para) > 0:
            if para[0] is not ' ' and para[0] is not '-' and para[0] is not '*':
                for line in wrap(para, BODY_WIDTH):
                    result += line + "\n"
                result += "\n"
                newlines = 2
            else:
                if not onlywhite(para):
                    result += para + "\n"
                    newlines = 1
        else:
            if newlines < 2:
                result += "\n"
                newlines += 1
    return result

def hn(tag):
    if tag[0] == 'h' and len(tag) == 2:
        try:
            n = int(tag[1])
            if n in range(1, 10): return n
        except ValueError: return 0

class _html2text(sgmllib.SGMLParser):
    def __init__(self, out=None, baseurl=''):
        sgmllib.SGMLParser.__init__(self)

        if out is None: self.out = self.outtextf
        else: self.out = out
        self.outtext = u''
        self.quiet = 0
        self.p_p = 0
        self.outcount = 0
        self.start = 1
        self.space = 0
        self.a = []
        self.astack = []
        self.acount = 0
        self.list = []
        self.blockquote = 0
        self.pre = 0
        self.startpre = 0
        self.lastWasNL = 0
        self.abbr_title = None # current abbreviation definition
        self.abbr_data = None # last inner HTML (for abbr being defined)
        self.abbr_list = {} # stack of abbreviations to write later
        self.baseurl = baseurl

    def outtextf(self, s):
        self.outtext += s

    def close(self):
        sgmllib.SGMLParser.close(self)

        self.pbr()
        self.o('', 0, 'end')

        return self.outtext

    def handle_charref(self, c):
        self.o(charref(c))

    def handle_entityref(self, c):
        self.o(entityref(c))

    def unknown_starttag(self, tag, attrs):
        self.handle_tag(tag, attrs, 1)

    def unknown_endtag(self, tag):
        self.handle_tag(tag, None, 0)

    def previousIndex(self, attrs):
        """ returns the index of certain set of attributes (of a link) in the
            self.a list

            If the set of attributes is not found, returns None
        """
        if not attrs.has_key('href'): return None

        i = -1
        for a in self.a:
            i += 1
            match = 0

            if a.has_key('href') and a['href'] == attrs['href']:
                if a.has_key('title') or attrs.has_key('title'):
                    if (a.has_key('title') and attrs.has_key('title') and
                        a['title'] == attrs['title']):
                        match = True
                else:
                    match = True

            if match: return i

    def handle_tag(self, tag, attrs, start):
        attrs = fixattrs(attrs)

        if hn(tag):
            self.p()
            if start: self.o(hn(tag)*"#" + ' ')

        if tag in ['p', 'div']: self.p()

        if tag == "br" and start: self.o("  \n")

        if tag == "hr" and start:
            self.p()
            self.o("* * *")
            self.p()

        if tag in ["head", "style", 'script']:
            if start: self.quiet += 1
            else: self.quiet -= 1

        if tag in ["body"]:
            self.quiet = 0 # sites like 9rules.com never close <head>

        if tag == "blockquote":
            if start:
                self.p(); self.o('> ', 0, 1); self.start = 1
                self.blockquote += 1
            else:
                self.blockquote -= 1
                self.p()

        if tag in ['em', 'i', 'u']: self.o("_")
        if tag in ['strong', 'b']: self.o("**")
        if tag == "code" and not self.pre: self.o('`') #TODO: `` `this` ``
        if tag == "abbr":
            if start:
                attrsD = {}
                for (x, y) in attrs: attrsD[x] = y
                attrs = attrsD

                self.abbr_title = None
                self.abbr_data = ''
                if attrs.has_key('title'):
                    self.abbr_title = attrs['title']
            else:
                if self.abbr_title != None:
                    self.abbr_list[self.abbr_data] = self.abbr_title
                    self.abbr_title = None
                self.abbr_data = ''

        if tag == "a":
            if start:
                attrsD = {}
                for (x, y) in attrs: attrsD[x] = y
                attrs = attrsD
                if attrs.has_key('href') and not (SKIP_INTERNAL_LINKS and attrs['href'].startswith('#')):
                    self.astack.append(attrs)
                    self.o("[")
                else:
                    self.astack.append(None)
            else:
                if self.astack:
                    a = self.astack.pop()
                    if a:
                        i = self.previousIndex(a)
                        if i is not None:
                            a = self.a[i]
                        else:
                            self.acount += 1
                            a['count'] = self.acount
                            a['outcount'] = self.outcount
                            self.a.append(a)
                        self.o("][" + `a['count']` + "]")

        if tag == "img" and start:
            attrsD = {}
            for (x, y) in attrs: attrsD[x] = y
            attrs = attrsD
            if attrs.has_key('src'):
                attrs['href'] = attrs['src']
                alt = attrs.get('alt', '')
                i = self.previousIndex(attrs)
                if i is not None:
                    attrs = self.a[i]
                else:
                    self.acount += 1
                    attrs['count'] = self.acount
                    attrs['outcount'] = self.outcount
                    self.a.append(attrs)
                self.o("![")
                self.o(alt)
                self.o("]["+`attrs['count']`+"]")

        if tag == 'dl' and start: self.p()
        if tag == 'dt' and not start: self.pbr()
        if tag == 'dd' and start: self.o('    ')
        if tag == 'dd' and not start: self.pbr()

        if tag in ["ol", "ul"]:
            if start:
                self.list.append({'name':tag, 'num':0})
            else:
                if self.list: self.list.pop()

            self.p()

        if tag == 'li':
            if start:
                self.pbr()
                if self.list: li = self.list[-1]
                else: li = {'name':'ul', 'num':0}
                self.o("  "*len(self.list)) #TODO: line up <ol><li>s > 9 correctly.
                if li['name'] == "ul": self.o("* ")
                elif li['name'] == "ol":
                    li['num'] += 1
                    self.o(`li['num']`+". ")
                self.start = 1
            else:
                self.pbr()

        if tag in ["table", "tr"] and start: self.p()
        if tag == 'td': self.pbr()

        if tag == "pre":
            if start:
                self.startpre = 1
                self.pre = 1
            else:
                self.pre = 0
            self.p()

    def pbr(self):
        if self.p_p == 0: self.p_p = 1

    def p(self): self.p_p = 2

    def o(self, data, puredata=0, force=0):
        if self.abbr_data is not None: self.abbr_data += data

        if not self.quiet:
            if puredata and not self.pre:
                data = re.sub('\s+', ' ', data)
                if data and data[0] == ' ':
                    self.space = 1
                    data = data[1:]
            if not data and not force: return

            if self.startpre:
                #self.out(" :") #TODO: not output when already one there
                self.startpre = 0

            bq = (">" * self.blockquote)
            if not (force and data and data[0] == ">") and self.blockquote: bq += " "

            if self.pre:
                bq += "    "
                data = data.replace("\n", "\n"+bq)

            if self.start:
                self.space = 0
                self.p_p = 0
                self.start = 0

            if force == 'end':
                # It's the end.
                self.p_p = 0
                self.out("\n")
                self.space = 0

            if self.p_p:
                self.out(('\n'+bq)*self.p_p)
                self.space = 0

            if self.space:
                if not self.lastWasNL: self.out(' ')
                self.space = 0

            if self.a and ((self.p_p == 2 and LINKS_EACH_PARAGRAPH) or force == "end"):
                if force == "end": self.out("\n")

                newa = []
                for link in self.a:
                    if self.outcount > link['outcount']:
                        self.out("   ["+`link['count']`+"]: " + urlparse.urljoin(self.baseurl, link['href']))
                        if link.has_key('title'): self.out(" ("+link['title']+")")
                        self.out("\n")
                    else:
                        newa.append(link)

                if self.a != newa: self.out("\n") # Don't need an extra line when nothing was done.

                self.a = newa

            if self.abbr_list and force == "end":
                for abbr, definition in self.abbr_list.items():
                    self.out("  *[" + abbr + "]: " + definition + "\n")

            self.p_p = 0
            self.out(data)
            self.lastWasNL = data and data[-1] == '\n'
            self.outcount += 1

    def handle_data(self, data):
        if r'\/script>' in data: self.quiet -= 1
        self.o(data, 1)

    def unknown_decl(self, data): pass

def wrapwrite(text): sys.stdout.write(text.encode('utf8'))

def html2text_file(html, out=wrapwrite, baseurl=''):
    h = _html2text(out, baseurl)
    h.feed(html)
    h.feed("")
    return h.close()

def html2text(html, baseurl=''):
    return optwrap(html2text_file(html, None, baseurl))

if __name__ == "__main__":
    baseurl = ''
    if sys.argv[1:]:
        arg = sys.argv[1]
        if arg.startswith('http://') or arg.startswith('https://'):
            baseurl = arg
            j = urllib.urlopen(baseurl)
            try:
                from feedparser import _getCharacterEncoding as enc
                enc
            except ImportError:
                enc = lambda x, y: ('utf-8', 1)
            text = j.read()
            encoding = enc(j.headers, text)[0]
            if encoding == 'us-ascii': encoding = 'utf-8'
            data = text.decode(encoding)

        else:
            encoding = 'utf8'
            if len(sys.argv) > 2:
                encoding = sys.argv[2]
            data = open(arg, 'r').read().decode(encoding)
    else:
        data = sys.stdin.read().decode('utf8')
    wrapwrite(html2text(data, baseurl))
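The vendored module's public entry point is html2text(html, baseurl=''), which the new sanitize_comments_html in comments.py builds on. Minimal use (editor's sketch; the exact spacing of the output may differ):

    from calibre.utils.html2text import html2text
    print html2text(u'<h1>Title</h1><p>Some <b>bold</b> text.</p>')
    # -> '# Title' followed by 'Some **bold** text.' as Markdown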