Merge from branch with Kovid's integration of the preprocess code

This commit is contained in:
ldolse 2010-09-14 11:03:03 +08:00
commit 0a2e16e466
24 changed files with 282 additions and 247 deletions

View File

@ -26,7 +26,7 @@ var current_library_request = null;
////////////////////////////// GET BOOK LIST ////////////////////////////// ////////////////////////////// GET BOOK LIST //////////////////////////////
var LIBRARY_FETCH_TIMEOUT = 30000; // milliseconds var LIBRARY_FETCH_TIMEOUT = 5*60000; // milliseconds
function create_table_headers() { function create_table_headers() {
var thead = $('table#book_list thead tr'); var thead = $('table#book_list thead tr');

View File

@ -114,3 +114,11 @@ add_new_book_tags_when_importing_books = False
# Set the maximum number of tags to show per book in the content server # Set the maximum number of tags to show per book in the content server
max_content_server_tags_shown=5 max_content_server_tags_shown=5
# Set the maximum number of sort 'levels' that calibre will use to re-sort the
# library after certain operations such as searches or device insertion. Each
# sort level adds a performance penalty. If the database is large (thousands of
# books) the penalty might be noticeable. If you are not concerned about
# multi-level sorts, and if you are seeing a slowdown, reduce the value of this
# tweak.
maximum_resort_levels = 5

View File

@ -1,12 +1,8 @@
#!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
''' '''
infobae.com infobae.com
''' '''
import re
import urllib, urlparse
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
@ -21,33 +17,22 @@ class Infobae(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
language = 'es' language = 'es'
lang = 'es-AR'
encoding = 'cp1252' encoding = 'cp1252'
cover_url = 'http://www.infobae.com/imgs/header/header.gif' masthead_url = 'http://www.infobae.com/imgs/header/header.gif'
remove_javascript = True remove_javascript = True
preprocess_regexps = [(re.compile( remove_empty_feeds = True
r'<meta name="Description" content="[^"]+">'), lambda m:'')]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
extra_css = ''' extra_css = '''
.col-center{font-family:Arial,Helvetica,sans-serif;} body{font-family:Arial,Helvetica,sans-serif;}
h1{font-family:Arial,Helvetica,sans-serif; color:#0D4261;} .popUpTitulo{color:#0D4261; font-size: xx-large}
.fuenteIntNota{font-family:Arial,Helvetica,sans-serif; color:#1D1D1D; font-size:x-small;}
''' '''
keep_only_tags = [dict(name='div', attrs={'class':['content']})] conversion_options = {
'comment' : description
, 'tags' : category
remove_tags = [ , 'publisher' : publisher
dict(name='div', attrs={'class':['options','col-right','controles', 'bannerLibre','tiulo-masleidas','masleidas-h']}), , 'language' : language
dict(name='a', attrs={'name' : 'comentario',}), , 'linearize_tables' : True
dict(name='iframe'), }
dict(name='img', alt = "Ver galerias de imagenes"),
]
feeds = [ feeds = [
@ -57,39 +42,14 @@ class Infobae(BasicNewsRecipe):
,(u'Deportes' , u'http://www.infobae.com/adjuntos/html/RSS/deportes.xml' ) ,(u'Deportes' , u'http://www.infobae.com/adjuntos/html/RSS/deportes.xml' )
] ]
# def print_version(self, url): def print_version(self, url):
# main, sep, article_part = url.partition('contenidos/') article_part = url.rpartition('/')[2]
# article_id, rsep, rrest = article_part.partition('-') article_id= article_part.partition('-')[0]
# return u'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id return 'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id
def get_article_url(self, article):
ans = article.get('link').encode('utf-8')
parts = list(urlparse.urlparse(ans))
parts[2] = urllib.quote(parts[2])
ans = urlparse.urlunparse(parts)
return ans.decode('utf-8')
def preprocess_html(self, soup):
for tag in soup.head.findAll('strong'):
tag.extract()
for tag in soup.findAll('meta'):
del tag['content']
tag.extract()
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">\n<meta http-equiv="Content-Language" content="es-AR"/>\n'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
def postprocess_html(self, soup, first): def postprocess_html(self, soup, first):
for tag in soup.findAll(name='strong'): for tag in soup.findAll(name='strong'):
tag.name = 'b' tag.name = 'b'
return soup return soup

View File

@ -6,6 +6,7 @@ nspm.rs
import re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import NavigableString
class Nspm(BasicNewsRecipe): class Nspm(BasicNewsRecipe):
title = 'Nova srpska politicka misao' title = 'Nova srpska politicka misao'
@ -21,6 +22,7 @@ class Nspm(BasicNewsRecipe):
encoding = 'utf-8' encoding = 'utf-8'
language = 'sr' language = 'sr'
delay = 2 delay = 2
remove_empty_feeds = True
publication_type = 'magazine' publication_type = 'magazine'
masthead_url = 'http://www.nspm.rs/templates/jsn_epic_pro/images/logol.jpg' masthead_url = 'http://www.nspm.rs/templates/jsn_epic_pro/images/logol.jpg'
extra_css = """ @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} extra_css = """ @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
@ -45,6 +47,7 @@ class Nspm(BasicNewsRecipe):
dict(name=['link','object','embed','script','meta','base','iframe']) dict(name=['link','object','embed','script','meta','base','iframe'])
,dict(attrs={'class':'buttonheading'}) ,dict(attrs={'class':'buttonheading'})
] ]
remove_tags_before = dict(attrs={'class':'contentheading'})
remove_tags_after = dict(attrs={'class':'article_separator'}) remove_tags_after = dict(attrs={'class':'article_separator'})
remove_attributes = ['width','height'] remove_attributes = ['width','height']
@ -67,4 +70,8 @@ class Nspm(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.body.findAll(style=True): for item in soup.body.findAll(style=True):
del item['style'] del item['style']
for item in soup.body.findAll('h1'):
nh = NavigableString(item.a.string)
item.a.extract()
item.insert(0,nh)
return self.adeify_images(soup) return self.adeify_images(soup)

View File

@ -35,7 +35,7 @@ class XkcdCom(BasicNewsRecipe):
'date': item['title'], 'date': item['title'],
'timestamp': time.mktime(time.strptime(item['title'], '%Y-%m-%d'))+1, 'timestamp': time.mktime(time.strptime(item['title'], '%Y-%m-%d'))+1,
'url': 'http://xkcd.com' + item['href'], 'url': 'http://xkcd.com' + item['href'],
'title': self.tag_to_string(item).encode('UTF-8'), 'title': self.tag_to_string(item),
'description': '', 'description': '',
'content': '', 'content': '',
}) })

View File

@ -459,7 +459,7 @@ from calibre.devices.iriver.driver import IRIVER_STORY
from calibre.devices.binatone.driver import README from calibre.devices.binatone.driver import README
from calibre.devices.hanvon.driver import N516, EB511, ALEX, AZBOOKA, THEBOOK from calibre.devices.hanvon.driver import N516, EB511, ALEX, AZBOOKA, THEBOOK
from calibre.devices.edge.driver import EDGE from calibre.devices.edge.driver import EDGE
from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, SOVOS
from calibre.devices.sne.driver import SNE from calibre.devices.sne.driver import SNE
from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, GEMEI from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, GEMEI
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
@ -557,6 +557,7 @@ plugins += [
TECLAST_K3, TECLAST_K3,
NEWSMY, NEWSMY,
IPAPYRUS, IPAPYRUS,
SOVOS,
EDGE, EDGE,
SNE, SNE,
ALEX, ALEX,

View File

@ -52,3 +52,14 @@ class IPAPYRUS(TECLAST_K3):
VENDOR_NAME = 'E_READER' VENDOR_NAME = 'E_READER'
WINDOWS_MAIN_MEM = '' WINDOWS_MAIN_MEM = ''
class SOVOS(TECLAST_K3):
    # Driver entry for the Sovos reader; everything not overridden here is
    # taken from TECLAST_K3.

    # Names and description of this driver
    name = 'Sovos device interface'
    gui_name = 'Sovos'
    description = _('Communicate with the Sovos reader.')

    FORMATS = ['epub', 'fb2', 'pdf', 'txt']

    VENDOR_NAME = 'RK28XX'

    # Main memory and card A share one identifier string
    WINDOWS_MAIN_MEM = 'USB-MSC'
    WINDOWS_CARD_A_MEM = WINDOWS_MAIN_MEM

View File

@ -132,7 +132,11 @@ class CHMReader(CHMFile):
for path in self.Contents(): for path in self.Contents():
lpath = os.path.join(output_dir, path) lpath = os.path.join(output_dir, path)
self._ensure_dir(lpath) self._ensure_dir(lpath)
try:
data = self.GetFile(path) data = self.GetFile(path)
except:
self.log.exception('Failed to extract %s from CHM, ignoring'%path)
continue
if lpath.find(';') != -1: if lpath.find(';') != -1:
# fix file names with ";<junk>" at the end, see _reformat() # fix file names with ";<junk>" at the end, see _reformat()
lpath = lpath.split(';')[0] lpath = lpath.split(';')[0]

View File

@ -168,6 +168,17 @@ class HTMLPreProcessor(object):
(re.compile(u'`\s*(<br.*?>)*\s*O', re.UNICODE), lambda match: u'Ò'), (re.compile(u'`\s*(<br.*?>)*\s*O', re.UNICODE), lambda match: u'Ò'),
(re.compile(u'`\s*(<br.*?>)*\s*u', re.UNICODE), lambda match: u'ù'), (re.compile(u'`\s*(<br.*?>)*\s*u', re.UNICODE), lambda match: u'ù'),
(re.compile(u'`\s*(<br.*?>)*\s*U', re.UNICODE), lambda match: u'Ù'), (re.compile(u'`\s*(<br.*?>)*\s*U', re.UNICODE), lambda match: u'Ù'),
# ` with letter before
(re.compile(u'a\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'à'),
(re.compile(u'A\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'À'),
(re.compile(u'e\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'è'),
(re.compile(u'E\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'È'),
(re.compile(u'i\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'ì'),
(re.compile(u'I\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'Ì'),
(re.compile(u'o\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'ò'),
(re.compile(u'O\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'Ò'),
(re.compile(u'u\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'ù'),
(re.compile(u'U\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'Ù'),
# ´ # ´
(re.compile(u'´\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'á'), (re.compile(u'´\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'á'),

View File

@ -10,12 +10,11 @@ from calibre.ebooks.conversion.preprocess import line_length
from calibre.utils.logging import default_log from calibre.utils.logging import default_log
class PreProcessor(object): class PreProcessor(object):
html_preprocess_sections = 0
found_indents = 0
def __init__(self, args): def __init__(self, log=None):
self.args = args self.log = default_log if log is None else log
self.log = default_log self.html_preprocess_sections = 0
self.found_indents = 0
def chapter_head(self, match): def chapter_head(self, match):
chap = match.group('chap') chap = match.group('chap')

View File

@ -491,6 +491,6 @@ class HTMLInput(InputFormatPlugin):
return (None, raw) return (None, raw)
def preprocess_html(self, html): def preprocess_html(self, html):
preprocessor = PreProcessor(html) preprocessor = PreProcessor(log=getattr(self, 'log', None))
html = preprocessor(html) return preprocessor(html)
return html

View File

@ -54,7 +54,6 @@ class LITInput(InputFormatPlugin):
def preprocess_html(self, html): def preprocess_html(self, html):
preprocessor = PreProcessor(html) preprocessor = PreProcessor(log=getattr(self, 'log', None))
html = preprocessor(html) return preprocessor(html)
return html

View File

@ -138,6 +138,7 @@ class CSSFlattener(object):
float(self.context.margin_left)) float(self.context.margin_left))
bs.append('margin-right : %fpt'%\ bs.append('margin-right : %fpt'%\
float(self.context.margin_right)) float(self.context.margin_right))
bs.extend(['padding-left: 0pt', 'padding-right: 0pt'])
if self.context.change_justification != 'original': if self.context.change_justification != 'original':
bs.append('text-align: '+ self.context.change_justification) bs.append('text-align: '+ self.context.change_justification)
body.set('style', '; '.join(bs)) body.set('style', '; '.join(bs))

View File

@ -207,6 +207,7 @@ class PML_HTMLizer(object):
while html != old: while html != old:
old = html old = html
html = self.cleanup_html_remove_redundant(html) html = self.cleanup_html_remove_redundant(html)
html = re.sub(r'(?imu)^\s*', '', html)
return html return html
def cleanup_html_remove_redundant(self, html): def cleanup_html_remove_redundant(self, html):
@ -216,7 +217,7 @@ class PML_HTMLizer(object):
html = re.sub(r'(?u)%s\s*%s' % (open % '.*?', close), '', html) html = re.sub(r'(?u)%s\s*%s' % (open % '.*?', close), '', html)
else: else:
html = re.sub(r'(?u)%s\s*%s' % (open, close), '', html) html = re.sub(r'(?u)%s\s*%s' % (open, close), '', html)
html = re.sub(r'<p>\s*</p>', '', html) html = re.sub(r'(?imu)<p>\s*</p>', '', html)
return html return html
def start_line(self): def start_line(self):
@ -556,7 +557,7 @@ class PML_HTMLizer(object):
text = t text = t
else: else:
self.toc.add_item(os.path.basename(self.file_name), id, value) self.toc.add_item(os.path.basename(self.file_name), id, value)
text = '<span id="%s"></span>%s' % (id, t) text = '%s<span id="%s"></span>' % (t, id)
elif c == 'm': elif c == 'm':
empty = False empty = False
src = self.code_value(line) src = self.code_value(line)

View File

@ -7,7 +7,6 @@ import os, glob, re, textwrap
from lxml import etree from lxml import etree
from calibre.customize.conversion import InputFormatPlugin from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.conversion.preprocess import line_length
from calibre.ebooks.conversion.utils import PreProcessor from calibre.ebooks.conversion.utils import PreProcessor
class InlineClass(etree.XSLTExtension): class InlineClass(etree.XSLTExtension):
@ -230,7 +229,7 @@ class RTFInput(InputFormatPlugin):
res = transform.tostring(result) res = transform.tostring(result)
res = res[:100].replace('xmlns:html', 'xmlns') + res[100:] res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
if self.options.preprocess_html: if self.options.preprocess_html:
preprocessor = PreProcessor(res) preprocessor = PreProcessor(log=getattr(self, 'log', None))
res = preprocessor(res) res = preprocessor(res)
f.write(res) f.write(res)
self.write_inline_css(inline_class) self.write_inline_css(inline_class)

View File

@ -77,7 +77,7 @@ def separate_paragraphs_print_formatted(txt):
def preserve_spaces(txt): def preserve_spaces(txt):
txt = txt.replace(' ', '&nbsp;') txt = txt.replace(' ', '&nbsp;')
txt = txt.replace('\t', '&#09;') txt = txt.replace('\t', '&nbsp;&nbsp;&nbsp;&nbsp;')
return txt return txt
def opf_writer(path, opf_name, manifest, spine, mi): def opf_writer(path, opf_name, manifest, spine, mi):

View File

@ -209,8 +209,9 @@ class EditMetadataAction(InterfaceAction):
dest_id, src_books, src_ids = self.books_to_merge(rows) dest_id, src_books, src_ids = self.books_to_merge(rows)
if safe_merge: if safe_merge:
if not confirm('<p>'+_( if not confirm('<p>'+_(
'All book formats and metadata from the selected books ' 'Book formats and metadata from the selected books '
'will be added to the <b>first selected book.</b><br><br> ' 'will be added to the <b>first selected book.</b> '
'ISBN will <i>not</i> be merged.<br><br> '
'The second and subsequently selected books will not ' 'The second and subsequently selected books will not '
'be deleted or changed.<br><br>' 'be deleted or changed.<br><br>'
'Please confirm you want to proceed.') 'Please confirm you want to proceed.')
@ -220,8 +221,9 @@ class EditMetadataAction(InterfaceAction):
self.merge_metadata(dest_id, src_ids) self.merge_metadata(dest_id, src_ids)
else: else:
if not confirm('<p>'+_( if not confirm('<p>'+_(
'All book formats and metadata from the selected books will be merged ' 'Book formats and metadata from the selected books will be merged '
'into the <b>first selected book</b>.<br><br>' 'into the <b>first selected book</b>. '
'ISBN will <i>not</i> be merged.<br><br>'
'After merger the second and ' 'After merger the second and '
'subsequently selected books will be <b>deleted</b>. <br><br>' 'subsequently selected books will be <b>deleted</b>. <br><br>'
'All book formats of the first selected book will be kept ' 'All book formats of the first selected book will be kept '

View File

@ -121,10 +121,8 @@ class BooksModel(QAbstractTableModel): # {{{
def set_device_connected(self, is_connected): def set_device_connected(self, is_connected):
self.device_connected = is_connected self.device_connected = is_connected
self.db.refresh_ondevice() self.db.refresh_ondevice()
self.refresh() self.refresh() # does a resort()
self.research() self.research()
if is_connected and self.sorted_on[0] == 'ondevice':
self.resort()
def set_book_on_device_func(self, func): def set_book_on_device_func(self, func):
self.book_on_device = func self.book_on_device = func
@ -264,19 +262,15 @@ class BooksModel(QAbstractTableModel): # {{{
self.sorting_done.emit(self.db.index) self.sorting_done.emit(self.db.index)
def refresh(self, reset=True): def refresh(self, reset=True):
try:
col = self.column_map.index(self.sorted_on[0])
except:
col = 0
self.db.refresh(field=None) self.db.refresh(field=None)
self.sort(col, self.sorted_on[1], reset=reset) self.resort(reset=reset)
def resort(self, reset=True): def resort(self, reset=True):
try: if not self.db:
col = self.column_map.index(self.sorted_on[0]) return
except ValueError: self.db.multisort(self.sort_history[:tweaks['maximum_resort_levels']])
col = 0 if reset:
self.sort(col, self.sorted_on[1], reset=reset) self.reset()
def research(self, reset=True): def research(self, reset=True):
self.search(self.last_search, reset=reset) self.search(self.last_search, reset=reset)
@ -1030,6 +1024,11 @@ class DeviceBooksModel(BooksModel): # {{{
if reset: if reset:
self.reset() self.reset()
def resort(self, reset=True):
    '''
    Re-apply the sort recorded in self.sorted_on.

    Does nothing when no sort has been recorded. Otherwise the recorded
    column name is looked up in self.column_map and passed, with the
    recorded order and reset, to self.sort().
    '''
    if not self.sorted_on:
        return
    column = self.column_map.index(self.sorted_on[0])
    order = self.sorted_on[1]
    self.sort(column, order, reset=reset)
def columnCount(self, parent): def columnCount(self, parent):
if parent and parent.isValid(): if parent and parent.isValid():
return 0 return 0

View File

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re, itertools, functools import re, itertools
from itertools import repeat from itertools import repeat
from datetime import timedelta from datetime import timedelta
from threading import Thread, RLock from threading import Thread, RLock
@ -112,7 +112,7 @@ class ResultCache(SearchQueryParser):
''' '''
def __init__(self, FIELD_MAP, field_metadata): def __init__(self, FIELD_MAP, field_metadata):
self.FIELD_MAP = FIELD_MAP self.FIELD_MAP = FIELD_MAP
self._map = self._map_filtered = self._data = [] self._map = self._data = self._map_filtered = []
self.first_sort = True self.first_sort = True
self.search_restriction = '' self.search_restriction = ''
self.field_metadata = field_metadata self.field_metadata = field_metadata
@ -141,6 +141,8 @@ class ResultCache(SearchQueryParser):
for x in self.iterall(): for x in self.iterall():
yield x[idx] yield x[idx]
# Search functions {{{
def universal_set(self): def universal_set(self):
return set([i[0] for i in self._data if i is not None]) return set([i[0] for i in self._data if i is not None])
@ -462,12 +464,43 @@ class ResultCache(SearchQueryParser):
continue continue
return matches return matches
def search(self, query, return_matches=False):
    '''
    Run query together with the current search restriction.

    When return_matches is true the matching ids are returned and the
    filtered map is left untouched. Otherwise the filtered map is replaced
    by the matches and nothing is returned.
    '''
    matched_ids = self.search_getting_ids(query, self.search_restriction)
    if not return_matches:
        self._map_filtered = matched_ids
        return None
    return matched_ids
def search_getting_ids(self, query, search_restriction):
    '''
    Return the entries of self._map matched by query and search_restriction,
    in the order they appear in self._map.

    A blank query falls back to the restriction alone; when both are
    given they are combined as "restriction (query)". If nothing is left
    to search for, a copy of the whole map is returned.
    '''
    has_query = bool(query) and bool(query.strip())
    if has_query and search_restriction:
        expression = u'%s (%s)' % (search_restriction, query)
    elif has_query:
        expression = query
    else:
        expression = search_restriction

    if not expression:
        return list(self._map)

    matches = self.parse(expression)
    # Flag table indexed by id, so membership tests below are O(1)
    is_match = [False] * len(self._data)
    for book_id in matches:
        is_match[book_id] = True
    return [book_id for book_id in self._map if is_match[book_id]]
def set_search_restriction(self, s):
    '''Store s as the restriction that search() passes along with each query.'''
    restriction = s
    self.search_restriction = restriction
# }}}
def remove(self, id): def remove(self, id):
self._data[id] = None self._data[id] = None
if id in self._map: try:
self._map.remove(id) self._map.remove(id)
if id in self._map_filtered: except ValueError:
pass
try:
self._map_filtered.remove(id) self._map_filtered.remove(id)
except ValueError:
pass
def set(self, row, col, val, row_is_id=False): def set(self, row, col, val, row_is_id=False):
id = row if row_is_id else self._map_filtered[row] id = row if row_is_id else self._map_filtered[row]
@ -522,9 +555,7 @@ class ResultCache(SearchQueryParser):
def books_deleted(self, ids): def books_deleted(self, ids):
for id in ids: for id in ids:
self._data[id] = None self.remove(id)
if id in self._map: self._map.remove(id)
if id in self._map_filtered: self._map_filtered.remove(id)
def count(self): def count(self):
return len(self._map) return len(self._map)
@ -549,90 +580,97 @@ class ResultCache(SearchQueryParser):
self.sort(field, ascending) self.sort(field, ascending)
self._map_filtered = list(self._map) self._map_filtered = list(self._map)
if self.search_restriction: if self.search_restriction:
self.search('', return_matches=False, ignore_search_restriction=False) self.search('', return_matches=False)
def seriescmp(self, sidx, siidx, x, y, library_order=None): # Sorting functions {{{
try:
if library_order:
ans = cmp(title_sort(self._data[x][sidx].lower()),
title_sort(self._data[y][sidx].lower()))
else:
ans = cmp(self._data[x][sidx].lower(),
self._data[y][sidx].lower())
except AttributeError: # Some entries may be None
ans = cmp(self._data[x][sidx], self._data[y][sidx])
if ans != 0: return ans
return cmp(self._data[x][siidx], self._data[y][siidx])
def cmp(self, loc, x, y, asstr=True, subsort=False): def sanitize_sort_field_name(self, field):
try:
ans = cmp(self._data[x][loc].lower(), self._data[y][loc].lower()) if \
asstr else cmp(self._data[x][loc], self._data[y][loc])
except AttributeError: # Some entries may be None
ans = cmp(self._data[x][loc], self._data[y][loc])
except TypeError: ## raised when a datetime is None
x = self._data[x][loc]
if x is None:
x = UNDEFINED_DATE
y = self._data[y][loc]
if y is None:
y = UNDEFINED_DATE
return cmp(x, y)
if subsort and ans == 0:
return cmp(self._data[x][11].lower(), self._data[y][11].lower())
return ans
def sort(self, field, ascending, subsort=False):
field = field.lower().strip() field = field.lower().strip()
if field not in self.field_metadata.iterkeys():
if field in ('author', 'tag', 'comment'): if field in ('author', 'tag', 'comment'):
field += 's' field += 's'
if field == 'date': field = 'timestamp' if field == 'date': field = 'timestamp'
elif field == 'title': field = 'sort' elif field == 'title': field = 'sort'
elif field == 'authors': field = 'author_sort' elif field == 'authors': field = 'author_sort'
as_string = field not in ('size', 'rating', 'timestamp') return field
if self.first_sort: def sort(self, field, ascending, subsort=False):
subsort = True self.multisort([(field, ascending)])
self.first_sort = False
if self.field_metadata[field]['is_custom']: def multisort(self, fields=[], subsort=False):
if self.field_metadata[field]['datatype'] == 'series': fields = [(self.sanitize_sort_field_name(x), bool(y)) for x, y in fields]
fcmp = functools.partial(self.seriescmp, keys = self.field_metadata.field_keys()
self.field_metadata[field]['rec_index'], fields = [x for x in fields if x[0] in keys]
self.field_metadata.cc_series_index_column_for(field), if subsort and 'sort' not in [x[0] for x in fields]:
library_order=tweaks['title_series_sorting'] == 'library_order') fields += [('sort', True)]
if not fields:
fields = [('timestamp', False)]
keyg = SortKeyGenerator(fields, self.field_metadata, self._data)
if len(fields) == 1:
self._map.sort(key=keyg, reverse=not fields[0][1])
else: else:
as_string = self.field_metadata[field]['datatype'] in ('comments', 'text') self._map.sort(key=keyg)
field = self.field_metadata[field]['colnum']
fcmp = functools.partial(self.cmp, self.FIELD_MAP[field], tmap = list(itertools.repeat(False, len(self._data)))
subsort=subsort, asstr=as_string) for x in self._map_filtered:
elif field == 'series': tmap[x] = True
fcmp = functools.partial(self.seriescmp, self.FIELD_MAP['series'], self._map_filtered = [x for x in self._map if tmap[x]]
self.FIELD_MAP['series_index'],
library_order=tweaks['title_series_sorting'] == 'library_order')
class SortKey(object):
    '''
    Sort key that compares two records field by field.

    orders holds one integer multiplier per field and values holds the
    corresponding field values.
    '''

    def __init__(self, orders, values):
        self.orders = orders
        self.values = values

    def __cmp__(self, other):
        # The first field whose values differ decides the result; that
        # field's comparison is scaled by its multiplier from orders.
        for position in range(len(self.orders)):
            result = cmp(self.values[position], other.values[position])
            if result:
                return result * self.orders[position]
        return 0
class SortKeyGenerator(object):
def __init__(self, fields, field_metadata, data):
    '''
    Prepare key generation for a sort over fields.

    :param fields: sequence of (field name, ascending) pairs, most
                   significant field first
    :param field_metadata: mapping from field name to that field's
                           metadata entry
    :param data: container indexed by the record passed to __call__
    '''
    self.field_metadata = field_metadata
    # SortKey multiplies each field comparison by its entry here, so an
    # ascending field must keep the comparison (1) and a descending field
    # must flip it (-1). This matches the single-field path, which sorts
    # with reverse=not ascending.
    self.orders = [1 if x[1] else -1 for x in fields]
    self.entries = [(x[0], field_metadata[x[0]]) for x in fields]
    self.library_order = tweaks['title_series_sorting'] == 'library_order'
    self.data = data
def __call__(self, record):
    '''
    Build the sort key for record, which is used as an index into self.data.

    A single-field sort yields that field's bare value; otherwise the
    values are wrapped in a SortKey together with self.orders.
    '''
    row = self.data[record]
    field_values = tuple(self.itervals(row))
    if len(field_values) != 1:
        return SortKey(self.orders, field_values)
    return field_values[0]
def itervals(self, record):
for name, fm in self.entries:
dt = fm['datatype']
val = record[fm['rec_index']]
if dt == 'datetime':
if val is None:
val = UNDEFINED_DATE
elif dt == 'series':
if val is None:
val = ('', 1)
else: else:
fcmp = functools.partial(self.cmp, self.FIELD_MAP[field], val = val.lower()
subsort=subsort, asstr=as_string) if self.library_order:
self._map.sort(cmp=fcmp, reverse=not ascending) val = title_sort(val)
self._map_filtered = [id for id in self._map if id in self._map_filtered] sidx_fm = self.field_metadata[name + '_index']
sidx = record[sidx_fm['rec_index']]
val = (val, sidx)
def search(self, query, return_matches=False): elif dt in ('text', 'comments'):
ans = self.search_getting_ids(query, self.search_restriction) if val is None:
if return_matches: val = ''
return ans val = val.lower()
self._map_filtered = ans yield val
# }}}
def search_getting_ids(self, query, search_restriction):
q = ''
if not query or not query.strip():
q = search_restriction
else:
q = query
if search_restriction:
q = u'%s (%s)' % (search_restriction, query)
if not q:
return list(self._map)
matches = sorted(self.parse(q))
return [id for id in self._map if id in matches]
def set_search_restriction(self, s):
self.search_restriction = s

View File

@ -311,6 +311,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.search_getting_ids = self.data.search_getting_ids self.search_getting_ids = self.data.search_getting_ids
self.refresh = functools.partial(self.data.refresh, self) self.refresh = functools.partial(self.data.refresh, self)
self.sort = self.data.sort self.sort = self.data.sort
self.multisort = self.data.multisort
self.index = self.data.index self.index = self.data.index
self.refresh_ids = functools.partial(self.data.refresh_ids, self) self.refresh_ids = functools.partial(self.data.refresh_ids, self)
self.row = self.data.row self.row = self.data.row

View File

@ -69,6 +69,8 @@ class FieldMetadata(dict):
VALID_DATA_TYPES = frozenset([None, 'rating', 'text', 'comments', 'datetime', VALID_DATA_TYPES = frozenset([None, 'rating', 'text', 'comments', 'datetime',
'int', 'float', 'bool', 'series']) 'int', 'float', 'bool', 'series'])
# Builtin metadata {{{
_field_metadata = [ _field_metadata = [
('authors', {'table':'authors', ('authors', {'table':'authors',
'column':'name', 'column':'name',
@ -288,6 +290,7 @@ class FieldMetadata(dict):
'is_custom':False, 'is_custom':False,
'is_category':False}), 'is_category':False}),
] ]
# }}}
# search labels that are not db columns # search labels that are not db columns
search_items = [ 'all', search_items = [ 'all',
@ -332,6 +335,9 @@ class FieldMetadata(dict):
def keys(self): def keys(self):
return self._tb_cats.keys() return self._tb_cats.keys()
def field_keys(self):
    '''Return the keys whose metadata entry has a 'kind' of 'field'.'''
    selected = []
    for key in self._tb_cats.keys():
        if self._tb_cats[key]['kind'] == 'field':
            selected.append(key)
    return selected
def iterkeys(self): def iterkeys(self):
for key in self._tb_cats: for key in self._tb_cats:
yield key yield key

View File

@ -5,7 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re, os, cStringIO, operator import re, os, cStringIO
import cherrypy import cherrypy
try: try:
@ -16,7 +16,15 @@ except ImportError:
from calibre import fit_image, guess_type from calibre import fit_image, guess_type
from calibre.utils.date import fromtimestamp from calibre.utils.date import fromtimestamp
from calibre.ebooks.metadata import title_sort from calibre.library.caches import SortKeyGenerator
class CSSortKeyGenerator(SortKeyGenerator):
    '''
    Key generator for single-field sorts of records that are passed in
    directly rather than looked up in a data store.
    '''

    def __init__(self, fields, fm):
        # No data store is supplied: __call__ is handed the record itself.
        SortKeyGenerator.__init__(self, fields, fm, None)

    def __call__(self, record):
        # Only the first field's value is used as the key.
        first_value = next(self.itervals(record))
        return first_value
class ContentServer(object): class ContentServer(object):
@ -47,32 +55,12 @@ class ContentServer(object):
def sort(self, items, field, order): def sort(self, items, field, order):
field = field.lower().strip() field = self.db.data.sanitize_sort_field_name(field)
if field == 'author':
field = 'authors'
if field == 'date':
field = 'timestamp'
if field not in ('title', 'authors', 'rating', 'timestamp', 'tags', 'size', 'series'): if field not in ('title', 'authors', 'rating', 'timestamp', 'tags', 'size', 'series'):
raise cherrypy.HTTPError(400, '%s is not a valid sort field'%field) raise cherrypy.HTTPError(400, '%s is not a valid sort field'%field)
cmpf = cmp if field in ('rating', 'size', 'timestamp') else \ keyg = CSSortKeyGenerator([(field, order)], self.db.field_metadata)
lambda x, y: cmp(x.lower() if x else '', y.lower() if y else '') items.sort(key=keyg, reverse=not order)
if field == 'series':
items.sort(cmp=self.seriescmp, reverse=not order)
else:
lookup = 'sort' if field == 'title' else field
lookup = 'author_sort' if field == 'authors' else field
field = self.db.FIELD_MAP[lookup]
getter = operator.itemgetter(field)
items.sort(cmp=lambda x, y: cmpf(getter(x), getter(y)), reverse=not order)
def seriescmp(self, x, y):
si = self.db.FIELD_MAP['series']
try:
ans = cmp(title_sort(x[si].lower()), title_sort(y[si].lower()))
except AttributeError: # Some entries may be None
ans = cmp(x[si], y[si])
if ans != 0: return ans
return cmp(x[self.db.FIELD_MAP['series_index']], y[self.db.FIELD_MAP['series_index']])
# }}} # }}}

View File

@ -54,10 +54,8 @@ def shorten_components_to(length, components):
r = x[0] if x is components[-1] else '' r = x[0] if x is components[-1] else ''
else: else:
if x is components[-1]: if x is components[-1]:
b, _, e = x.rpartition('.') b, e = os.path.splitext(x)
if not b and e: if e == '.': e = ''
b = e
e = ''
r = b[:-delta]+e r = b[:-delta]+e
if r.startswith('.'): r = x[0]+r if r.startswith('.'): r = x[0]+r
else: else:

View File

@ -165,7 +165,9 @@ class Feed(object):
if delta.days*24*3600 + delta.seconds <= 24*3600*self.oldest_article: if delta.days*24*3600 + delta.seconds <= 24*3600*self.oldest_article:
self.articles.append(article) self.articles.append(article)
else: else:
self.logger.debug('Skipping article %s (%s) from feed %s as it is too old.'%(title, article.localtime.strftime('%a, %d %b, %Y %H:%M'), self.title)) t = strftime(u'%a, %d %b, %Y %H:%M', article.localtime.timetuple())
self.logger.debug('Skipping article %s (%s) from feed %s as it is too old.'%
(title, t, self.title))
d = item.get('date', '') d = item.get('date', '')
article.formatted_date = d article.formatted_date = d