diff --git a/resources/content_server/gui.js b/resources/content_server/gui.js index 631fb8b617..d0fb49cc8e 100644 --- a/resources/content_server/gui.js +++ b/resources/content_server/gui.js @@ -26,7 +26,7 @@ var current_library_request = null; ////////////////////////////// GET BOOK LIST ////////////////////////////// -var LIBRARY_FETCH_TIMEOUT = 30000; // milliseconds +var LIBRARY_FETCH_TIMEOUT = 5*60000; // milliseconds function create_table_headers() { var thead = $('table#book_list thead tr'); diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index 66ee4d1471..71bf2c6c37 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -114,3 +114,11 @@ add_new_book_tags_when_importing_books = False # Set the maximum number of tags to show per book in the content server max_content_server_tags_shown=5 + +# Set the maximum number of sort 'levels' that calibre will use to resort the +# library after certain operations such as searches or device insertion. Each +# sort level adds a performance penalty. If the database is large (thousands of +# books) the penalty might be noticeable. If you are not concerned about multi- +# level sorts, and if you are seeing a slowdown, reduce the value of this tweak. 
+maximum_resort_levels = 5 + diff --git a/resources/recipes/infobae.recipe b/resources/recipes/infobae.recipe index cda9bf83d2..b7f9cd3c6c 100644 --- a/resources/recipes/infobae.recipe +++ b/resources/recipes/infobae.recipe @@ -1,12 +1,8 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2008-2009, Darko Miletic ' +__copyright__ = '2008-2010, Darko Miletic ' ''' infobae.com ''' -import re -import urllib, urlparse from calibre.web.feeds.news import BasicNewsRecipe @@ -20,35 +16,24 @@ class Infobae(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False - language = 'es' - lang = 'es-AR' - + language = 'es' encoding = 'cp1252' - cover_url = 'http://www.infobae.com/imgs/header/header.gif' + masthead_url = 'http://www.infobae.com/imgs/header/header.gif' remove_javascript = True - preprocess_regexps = [(re.compile( - r''), lambda m:'')] - - - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' - - extra_css = ''' - .col-center{font-family:Arial,Helvetica,sans-serif;} - h1{font-family:Arial,Helvetica,sans-serif; color:#0D4261;} - .fuenteIntNota{font-family:Arial,Helvetica,sans-serif; color:#1D1D1D; font-size:x-small;} - ''' - - keep_only_tags = [dict(name='div', attrs={'class':['content']})] - - - remove_tags = [ - dict(name='div', attrs={'class':['options','col-right','controles', 'bannerLibre','tiulo-masleidas','masleidas-h']}), - dict(name='a', attrs={'name' : 'comentario',}), - dict(name='iframe'), - dict(name='img', alt = "Ver galerias de imagenes"), - - ] - + remove_empty_feeds = True + extra_css = ''' + body{font-family:Arial,Helvetica,sans-serif;} + .popUpTitulo{color:#0D4261; font-size: xx-large} + ''' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + , 'linearize_tables' : True + } + feeds = [ (u'Noticias' , 
u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' ) @@ -57,39 +42,14 @@ class Infobae(BasicNewsRecipe): ,(u'Deportes' , u'http://www.infobae.com/adjuntos/html/RSS/deportes.xml' ) ] -# def print_version(self, url): -# main, sep, article_part = url.partition('contenidos/') -# article_id, rsep, rrest = article_part.partition('-') -# return u'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id - - def get_article_url(self, article): - ans = article.get('link').encode('utf-8') - parts = list(urlparse.urlparse(ans)) - parts[2] = urllib.quote(parts[2]) - ans = urlparse.urlunparse(parts) - return ans.decode('utf-8') - - - def preprocess_html(self, soup): - - for tag in soup.head.findAll('strong'): - tag.extract() - for tag in soup.findAll('meta'): - del tag['content'] - tag.extract() - - mtag = '\n\n' - soup.head.insert(0,mtag) - for item in soup.findAll(style=True): - del item['style'] - - return soup + def print_version(self, url): + article_part = url.rpartition('/')[2] + article_id= article_part.partition('-')[0] + return 'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id def postprocess_html(self, soup, first): - for tag in soup.findAll(name='strong'): tag.name = 'b' - return soup diff --git a/resources/recipes/nspm.recipe b/resources/recipes/nspm.recipe index 13ff42b277..58b782415b 100644 --- a/resources/recipes/nspm.recipe +++ b/resources/recipes/nspm.recipe @@ -6,6 +6,7 @@ nspm.rs import re from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import NavigableString class Nspm(BasicNewsRecipe): title = 'Nova srpska politicka misao' @@ -21,6 +22,7 @@ class Nspm(BasicNewsRecipe): encoding = 'utf-8' language = 'sr' delay = 2 + remove_empty_feeds = True publication_type = 'magazine' masthead_url = 'http://www.nspm.rs/templates/jsn_epic_pro/images/logol.jpg' extra_css = """ @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @@ -45,8 +47,9 @@ class Nspm(BasicNewsRecipe): 
dict(name=['link','object','embed','script','meta','base','iframe']) ,dict(attrs={'class':'buttonheading'}) ] - remove_tags_after = dict(attrs={'class':'article_separator'}) - remove_attributes = ['width','height'] + remove_tags_before = dict(attrs={'class':'contentheading'}) + remove_tags_after = dict(attrs={'class':'article_separator'}) + remove_attributes = ['width','height'] def get_browser(self): br = BasicNewsRecipe.get_browser() @@ -67,4 +70,8 @@ class Nspm(BasicNewsRecipe): def preprocess_html(self, soup): for item in soup.body.findAll(style=True): del item['style'] + for item in soup.body.findAll('h1'): + nh = NavigableString(item.a.string) + item.a.extract() + item.insert(0,nh) return self.adeify_images(soup) diff --git a/resources/recipes/xkcd.recipe b/resources/recipes/xkcd.recipe index 312027004e..ad0d420deb 100644 --- a/resources/recipes/xkcd.recipe +++ b/resources/recipes/xkcd.recipe @@ -24,18 +24,18 @@ class XkcdCom(BasicNewsRecipe): (re.compile(r'()'), lambda m: '%s%s

%s

' % (m.group(1), m.group(3), m.group(2))) ] - + def parse_index(self): INDEX = 'http://xkcd.com/archive/' - soup = self.index_to_soup(INDEX) + soup = self.index_to_soup(INDEX) articles = [] for item in soup.findAll('a', title=True): articles.append({ 'date': item['title'], 'timestamp': time.mktime(time.strptime(item['title'], '%Y-%m-%d'))+1, 'url': 'http://xkcd.com' + item['href'], - 'title': self.tag_to_string(item).encode('UTF-8'), + 'title': self.tag_to_string(item), 'description': '', 'content': '', }) diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 4c87236e71..68df832048 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -459,7 +459,7 @@ from calibre.devices.iriver.driver import IRIVER_STORY from calibre.devices.binatone.driver import README from calibre.devices.hanvon.driver import N516, EB511, ALEX, AZBOOKA, THEBOOK from calibre.devices.edge.driver import EDGE -from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS +from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, SOVOS from calibre.devices.sne.driver import SNE from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, GEMEI from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG @@ -557,6 +557,7 @@ plugins += [ TECLAST_K3, NEWSMY, IPAPYRUS, + SOVOS, EDGE, SNE, ALEX, diff --git a/src/calibre/devices/teclast/driver.py b/src/calibre/devices/teclast/driver.py index 0c60a367cf..2055ff9306 100644 --- a/src/calibre/devices/teclast/driver.py +++ b/src/calibre/devices/teclast/driver.py @@ -52,3 +52,14 @@ class IPAPYRUS(TECLAST_K3): VENDOR_NAME = 'E_READER' WINDOWS_MAIN_MEM = '' +class SOVOS(TECLAST_K3): + + name = 'Sovos device interface' + gui_name = 'Sovos' + description = _('Communicate with the Sovos reader.') + + FORMATS = ['epub', 'fb2', 'pdf', 'txt'] + + VENDOR_NAME = 'RK28XX' + WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'USB-MSC' + diff --git 
a/src/calibre/ebooks/chm/reader.py b/src/calibre/ebooks/chm/reader.py index 67a2d36607..831c16bf6a 100644 --- a/src/calibre/ebooks/chm/reader.py +++ b/src/calibre/ebooks/chm/reader.py @@ -132,7 +132,11 @@ class CHMReader(CHMFile): for path in self.Contents(): lpath = os.path.join(output_dir, path) self._ensure_dir(lpath) - data = self.GetFile(path) + try: + data = self.GetFile(path) + except: + self.log.exception('Failed to extract %s from CHM, ignoring'%path) + continue if lpath.find(';') != -1: # fix file names with ";" at the end, see _reformat() lpath = lpath.split(';')[0] diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 9464be1210..7742a20a21 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -168,6 +168,17 @@ class HTMLPreProcessor(object): (re.compile(u'`\s*()*\s*O', re.UNICODE), lambda match: u'Ò'), (re.compile(u'`\s*()*\s*u', re.UNICODE), lambda match: u'ù'), (re.compile(u'`\s*()*\s*U', re.UNICODE), lambda match: u'Ù'), + # ` with letter before + (re.compile(u'a\s*()*\s*`', re.UNICODE), lambda match: u'à'), + (re.compile(u'A\s*()*\s*`', re.UNICODE), lambda match: u'À'), + (re.compile(u'e\s*()*\s*`', re.UNICODE), lambda match: u'è'), + (re.compile(u'E\s*()*\s*`', re.UNICODE), lambda match: u'È'), + (re.compile(u'i\s*()*\s*`', re.UNICODE), lambda match: u'ì'), + (re.compile(u'I\s*()*\s*`', re.UNICODE), lambda match: u'Ì'), + (re.compile(u'o\s*()*\s*`', re.UNICODE), lambda match: u'ò'), + (re.compile(u'O\s*()*\s*`', re.UNICODE), lambda match: u'Ò'), + (re.compile(u'u\s*()*\s*`', re.UNICODE), lambda match: u'ù'), + (re.compile(u'U\s*()*\s*`', re.UNICODE), lambda match: u'Ù'), # ´ (re.compile(u'´\s*()*\s*a', re.UNICODE), lambda match: u'á'), diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index ecf030b27d..5301f70a16 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ 
b/src/calibre/ebooks/conversion/utils.py @@ -10,24 +10,23 @@ from calibre.ebooks.conversion.preprocess import line_length from calibre.utils.logging import default_log class PreProcessor(object): - html_preprocess_sections = 0 - found_indents = 0 - def __init__(self, args): - self.args = args - self.log = default_log - + def __init__(self, log=None): + self.log = default_log if log is None else log + self.html_preprocess_sections = 0 + self.found_indents = 0 + def chapter_head(self, match): chap = match.group('chap') title = match.group('title') if not title: - self.html_preprocess_sections = self.html_preprocess_sections + 1 - self.log("found " + str(self.html_preprocess_sections) + " chapters. - " + str(chap)) - return '

'+chap+'

\n' + self.html_preprocess_sections = self.html_preprocess_sections + 1 + self.log("found " + str(self.html_preprocess_sections) + " chapters. - " + str(chap)) + return '

'+chap+'

\n' else: - self.html_preprocess_sections = self.html_preprocess_sections + 1 - self.log("found " + str(self.html_preprocess_sections) + " chapters & titles. - " + str(chap) + ", " + str(title)) - return '

'+chap+'

\n

'+title+'

\n' + self.html_preprocess_sections = self.html_preprocess_sections + 1 + self.log("found " + str(self.html_preprocess_sections) + " chapters & titles. - " + str(chap) + ", " + str(title)) + return '

'+chap+'

\n

'+title+'

\n' def chapter_break(self, match): chap = match.group('section') @@ -35,7 +34,7 @@ class PreProcessor(object): self.html_preprocess_sections = self.html_preprocess_sections + 1 self.log("marked " + str(self.html_preprocess_sections) + " section markers based on punctuation. - " + str(chap)) return '<'+styles+' style="page-break-before:always">'+chap - + def insert_indent(self, match): pstyle = match.group('formatting') span = match.group('span') @@ -50,11 +49,11 @@ class PreProcessor(object): return '

' else: return '

'+span - + def no_markup(self, raw, percent): ''' - Detects total marked up line endings in the file. raw is the text to - inspect. Percent is the minimum percent of line endings which should + Detects total marked up line endings in the file. raw is the text to + inspect. Percent is the minimum percent of line endings which should be marked up to return true. ''' htm_end_ere = re.compile('

', re.DOTALL) @@ -68,13 +67,13 @@ class PreProcessor(object): if percent > 1: percent = 1 if percent < 0: - percent = 0 - + percent = 0 + min_lns = tot_ln_fds * percent self.log("There must be fewer than " + str(min_lns) + " unmarked lines to add markup") if min_lns > tot_htm_ends: return True - + def __call__(self, html): self.log("********* Preprocessing HTML *********") # Replace series of non-breaking spaces with text-indent @@ -88,7 +87,7 @@ class PreProcessor(object): html = re.sub(ur'\s*\s*', ' ', html) # Get rid of empty span tags html = re.sub(r"\s*]*>\s*", " ", html) - + # If more than 40% of the lines are empty paragraphs then delete them to clean up spacing linereg = re.compile('(?<=)', re.IGNORECASE|re.DOTALL) blankreg = re.compile(r'\s*]*>\s*(<(b|i|u)>)?\s*()?\s*

', re.IGNORECASE) @@ -102,19 +101,19 @@ class PreProcessor(object): # Arrange line feeds and

tags so the line_length and no_markup functions work correctly html = re.sub(r"\s*

", "

\n", html) html = re.sub(r"\s*

\s*", "\n

", html) - - # some lit files don't have any

tags or equivalent (generally just plain text between + + # some lit files don't have any

tags or equivalent (generally just plain text between #

 tags), check and  mark up line endings if required before proceeding
         if self.no_markup(html, 0.1):
              self.log("not enough paragraph markers, adding now")
              add_markup = re.compile('(?)(\n)')
              html = add_markup.sub('

\n

', html) - + # detect chapters/sections to match xpath or splitting logic heading = re.compile(']*>', re.IGNORECASE) self.html_preprocess_sections = len(heading.findall(html)) self.log("found " + str(self.html_preprocess_sections) + " pre-existing headings") - # + # # Start with most typical chapter headings, get more aggressive until one works if self.html_preprocess_sections < 10: chapdetect = re.compile(r'(?=]*>)\s*(<[ibu]>){0,2}\s*(]*>)?\s*(?P(<[ibu]>){0,2}s*(]*>)?\s*.?(Introduction|Synopsis|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*(){0,2})\s*()?s*()?\s*(){0,2}\s*()\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(]*>)?\s*(?P(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.IGNORECASE) @@ -122,18 +121,18 @@ class PreProcessor(object): if self.html_preprocess_sections < 10: self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying numeric chapters") chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu]>){0,2}\s*.?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE) - html = chapdetect2.sub(self.chapter_head, html) + html = chapdetect2.sub(self.chapter_head, html) if self.html_preprocess_sections < 10: self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying with uppercase words") chapdetect2 = 
re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu]>){0,2}\s*.?(([A-Z#-]+\s*){1,9})\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE) html = chapdetect2.sub(self.chapter_head, html) - + # Unwrap lines - # + # self.log("Unwrapping Lines") # Some OCR sourced files have line breaks in the html using a combination of span & p tags - # span are used for hard line breaks, p for new paragraphs. Determine which is used so + # span are used for hard line breaks, p for new paragraphs. Determine which is used so # that lines can be un-wrapped across page boundaries paras_reg = re.compile('<p[^>]*>', re.IGNORECASE) spans_reg = re.compile('<span[^>]*>', re.IGNORECASE) @@ -146,7 +145,7 @@ class PreProcessor(object): format = 'html' else: format = 'html' - + # Calculate Length length = line_length(format, html, 0.4) self.log("*** Median line length is " + str(length) + ",calculated with " + format + " format ***") @@ -154,8 +153,8 @@ class PreProcessor(object): # Unwrap and/or delete soft-hyphens, hyphens html = re.sub(u'­\s*(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*', '', html) html = re.sub(u'(?<=[-–—])\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])', '', html) - - # Unwrap lines using punctation if the median length of all lines is less than 200 + + # Unwrap lines using punctation if the median length of all lines is less than 200 unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE) html = unwrap.sub(' ', html) @@ -164,11 +163,11 @@ 
class PreProcessor(object): self.log("Looking for more split points based on punctuation, currently have " + str(self.html_preprocess_sections)) #self.log(html) chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*.?([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE) - html = chapdetect3.sub(self.chapter_break, html) + html = chapdetect3.sub(self.chapter_break, html) # search for places where a first or second level heading is immediately followed by another # top level heading. demote the second heading to h3 to prevent splitting between chapter # headings and titles, images, etc doubleheading = re.compile(r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE) html = doubleheading.sub('\g<firsthead>'+'<h3'+'\g<secondhead>'+'</h3>', html) - - return html \ No newline at end of file + + return html diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index e83216ae1f..084d48e54b 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -491,6 +491,6 @@ class HTMLInput(InputFormatPlugin): return (None, raw) def preprocess_html(self, html): - preprocessor = PreProcessor(html) - html = preprocessor(html) - return html + preprocessor = PreProcessor(log=getattr(self, 'log', None)) + return preprocessor(html) + diff --git a/src/calibre/ebooks/lit/input.py b/src/calibre/ebooks/lit/input.py index 58e7bc84bf..65f5c607a2 100644 --- a/src/calibre/ebooks/lit/input.py +++ b/src/calibre/ebooks/lit/input.py @@ -54,7 +54,6 @@ class LITInput(InputFormatPlugin): def preprocess_html(self, html): - preprocessor = PreProcessor(html) - html = preprocessor(html) - return html + preprocessor = PreProcessor(log=getattr(self, 'log', None)) + return preprocessor(html) diff --git 
a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py index f48bdb9934..ffdc641d1e 100644 --- a/src/calibre/ebooks/oeb/transforms/flatcss.py +++ b/src/calibre/ebooks/oeb/transforms/flatcss.py @@ -138,6 +138,7 @@ class CSSFlattener(object): float(self.context.margin_left)) bs.append('margin-right : %fpt'%\ float(self.context.margin_right)) + bs.extend(['padding-left: 0pt', 'padding-right: 0pt']) if self.context.change_justification != 'original': bs.append('text-align: '+ self.context.change_justification) body.set('style', '; '.join(bs)) diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py index 166695ff5c..b0fc15197a 100644 --- a/src/calibre/ebooks/pml/pmlconverter.py +++ b/src/calibre/ebooks/pml/pmlconverter.py @@ -207,6 +207,7 @@ class PML_HTMLizer(object): while html != old: old = html html = self.cleanup_html_remove_redundant(html) + html = re.sub(r'(?imu)^\s*', '', html) return html def cleanup_html_remove_redundant(self, html): @@ -216,7 +217,7 @@ class PML_HTMLizer(object): html = re.sub(r'(?u)%s\s*%s' % (open % '.*?', close), '', html) else: html = re.sub(r'(?u)%s\s*%s' % (open, close), '', html) - html = re.sub(r'<p>\s*</p>', '', html) + html = re.sub(r'(?imu)<p>\s*</p>', '', html) return html def start_line(self): @@ -556,7 +557,7 @@ class PML_HTMLizer(object): text = t else: self.toc.add_item(os.path.basename(self.file_name), id, value) - text = '<span id="%s"></span>%s' % (id, t) + text = '%s<span id="%s"></span>' % (t, id) elif c == 'm': empty = False src = self.code_value(line) diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py index d229b80c16..000c603c1c 100644 --- a/src/calibre/ebooks/rtf/input.py +++ b/src/calibre/ebooks/rtf/input.py @@ -7,7 +7,6 @@ import os, glob, re, textwrap from lxml import etree from calibre.customize.conversion import InputFormatPlugin -from calibre.ebooks.conversion.preprocess import line_length from 
calibre.ebooks.conversion.utils import PreProcessor class InlineClass(etree.XSLTExtension): @@ -230,7 +229,7 @@ class RTFInput(InputFormatPlugin): res = transform.tostring(result) res = res[:100].replace('xmlns:html', 'xmlns') + res[100:] if self.options.preprocess_html: - preprocessor = PreProcessor(res) + preprocessor = PreProcessor(log=getattr(self, 'log', None)) res = preprocessor(res) f.write(res) self.write_inline_css(inline_class) diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py index a12e8a0761..dac1e34df7 100644 --- a/src/calibre/ebooks/txt/processor.py +++ b/src/calibre/ebooks/txt/processor.py @@ -77,7 +77,7 @@ def separate_paragraphs_print_formatted(txt): def preserve_spaces(txt): txt = txt.replace(' ', ' ') - txt = txt.replace('\t', ' ') + txt = txt.replace('\t', '    ') return txt def opf_writer(path, opf_name, manifest, spine, mi): diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py index f0232d9859..878ba77a43 100644 --- a/src/calibre/gui2/actions/edit_metadata.py +++ b/src/calibre/gui2/actions/edit_metadata.py @@ -209,8 +209,9 @@ class EditMetadataAction(InterfaceAction): dest_id, src_books, src_ids = self.books_to_merge(rows) if safe_merge: if not confirm('<p>'+_( - 'All book formats and metadata from the selected books ' - 'will be added to the <b>first selected book.</b><br><br> ' + 'Book formats and metadata from the selected books ' + 'will be added to the <b>first selected book.</b> ' + 'ISBN will <i>not</i> be merged.<br><br> ' 'The second and subsequently selected books will not ' 'be deleted or changed.<br><br>' 'Please confirm you want to proceed.') @@ -220,8 +221,9 @@ class EditMetadataAction(InterfaceAction): self.merge_metadata(dest_id, src_ids) else: if not confirm('<p>'+_( - 'All book formats and metadata from the selected books will be merged ' - 'into the <b>first selected book</b>.<br><br>' + 'Book formats and metadata from the selected books will 
be merged ' + 'into the <b>first selected book</b>. ' + 'ISBN will <i>not</i> be merged.<br><br>' 'After merger the second and ' 'subsequently selected books will be <b>deleted</b>. <br><br>' 'All book formats of the first selected book will be kept ' diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py index bb47508531..c746a5aa56 100644 --- a/src/calibre/gui2/library/models.py +++ b/src/calibre/gui2/library/models.py @@ -121,10 +121,8 @@ class BooksModel(QAbstractTableModel): # {{{ def set_device_connected(self, is_connected): self.device_connected = is_connected self.db.refresh_ondevice() - self.refresh() + self.refresh() # does a resort() self.research() - if is_connected and self.sorted_on[0] == 'ondevice': - self.resort() def set_book_on_device_func(self, func): self.book_on_device = func @@ -264,19 +262,15 @@ class BooksModel(QAbstractTableModel): # {{{ self.sorting_done.emit(self.db.index) def refresh(self, reset=True): - try: - col = self.column_map.index(self.sorted_on[0]) - except: - col = 0 self.db.refresh(field=None) - self.sort(col, self.sorted_on[1], reset=reset) + self.resort(reset=reset) def resort(self, reset=True): - try: - col = self.column_map.index(self.sorted_on[0]) - except ValueError: - col = 0 - self.sort(col, self.sorted_on[1], reset=reset) + if not self.db: + return + self.db.multisort(self.sort_history[:tweaks['maximum_resort_levels']]) + if reset: + self.reset() def research(self, reset=True): self.search(self.last_search, reset=reset) @@ -1030,6 +1024,11 @@ class DeviceBooksModel(BooksModel): # {{{ if reset: self.reset() + def resort(self, reset=True): + if self.sorted_on: + self.sort(self.column_map.index(self.sorted_on[0]), + self.sorted_on[1], reset=reset) + def columnCount(self, parent): if parent and parent.isValid(): return 0 diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index b9c1211c7f..4f795ab733 100644 --- a/src/calibre/library/caches.py +++ 
b/src/calibre/library/caches.py @@ -6,7 +6,7 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __docformat__ = 'restructuredtext en' -import re, itertools, functools +import re, itertools from itertools import repeat from datetime import timedelta from threading import Thread, RLock @@ -112,7 +112,7 @@ class ResultCache(SearchQueryParser): ''' def __init__(self, FIELD_MAP, field_metadata): self.FIELD_MAP = FIELD_MAP - self._map = self._map_filtered = self._data = [] + self._map = self._data = self._map_filtered = [] self.first_sort = True self.search_restriction = '' self.field_metadata = field_metadata @@ -141,6 +141,8 @@ class ResultCache(SearchQueryParser): for x in self.iterall(): yield x[idx] + # Search functions {{{ + def universal_set(self): return set([i[0] for i in self._data if i is not None]) @@ -462,12 +464,43 @@ class ResultCache(SearchQueryParser): continue return matches + def search(self, query, return_matches=False): + ans = self.search_getting_ids(query, self.search_restriction) + if return_matches: + return ans + self._map_filtered = ans + + def search_getting_ids(self, query, search_restriction): + q = '' + if not query or not query.strip(): + q = search_restriction + else: + q = query + if search_restriction: + q = u'%s (%s)' % (search_restriction, query) + if not q: + return list(self._map) + matches = self.parse(q) + tmap = list(itertools.repeat(False, len(self._data))) + for x in matches: + tmap[x] = True + return [x for x in self._map if tmap[x]] + + def set_search_restriction(self, s): + self.search_restriction = s + + # }}} + def remove(self, id): self._data[id] = None - if id in self._map: + try: self._map.remove(id) - if id in self._map_filtered: + except ValueError: + pass + try: self._map_filtered.remove(id) + except ValueError: + pass def set(self, row, col, val, row_is_id=False): id = row if row_is_id else self._map_filtered[row] @@ -522,9 +555,7 @@ class ResultCache(SearchQueryParser): def 
books_deleted(self, ids): for id in ids: - self._data[id] = None - if id in self._map: self._map.remove(id) - if id in self._map_filtered: self._map_filtered.remove(id) + self.remove(id) def count(self): return len(self._map) @@ -549,90 +580,97 @@ class ResultCache(SearchQueryParser): self.sort(field, ascending) self._map_filtered = list(self._map) if self.search_restriction: - self.search('', return_matches=False, ignore_search_restriction=False) + self.search('', return_matches=False) - def seriescmp(self, sidx, siidx, x, y, library_order=None): - try: - if library_order: - ans = cmp(title_sort(self._data[x][sidx].lower()), - title_sort(self._data[y][sidx].lower())) - else: - ans = cmp(self._data[x][sidx].lower(), - self._data[y][sidx].lower()) - except AttributeError: # Some entries may be None - ans = cmp(self._data[x][sidx], self._data[y][sidx]) - if ans != 0: return ans - return cmp(self._data[x][siidx], self._data[y][siidx]) + # Sorting functions {{{ - def cmp(self, loc, x, y, asstr=True, subsort=False): - try: - ans = cmp(self._data[x][loc].lower(), self._data[y][loc].lower()) if \ - asstr else cmp(self._data[x][loc], self._data[y][loc]) - except AttributeError: # Some entries may be None - ans = cmp(self._data[x][loc], self._data[y][loc]) - except TypeError: ## raised when a datetime is None - x = self._data[x][loc] - if x is None: - x = UNDEFINED_DATE - y = self._data[y][loc] - if y is None: - y = UNDEFINED_DATE - return cmp(x, y) - if subsort and ans == 0: - return cmp(self._data[x][11].lower(), self._data[y][11].lower()) - return ans + def sanitize_sort_field_name(self, field): + field = field.lower().strip() + if field not in self.field_metadata.iterkeys(): + if field in ('author', 'tag', 'comment'): + field += 's' + if field == 'date': field = 'timestamp' + elif field == 'title': field = 'sort' + elif field == 'authors': field = 'author_sort' + return field def sort(self, field, ascending, subsort=False): - field = field.lower().strip() - if field in 
('author', 'tag', 'comment'): - field += 's' - if field == 'date': field = 'timestamp' - elif field == 'title': field = 'sort' - elif field == 'authors': field = 'author_sort' - as_string = field not in ('size', 'rating', 'timestamp') + self.multisort([(field, ascending)]) - if self.first_sort: - subsort = True - self.first_sort = False - if self.field_metadata[field]['is_custom']: - if self.field_metadata[field]['datatype'] == 'series': - fcmp = functools.partial(self.seriescmp, - self.field_metadata[field]['rec_index'], - self.field_metadata.cc_series_index_column_for(field), - library_order=tweaks['title_series_sorting'] == 'library_order') - else: - as_string = self.field_metadata[field]['datatype'] in ('comments', 'text') - field = self.field_metadata[field]['colnum'] - fcmp = functools.partial(self.cmp, self.FIELD_MAP[field], - subsort=subsort, asstr=as_string) - elif field == 'series': - fcmp = functools.partial(self.seriescmp, self.FIELD_MAP['series'], - self.FIELD_MAP['series_index'], - library_order=tweaks['title_series_sorting'] == 'library_order') + def multisort(self, fields=[], subsort=False): + fields = [(self.sanitize_sort_field_name(x), bool(y)) for x, y in fields] + keys = self.field_metadata.field_keys() + fields = [x for x in fields if x[0] in keys] + if subsort and 'sort' not in [x[0] for x in fields]: + fields += [('sort', True)] + if not fields: + fields = [('timestamp', False)] + + keyg = SortKeyGenerator(fields, self.field_metadata, self._data) + if len(fields) == 1: + self._map.sort(key=keyg, reverse=not fields[0][1]) else: - fcmp = functools.partial(self.cmp, self.FIELD_MAP[field], - subsort=subsort, asstr=as_string) - self._map.sort(cmp=fcmp, reverse=not ascending) - self._map_filtered = [id for id in self._map if id in self._map_filtered] + self._map.sort(key=keyg) - def search(self, query, return_matches=False): - ans = self.search_getting_ids(query, self.search_restriction) - if return_matches: - return ans - self._map_filtered = ans 
+ tmap = list(itertools.repeat(False, len(self._data))) + for x in self._map_filtered: + tmap[x] = True + self._map_filtered = [x for x in self._map if tmap[x]] + + +class SortKey(object): + + def __init__(self, orders, values): + self.orders, self.values = orders, values + + def __cmp__(self, other): + for i, ascending in enumerate(self.orders): + ans = cmp(self.values[i], other.values[i]) + if ans != 0: + return ans * ascending + return 0 + +class SortKeyGenerator(object): + + def __init__(self, fields, field_metadata, data): + self.field_metadata = field_metadata + self.orders = [-1 if x[1] else 1 for x in fields] + self.entries = [(x[0], field_metadata[x[0]]) for x in fields] + self.library_order = tweaks['title_series_sorting'] == 'library_order' + self.data = data + + def __call__(self, record): + values = tuple(self.itervals(self.data[record])) + if len(values) == 1: + return values[0] + return SortKey(self.orders, values) + + def itervals(self, record): + for name, fm in self.entries: + dt = fm['datatype'] + val = record[fm['rec_index']] + + if dt == 'datetime': + if val is None: + val = UNDEFINED_DATE + + elif dt == 'series': + if val is None: + val = ('', 1) + else: + val = val.lower() + if self.library_order: + val = title_sort(val) + sidx_fm = self.field_metadata[name + '_index'] + sidx = record[sidx_fm['rec_index']] + val = (val, sidx) + + elif dt in ('text', 'comments'): + if val is None: + val = '' + val = val.lower() + yield val + + # }}} - def search_getting_ids(self, query, search_restriction): - q = '' - if not query or not query.strip(): - q = search_restriction - else: - q = query - if search_restriction: - q = u'%s (%s)' % (search_restriction, query) - if not q: - return list(self._map) - matches = sorted(self.parse(q)) - return [id for id in self._map if id in matches] - def set_search_restriction(self, s): - self.search_restriction = s diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 
4106f8c965..8a5ab75c3c 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -311,6 +311,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): self.search_getting_ids = self.data.search_getting_ids self.refresh = functools.partial(self.data.refresh, self) self.sort = self.data.sort + self.multisort = self.data.multisort self.index = self.data.index self.refresh_ids = functools.partial(self.data.refresh_ids, self) self.row = self.data.row diff --git a/src/calibre/library/field_metadata.py b/src/calibre/library/field_metadata.py index 66cdee51f0..276a6ba971 100644 --- a/src/calibre/library/field_metadata.py +++ b/src/calibre/library/field_metadata.py @@ -69,6 +69,8 @@ class FieldMetadata(dict): VALID_DATA_TYPES = frozenset([None, 'rating', 'text', 'comments', 'datetime', 'int', 'float', 'bool', 'series']) + # Builtin metadata {{{ + _field_metadata = [ ('authors', {'table':'authors', 'column':'name', @@ -287,7 +289,8 @@ class FieldMetadata(dict): 'search_terms':[], 'is_custom':False, 'is_category':False}), - ] + ] + # }}} # search labels that are not db columns search_items = [ 'all', @@ -332,6 +335,9 @@ class FieldMetadata(dict): def keys(self): return self._tb_cats.keys() + def field_keys(self): + return [k for k in self._tb_cats.keys() if self._tb_cats[k]['kind']=='field'] + def iterkeys(self): for key in self._tb_cats: yield key diff --git a/src/calibre/library/server/content.py b/src/calibre/library/server/content.py index 6784abd8f4..ecb467b4c2 100644 --- a/src/calibre/library/server/content.py +++ b/src/calibre/library/server/content.py @@ -5,7 +5,7 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __docformat__ = 'restructuredtext en' -import re, os, cStringIO, operator +import re, os, cStringIO import cherrypy try: @@ -16,7 +16,15 @@ except ImportError: from calibre import fit_image, guess_type from calibre.utils.date import fromtimestamp -from calibre.ebooks.metadata 
class CSSortKeyGenerator(SortKeyGenerator):
    '''
    Sort key generator for the content server.

    Unlike the base class it is called with the record itself rather than
    with a record id, so it is created without a data lookup table.
    '''

    def __init__(self, fields, fm):
        # data is None: __call__ never indexes into it.
        SortKeyGenerator.__init__(self, fields, fm, None)

    def __call__(self, record):
        # Only the value of the first sort field is used as the key. The
        # next() builtin replaces the Python-2-only .next() method call.
        return next(self.itervals(record))
not b and e: - b = e - e = '' + b, e = os.path.splitext(x) + if e == '.': e = '' r = b[:-delta]+e if r.startswith('.'): r = x[0]+r else: diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py index a70cf8b664..8aef350498 100644 --- a/src/calibre/web/feeds/__init__.py +++ b/src/calibre/web/feeds/__init__.py @@ -165,7 +165,9 @@ class Feed(object): if delta.days*24*3600 + delta.seconds <= 24*3600*self.oldest_article: self.articles.append(article) else: - self.logger.debug('Skipping article %s (%s) from feed %s as it is too old.'%(title, article.localtime.strftime('%a, %d %b, %Y %H:%M'), self.title)) + t = strftime(u'%a, %d %b, %Y %H:%M', article.localtime.timetuple()) + self.logger.debug('Skipping article %s (%s) from feed %s as it is too old.'% + (title, t, self.title)) d = item.get('date', '') article.formatted_date = d