diff --git a/recipes/cracked_com.recipe b/recipes/cracked_com.recipe index 49ed9d2279..829299ae17 100644 --- a/recipes/cracked_com.recipe +++ b/recipes/cracked_com.recipe @@ -1,83 +1,63 @@ from calibre.web.feeds.news import BasicNewsRecipe -import re class Cracked(BasicNewsRecipe): title = u'Cracked.com' - __author__ = u'Nudgenudge' + __author__ = 'UnWeave' language = 'en' - description = 'America''s Only Humor and Video Site, since 1958' + description = "America's Only HumorSite since 1958" publisher = 'Cracked' category = 'comedy, lists' - oldest_article = 2 - delay = 10 - max_articles_per_feed = 2 + oldest_article = 3 #days + max_articles_per_feed = 100 no_stylesheets = True - encoding = 'cp1252' + encoding = 'ascii' remove_javascript = True use_embedded_content = False - INDEX = u'http://www.cracked.com' - extra_css = """ - .pageheader_type{font-size: x-large; font-weight: bold; color: #828D74} - .pageheader_title{font-size: xx-large; color: #394128} - .pageheader_byline{font-size: small; font-weight: bold; color: #394128} - .score_bg {display: inline; width: 100%; margin-bottom: 2em} - .score_column_1{ padding-left: 10px; font-size: small; width: 50%} - .score_column_2{ padding-left: 10px; font-size: small; width: 50%} - .score_column_3{ padding-left: 10px; font-size: small; width: 50%} - .score_header{font-size: large; color: #50544A} - .bodytext{display: block} - body{font-family: Helvetica,Arial,sans-serif} - """ + + feeds = [ (u'Articles', u'http://feeds.feedburner.com/CrackedRSS/') ] conversion_options = { 'comment' : description , 'tags' : category , 'publisher' : publisher , 'language' : language - , 'linearize_tables' : True } - keep_only_tags = [ - dict(name='div', attrs={'class':['Column1']}) - ] + remove_tags_before = dict(id='PrimaryContent') - feeds = [(u'Articles', u'http://feeds.feedburner.com/CrackedRSS')] + remove_tags_after = dict(name='div', attrs={'class':'shareBar'}) - def get_article_url(self, article): - return article.get('guid', None) + remove_tags = [ dict(name='div', attrs={'class':['social', + 'FacebookLike', + 'shareBar' + ]}), - def cleanup_page(self, soup): - for item in soup.findAll(style=True): - del item['style'] - for alink in soup.findAll('a'): - if alink.string is not None: - tstr = alink.string - alink.replaceWith(tstr) - for div_to_remove in soup.findAll('div', attrs={'id':['googlead_1','fb-like-article','comments_section']}): - div_to_remove.extract() - for div_to_remove in soup.findAll('div', attrs={'class':['share_buttons_col_1','GenericModule1']}): - div_to_remove.extract() - for div_to_remove in soup.findAll('div', attrs={'class':re.compile("prev_next")}): - div_to_remove.extract() - for ul_to_remove in soup.findAll('ul', attrs={'class':['Nav6']}): - ul_to_remove.extract() - for image in soup.findAll('img', attrs={'alt': 'article image'}): - image.extract() + dict(name='div', attrs={'id':['inline-share-buttons', + ]}), - def append_page(self, soup, appendtag, position): - pager = soup.find('a',attrs={'class':'next_arrow_active'}) - if pager: - nexturl = self.INDEX + pager['href'] - soup2 = self.index_to_soup(nexturl) - texttag = soup2.find('div', attrs={'class':re.compile("userStyled")}) - newpos = len(texttag.contents) - self.append_page(soup2,texttag,newpos) - texttag.extract() - self.cleanup_page(appendtag) - appendtag.insert(position,texttag) - else: - self.cleanup_page(appendtag) + dict(name='span', attrs={'class':['views', + 'KonaFilter' + ]}), + #dict(name='img'), + ] + + def appendPage(self, soup, appendTag, position): + # Check if article has multiple pages + pageNav = soup.find('nav', attrs={'class':'PaginationContent'}) + if pageNav: + # Check not at last page + nextPage = pageNav.find('a', attrs={'class':'next'}) + if nextPage: + nextPageURL = nextPage['href'] + nextPageSoup = self.index_to_soup(nextPageURL) + # 8th
tag contains article content + nextPageContent = nextPageSoup.findAll('section')[7] + newPosition = len(nextPageContent.contents) + self.appendPage(nextPageSoup,nextPageContent,newPosition) + nextPageContent.extract() + pageNav.extract() + appendTag.insert(position,nextPageContent) def preprocess_html(self, soup): - self.append_page(soup, soup.body, 3) - return self.adeify_images(soup) + self.appendPage(soup, soup.body, 3) + return soup diff --git a/recipes/galicia_confidential.recipe b/recipes/galicia_confidential.recipe new file mode 100644 index 0000000000..d07946001e --- /dev/null +++ b/recipes/galicia_confidential.recipe @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- + +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.web.feeds import Feed + +class GC_gl(BasicNewsRecipe): + title = u'Galicia Confidencial (RSS)' + __author__ = u'Susana Sotelo Docío' + description = u'Unha fiestra de información aberta a todos' + publisher = u'Galicia Confidencial' + category = u'news, society, politics, Galicia' + encoding = 'utf-8' + language = 'gl' + direction = 'ltr' + cover_url = 'http://galiciaconfidencial.com/imagenes/header/logo_gc.gif' + oldest_article = 5 + max_articles_per_feed = 100 + center_navbar = False + + feeds = [(u'Novas no RSS', u'http://galiciaconfidencial.com/rss2/xeral.rss')] + + extra_css = u' p{text-align:left} ' + + def print_version(self, url): + return url.replace('http://galiciaconfidencial.com/nova/', 'http://galiciaconfidencial.com/imprimir/') + + def parse_index(self): + feeds = [] + self.gc_parse_feeds(feeds) + return feeds + + def gc_parse_feeds(self, feeds): + rssFeeds = Feed() + rssFeeds = BasicNewsRecipe.parse_feeds(self) + self.feed_to_index_append(rssFeeds[:], feeds) + + + def feed_to_index_append(self, feedObject, masterFeed): + for feed in feedObject: + newArticles = [] + for article in feed.articles: + newArt = { + 'title' : article.title, + 'url' : article.url, + 'date' : article.date + } + newArticles.append(newArt) + masterFeed.append((feed.title,newArticles)) + diff --git a/recipes/menorca.recipe b/recipes/menorca.recipe new file mode 100644 index 0000000000..9a5afa665a --- /dev/null +++ b/recipes/menorca.recipe @@ -0,0 +1,138 @@ +# -*- coding: utf-8 -*- + +import re +from calibre.web.feeds.recipes import BasicNewsRecipe +from calibre.web.feeds import Feed + +class Menorca(BasicNewsRecipe): + + title = 'Menorca' + publisher = 'Editorial Menorca S.A. ' + __author__ = 'M. Sintes' + description = u'Peri\xf3dico con informaci\xf3n de Menorca, Espa\xf1a' + category = 'news, politics, economy, culture, Menorca, Spain ' + language = 'es' + enconding = 'cp1252' + + no_stylesheets = True + oldest_article = 5 + max_articles_per_feed = 25 + + + feeds = [ (u'Principal',u'http://www.menorca.info/rss'), + (u'Opini\xf3n',u'http://www.menorca.info/rss?seccion=opinion'), + (u'Menorca',u'http://www.menorca.info/rss?seccion=menorca'), + (u'Alaior',u'http://www.menorca.info/rss?seccion=pueblos/alaior'), + (u'Ciutadella', u'http://www.menorca.info/rss?seccion=pueblos/ciutadella'), + (u'Es Castell', u'http://www.menorca.info/rss?seccion=pueblos/escastell'), + (u'Es Mercadal', u'http://www.menorca.info/rss?seccion=pueblos/esmercadal'), + (u'Es Migjorn', u'http://www.menorca.info/rss?seccion=pueblos/esmigjorn'), + (u'Ferreries', u'http://www.menorca.info/rss?seccion=pueblos/ferreries'), + (u'Fornells', u'http://www.menorca.info/rss?seccion=pueblos/fornells'), + (u'Llucma\xe7anes', u'http://www.menorca.info/rss?seccion=pueblos/llucmaanes'), + (u'Ma\xf3', u'http://www.menorca.info/rss?seccion=pueblos/mao'), + (u'Sant Climent', u'http://www.menorca.info/rss?seccion=pueblos/santcliment'), + (u'Sant Llu\xeds', u'http://www.menorca.info/rss?seccion=pueblos/santlluis'), + (u'Deportes',u'http://www.menorca.info/rss?seccion=deportes'), + (u'Balears', u'http://www.menorca.info/rss?seccion=balears')] + + #Seccions amb link rss erroni. Es recupera directament de la pagina web + seccions_web = [(u'Mundo',u'http://www.menorca.info/actualidad/mundo'), + (u'Econom\xeda',u'http://www.menorca.info/actualidad/economia'), + (u'Espa\xf1a',u'http://www.menorca.info/actualidad/espana')] + + remove_tags_before = dict(name='div', attrs={'class':'bloqueTitulosNoticia'}) + remove_tags_after = dict(name='div', attrs={'class':'compartir'}) + remove_tags = [dict(id = 'utilidades'), + dict(name='div', attrs={'class': 'totalComentarios'}), + dict(name='div', attrs={'class': 'compartir'}), + dict(name='div', attrs={'class': re.compile("img_noticia*")}) + ] + + def print_version(self, url): + url_imprimir = url + '?d=print' + return url.replace(url, url_imprimir) + + def feed_to_index_append(self, feedObject, masterFeed): + + # Loop thru the feed object and build the correct type of article list + for feed in feedObject: + newArticles = [] + for article in feed.articles: + newArt = { + 'title' : article.title, + 'url' : article.url, + 'date' : article.date, + 'description' : article.text_summary + } + + newArticles.append(newArt) + + # append the newly-built list object to the index object # passed in as masterFeed. + masterFeed.append((feed.title,newArticles)) + + + def parse_index(self): + + rssFeeds = Feed() + rssFeeds = BasicNewsRecipe.parse_feeds(self) + + articles = [] + feeds = [] + + self.feed_to_index_append(rssFeeds,feeds) + + + + for (nom_seccio, url_seccio) in self.seccions_web: + + + articles = [] + + soup = self.index_to_soup(url_seccio) + for article in soup.findAll('div', attrs={'class':re.compile("articulo noticia|cajaNoticiaPortada")}): + h = article.find(['h2','h3']) + titol = self.tag_to_string(h) + a = article.find('a', href=True) + url = 'http://www.menorca.info' + a['href'] + + desc = None + autor = '' + dt = '' + + soup_art = self.index_to_soup(url) + aut = soup_art.find('div', attrs={'class':'autor'}) + tx = self.tag_to_string(aut) + ls = re.split('[,;]',tx) + + t = len(ls) + if t >= 1: + autor = ls[0] + + if t > 1: + d = ls[t-1] + + if len(d) >= 10: + lt = len(d) - 10 + dt = d[lt:] + + + + self.log('\tTrobat article: ', titol, 'a', url, 'Seccio: ', nom_seccio, 'Autor: ', autor, 'Data: ', dt) + + articles.append({'title': titol, 'url': url, 'description': desc, 'date':dt, 'author': autor}) + + + + + + if articles: + feeds.append((nom_seccio, articles)) + + + + + return feeds + + + diff --git a/recipes/spiegel_int.recipe b/recipes/spiegel_int.recipe index 7af5c8a41e..2950c37556 100644 --- a/recipes/spiegel_int.recipe +++ b/recipes/spiegel_int.recipe @@ -1,94 +1,67 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2008-2009, Darko Miletic ' +__copyright__ = '2008-2011, Darko Miletic ' ''' spiegel.de ''' from calibre.web.feeds.news import BasicNewsRecipe - class Spiegel_int(BasicNewsRecipe): title = 'Spiegel Online International' __author__ = 'Darko Miletic and Sujata Raman' - description = "News and POV from Europe's largest newsmagazine" + description = "Daily news, analysis and opinion from Europe's leading newsmagazine and Germany's top news Web site" oldest_article = 7 max_articles_per_feed = 100 - language = 'en' - + language = 'en_DE' no_stylesheets = True use_embedded_content = False + encoding = 'cp1252' publisher = 'SPIEGEL ONLINE GmbH' category = 'news, politics, Germany' - lang = 'en' - recursions = 1 - match_regexps = [r'http://www.spiegel.de/.*-[1-9],00.html'] + masthead_url = 'http://www.spiegel.de/static/sys/v9/spiegelonline_logo.png' + publication_type = 'magazine' + conversion_options = { - 'comments' : description - ,'tags' : category - ,'language' : lang - ,'publisher' : publisher - ,'pretty_print': True + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher': publisher } extra_css = ''' - #spArticleColumn{font-family:verdana,arial,helvetica,geneva,sans-serif ; } + #spArticleContent{font-family: Verdana,Arial,Helvetica,Geneva,sans-serif} h1{color:#666666; font-weight:bold;} h2{color:#990000;} h3{color:#990000;} h4 {color:#990000;} a{color:#990000;} .spAuthor{font-style:italic;} - #spIntroTeaser{font-weight:bold;} + #spIntroTeaser{font-weight:bold} .spCredit{color:#666666; font-size:x-small;} .spShortDate{font-size:x-small;} .spArticleImageBox {font-size:x-small;} .spPhotoGallery{font-size:x-small; color:#990000 ;} ''' - keep_only_tags = [ - dict(name ='div', attrs={'id': ['spArticleImageBox spAssetAlignleft','spArticleColumn']}), - ] + keep_only_tags = [dict(attrs={'id':'spArticleContent'})] + remove_tags_after = dict(attrs={'id':'spArticleBody'}) + remove_tags = [dict(name=['meta','base','iframe','embed','object'])] + remove_attributes = ['clear'] + feeds = [(u'Spiegel Online', u'http://www.spiegel.de/international/index.rss')] - remove_tags = [ - dict(name='div', attrs={'id':['spSocialBookmark','spArticleFunctions','spMultiPagerHeadlines',]}), - dict(name='div', attrs={'class':['spCommercial spM520','spArticleCredit','spPicZoom']}), - ] - - feeds = [(u'Spiegel Online', u'http://www.spiegel.de/schlagzeilen/rss/0,5291,676,00.xml')] - - def postprocess_html(self, soup,first): - - for tag in soup.findAll(name='div',attrs={'id':"spMultiPagerControl"}): - tag.extract() - - p = soup.find(name = 'p', attrs={'id':'spIntroTeaser'}) - - if p.string is not None: - t = p.string.rpartition(':')[0] - - if 'Part'in t: - if soup.h1 is not None: - soup.h1.extract() - if soup.h2 is not None: - soup.h2.extract() - functag = soup.find(name= 'div', attrs={'id':"spArticleFunctions"}) - if functag is not None: - functag.extract() - auttag = soup.find(name= 'p', attrs={'class':"spAuthor"}) - if auttag is not None: - auttag.extract() - - pictag = soup.find(name= 'div', attrs={'id':"spArticleTopAsset"}) - if pictag is not None: - pictag.extract() - - - return soup - - # def print_version(self, url): - # main, sep, rest = url.rpartition(',') - # rmain, rsep, rrest = main.rpartition(',') - # return rmain + ',druck-' + rrest + ',' + rest + def print_version(self, url): + main, sep, rest = url.rpartition(',') + rmain, rsep, rrest = main.rpartition(',') + return rmain + ',druck-' + rrest + ',' + rest + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + for item in soup.findAll('a'): + if item.string is not None: + str = item.string + item.replaceWith(str) + else: + str = self.tag_to_string(item) + item.replaceWith(str) + return soup diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 82d1d2ff01..6ecf68e7de 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -3,57 +3,16 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' -import textwrap, os, glob, functools, re +import os, glob, functools, re from calibre import guess_type from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \ MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase, StoreBase from calibre.constants import numeric_version from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata from calibre.ebooks.metadata.opf2 import metadata_to_opf +from calibre.ebooks.html.to_zip import HTML2ZIP # To archive plugins {{{ -class HTML2ZIP(FileTypePlugin): - name = 'HTML to ZIP' - author = 'Kovid Goyal' - description = textwrap.dedent(_('''\ -Follow all local links in an HTML file and create a ZIP \ -file containing all linked files. This plugin is run \ -every time you add an HTML file to the library.\ -''')) - version = numeric_version - file_types = set(['html', 'htm', 'xhtml', 'xhtm', 'shtm', 'shtml']) - supported_platforms = ['windows', 'osx', 'linux'] - on_import = True - - def run(self, htmlfile): - from calibre.ptempfile import TemporaryDirectory - from calibre.gui2.convert.gui_conversion import gui_convert - from calibre.customize.conversion import OptionRecommendation - from calibre.ebooks.epub import initialize_container - - with TemporaryDirectory('_plugin_html2zip') as tdir: - recs =[('debug_pipeline', tdir, OptionRecommendation.HIGH)] - recs.append(['keep_ligatures', True, OptionRecommendation.HIGH]) - if self.site_customization and self.site_customization.strip(): - recs.append(['input_encoding', self.site_customization.strip(), - OptionRecommendation.HIGH]) - gui_convert(htmlfile, tdir, recs, abort_after_input_dump=True) - of = self.temporary_file('_plugin_html2zip.zip') - tdir = os.path.join(tdir, 'input') - opf = glob.glob(os.path.join(tdir, '*.opf'))[0] - ncx = glob.glob(os.path.join(tdir, '*.ncx')) - if ncx: - os.remove(ncx[0]) - epub = initialize_container(of.name, os.path.basename(opf)) - epub.add_dir(tdir) - epub.close() - - return of.name - - def customization_help(self, gui=False): - return _('Character encoding for the input HTML files. Common choices ' - 'include: cp1252, latin1, iso-8859-1 and utf-8.') - class PML2PMLZ(FileTypePlugin): name = 'PML to PMLZ' diff --git a/src/calibre/db/cache.py b/src/calibre/db/cache.py new file mode 100644 index 0000000000..cc6da1e995 --- /dev/null +++ b/src/calibre/db/cache.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + + + diff --git a/src/calibre/db/locking.py b/src/calibre/db/locking.py new file mode 100644 index 0000000000..3092f1a2fa --- /dev/null +++ b/src/calibre/db/locking.py @@ -0,0 +1,331 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +from threading import Lock, Condition, current_thread + +class LockingError(RuntimeError): + pass + +def create_locks(): + ''' + Return a pair of locks: (read_lock, write_lock) + + The read_lock can be acquired by multiple threads simultaneously, it can + also be acquired multiple times by the same thread. + + Only one thread can hold write_lock at a time, and only if there are no + current read_locks. While the write_lock is held no + other threads can acquire read locks. The write_lock can also be acquired + multiple times by the same thread. + + Both read_lock and write_lock are meant to be used in with statements (they + operate on a single underlying lock. + + WARNING: Be very careful to not try to acquire a read lock while the same + thread holds a write lock and vice versa. That is, a given thread should + always release *all* locks of type A before trying to acquire a lock of type + B. Bad things will happen if you violate this rule, the most benign of + which is the raising of a LockingError (I haven't been able to eliminate + the possibility of deadlocking in this scenario). + ''' + l = SHLock() + return RWLockWrapper(l), RWLockWrapper(l, is_shared=False) + +class SHLock(object): + ''' + Shareable lock class. Used to implement the Multiple readers-single writer + paradigm. As best as I can tell, neither writer nor reader starvation + should be possible. + + Based on code from: https://github.com/rfk/threading2 + ''' + + def __init__(self): + self._lock = Lock() + # When a shared lock is held, is_shared will give the cumulative + # number of locks and _shared_owners maps each owning thread to + # the number of locks is holds. + self.is_shared = 0 + self._shared_owners = {} + # When an exclusive lock is held, is_exclusive will give the number + # of locks held and _exclusive_owner will give the owning thread + self.is_exclusive = 0 + self._exclusive_owner = None + # When someone is forced to wait for a lock, they add themselves + # to one of these queues along with a "waiter" condition that + # is used to wake them up. + self._shared_queue = [] + self._exclusive_queue = [] + # This is for recycling waiter objects. + self._free_waiters = [] + + def acquire(self, blocking=True, shared=False): + ''' + Acquire the lock in shared or exclusive mode. + + If blocking is False this method will return False if acquiring the + lock failed. + ''' + with self._lock: + if shared: + return self._acquire_shared(blocking) + else: + return self._acquire_exclusive(blocking) + assert not (self.is_shared and self.is_exclusive) + + def release(self): + ''' Release the lock. ''' + # This decrements the appropriate lock counters, and if the lock + # becomes free, it looks for a queued thread to hand it off to. + # By doing the handoff here we ensure fairness. + me = current_thread() + with self._lock: + if self.is_exclusive: + if self._exclusive_owner is not me: + raise LockingError("release() called on unheld lock") + self.is_exclusive -= 1 + if not self.is_exclusive: + self._exclusive_owner = None + # If there are waiting shared locks, issue them + # all and them wake everyone up. + if self._shared_queue: + for (thread, waiter) in self._shared_queue: + self.is_shared += 1 + self._shared_owners[thread] = 1 + waiter.notify() + del self._shared_queue[:] + # Otherwise, if there are waiting exclusive locks, + # they get first dibbs on the lock. + elif self._exclusive_queue: + (thread, waiter) = self._exclusive_queue.pop(0) + self._exclusive_owner = thread + self.is_exclusive += 1 + waiter.notify() + elif self.is_shared: + try: + self._shared_owners[me] -= 1 + if self._shared_owners[me] == 0: + del self._shared_owners[me] + except KeyError: + raise LockingError("release() called on unheld lock") + self.is_shared -= 1 + if not self.is_shared: + # If there are waiting exclusive locks, + # they get first dibbs on the lock. + if self._exclusive_queue: + (thread, waiter) = self._exclusive_queue.pop(0) + self._exclusive_owner = thread + self.is_exclusive += 1 + waiter.notify() + else: + assert not self._shared_queue + else: + raise LockingError("release() called on unheld lock") + + def _acquire_shared(self, blocking=True): + me = current_thread() + # Each case: acquiring a lock we already hold. + if self.is_shared and me in self._shared_owners: + self.is_shared += 1 + self._shared_owners[me] += 1 + return True + # If the lock is already spoken for by an exclusive, add us + # to the shared queue and it will give us the lock eventually. + if self.is_exclusive or self._exclusive_queue: + if self._exclusive_owner is me: + raise LockingError("can't downgrade SHLock object") + if not blocking: + return False + waiter = self._take_waiter() + try: + self._shared_queue.append((me, waiter)) + waiter.wait() + assert not self.is_exclusive + finally: + self._return_waiter(waiter) + else: + self.is_shared += 1 + self._shared_owners[me] = 1 + return True + + def _acquire_exclusive(self, blocking=True): + me = current_thread() + # Each case: acquiring a lock we already hold. + if self._exclusive_owner is me: + assert self.is_exclusive + self.is_exclusive += 1 + return True + # Do not allow upgrade of lock + if self.is_shared and me in self._shared_owners: + raise LockingError("can't upgrade SHLock object") + # If the lock is already spoken for, add us to the exclusive queue. + # This will eventually give us the lock when it's our turn. + if self.is_shared or self.is_exclusive: + if not blocking: + return False + waiter = self._take_waiter() + try: + self._exclusive_queue.append((me, waiter)) + waiter.wait() + finally: + self._return_waiter(waiter) + else: + self._exclusive_owner = me + self.is_exclusive += 1 + return True + + def _take_waiter(self): + try: + return self._free_waiters.pop() + except IndexError: + return Condition(self._lock)#, verbose=True) + + def _return_waiter(self, waiter): + self._free_waiters.append(waiter) + +class RWLockWrapper(object): + + def __init__(self, shlock, is_shared=True): + self._shlock = shlock + self._is_shared = is_shared + + def __enter__(self): + self._shlock.acquire(shared=self._is_shared) + return self + + def __exit__(self, *args): + self._shlock.release() + + +# Tests {{{ +if __name__ == '__main__': + import time, random, unittest + from threading import Thread + + class TestSHLock(unittest.TestCase): + """Testcases for SHLock class.""" + + def test_upgrade(self): + lock = SHLock() + lock.acquire(shared=True) + self.assertRaises(LockingError, lock.acquire, shared=False) + lock.release() + + def test_downgrade(self): + lock = SHLock() + lock.acquire(shared=False) + self.assertRaises(LockingError, lock.acquire, shared=True) + lock.release() + + def test_recursive(self): + lock = SHLock() + lock.acquire(shared=True) + lock.acquire(shared=True) + self.assertEqual(lock.is_shared, 2) + lock.release() + lock.release() + self.assertFalse(lock.is_shared) + lock.acquire(shared=False) + lock.acquire(shared=False) + self.assertEqual(lock.is_exclusive, 2) + lock.release() + lock.release() + self.assertFalse(lock.is_exclusive) + + def test_release(self): + lock = SHLock() + self.assertRaises(LockingError, lock.release) + + def get_lock(shared): + lock.acquire(shared=shared) + time.sleep(1) + lock.release() + + threads = [Thread(target=get_lock, args=(x,)) for x in (True, + False)] + for t in threads: + t.daemon = True + t.start() + self.assertRaises(LockingError, lock.release) + t.join(2) + self.assertFalse(t.is_alive()) + self.assertFalse(lock.is_shared) + self.assertFalse(lock.is_exclusive) + + def test_acquire(self): + lock = SHLock() + + def get_lock(shared): + lock.acquire(shared=shared) + time.sleep(1) + lock.release() + + shared = Thread(target=get_lock, args=(True,)) + shared.daemon = True + shared.start() + time.sleep(0.1) + self.assertTrue(lock.acquire(shared=True, blocking=False)) + lock.release() + self.assertFalse(lock.acquire(shared=False, blocking=False)) + lock.acquire(shared=False) + self.assertFalse(shared.is_alive()) + lock.release() + self.assertTrue(lock.acquire(shared=False, blocking=False)) + lock.release() + + exclusive = Thread(target=get_lock, args=(False,)) + exclusive.daemon = True + exclusive.start() + time.sleep(0.1) + self.assertFalse(lock.acquire(shared=False, blocking=False)) + self.assertFalse(lock.acquire(shared=True, blocking=False)) + lock.acquire(shared=True) + self.assertFalse(exclusive.is_alive()) + lock.release() + lock.acquire(shared=False) + lock.release() + lock.acquire(shared=True) + lock.release() + self.assertFalse(lock.is_shared) + self.assertFalse(lock.is_exclusive) + + def test_contention(self): + lock = SHLock() + done = [] + def lots_of_acquires(): + for _ in xrange(1000): + shared = random.choice([True,False]) + lock.acquire(shared=shared) + lock.acquire(shared=shared) + time.sleep(random.random() * 0.0001) + lock.release() + time.sleep(random.random() * 0.0001) + lock.acquire(shared=shared) + time.sleep(random.random() * 0.0001) + lock.release() + lock.release() + done.append(True) + threads = [Thread(target=lots_of_acquires) for _ in xrange(10)] + for t in threads: + t.daemon = True + t.start() + for t in threads: + t.join(20) + live = [t for t in threads if t.is_alive()] + self.assertListEqual(live, [], 'ShLock hung') + self.assertEqual(len(done), len(threads), 'SHLock locking failed') + self.assertFalse(lock.is_shared) + self.assertFalse(lock.is_exclusive) + + + suite = unittest.TestLoader().loadTestsFromTestCase(TestSHLock) + unittest.TextTestRunner(verbosity=2).run(suite) + +# }}} + diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index 685d1286eb..d9c30d56fe 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -24,12 +24,12 @@ class ANDROID(USBMS): 0xff9 : [0x0100, 0x0227, 0x0226], 0xc87 : [0x0100, 0x0227, 0x0226], 0xc91 : [0x0100, 0x0227, 0x0226], - 0xc92 : [0x100], - 0xc97 : [0x226], - 0xc99 : [0x0100], - 0xca2 : [0x226], - 0xca3 : [0x100], - 0xca4 : [0x226], + 0xc92 : [0x100, 0x0227, 0x0226, 0x222], + 0xc97 : [0x100, 0x0227, 0x0226, 0x222], + 0xc99 : [0x100, 0x0227, 0x0226, 0x222], + 0xca2 : [0x100, 0x0227, 0x0226, 0x222], + 0xca3 : [0x100, 0x0227, 0x0226, 0x222], + 0xca4 : [0x100, 0x0227, 0x0226, 0x222], }, # Eken diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py index 62e15452f1..2d6e66afc3 100644 --- a/src/calibre/devices/kobo/driver.py +++ b/src/calibre/devices/kobo/driver.py @@ -203,12 +203,13 @@ class KOBO(USBMS): result = cursor.fetchone() self.dbversion = result[0] + debug_print("Database Version: ", self.dbversion) if self.dbversion >= 14: query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ - 'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex from content where BookID is Null' + 'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex from content where BookID is Null and ( ___ExpirationStatus <> "3" or ___ExpirationStatus is Null)' elif self.dbversion < 14 and self.dbversion >= 8: query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ - 'ImageID, ReadStatus, ___ExpirationStatus, "-1" as FavouritesIndex from content where BookID is Null' + 'ImageID, ReadStatus, ___ExpirationStatus, "-1" as FavouritesIndex from content where BookID is Null and ( ___ExpirationStatus <> "3" or ___ExpirationStatus is Null)' else: query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ 'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as FavouritesIndex from content where BookID is Null' @@ -542,7 +543,72 @@ class KOBO(USBMS): paths[source_id] = os.path.join(prefix, *(path.split('/'))) return paths + def reset_readstatus(self, connection, oncard): + cursor = connection.cursor() + + # Reset Im_Reading list in the database + if oncard == 'carda': + query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID like \'file:///mnt/sd/%\'' + elif oncard != 'carda' and oncard != 'cardb': + query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID not like \'file:///mnt/sd/%\'' + + try: + cursor.execute (query) + except: + debug_print(' Database Exception: Unable to reset ReadStatus list') + raise + else: + connection.commit() + debug_print(' Commit: Reset ReadStatus list') + + cursor.close() + + def set_readstatus(self, connection, ContentID, ReadStatus): + cursor = connection.cursor() + t = (ContentID,) + cursor.execute('select DateLastRead from Content where BookID is Null and ContentID = ?', t) + result = cursor.fetchone() + if result is None: + datelastread = '1970-01-01T00:00:00' + else: + datelastread = result[0] if result[0] is not None else '1970-01-01T00:00:00' + + t = (ReadStatus,datelastread,ContentID,) + + try: + cursor.execute('update content set ReadStatus=?,FirstTimeReading=\'false\',DateLastRead=? where BookID is Null and ContentID = ?', t) + except: + debug_print(' Database Exception: Unable update ReadStatus') + raise + else: + connection.commit() + debug_print(' Commit: Setting ReadStatus List') + cursor.close() + + def reset_favouritesindex(self, connection, oncard): + # Reset FavouritesIndex list in the database + if oncard == 'carda': + query= 'update content set FavouritesIndex=-1 where BookID is Null and ContentID like \'file:///mnt/sd/%\'' + elif oncard != 'carda' and oncard != 'cardb': + query= 'update content set FavouritesIndex=-1 where BookID is Null and ContentID not like \'file:///mnt/sd/%\'' + + cursor = connection.cursor() + try: + cursor.execute (query) + except: + debug_print('Database Exception: Unable to reset Shortlist list') + raise + else: + connection.commit() + debug_print(' Commit: Reset FavouritesIndex list') + def update_device_database_collections(self, booklists, collections_attributes, oncard): + # Define lists for the ReadStatus + readstatuslist = { + "Im_Reading":1, + "Read":2, + "Closed":3, + } # debug_print('Starting update_device_database_collections', collections_attributes) # Force collections_attributes to be 'tags' as no other is currently supported @@ -561,149 +627,35 @@ class KOBO(USBMS): # return bytestrings if the content cannot the decoded as unicode connection.text_factory = lambda x: unicode(x, "utf-8", "ignore") - cursor = connection.cursor() - - if collections: + + # Need to reset the collections outside the particular loops + # otherwise the last item will not be removed + self.reset_readstatus(connection, oncard) + self.reset_favouritesindex(connection, oncard) + # Process any collections that exist for category, books in collections.items(): - # debug_print (category) - if category == 'Im_Reading': - # Reset Im_Reading list in the database - if oncard == 'carda': - query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 1 and ContentID like \'file:///mnt/sd/%\'' - elif oncard != 'carda' and oncard != 'cardb': - query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 1 and ContentID not like \'file:///mnt/sd/%\'' - - try: - cursor.execute (query) - except: - debug_print('Database Exception: Unable to reset Im_Reading list') - raise - else: -# debug_print('Commit: Reset Im_Reading list') - connection.commit() + # This is used to reset the Im_Reading, Read and Closed list + # in the ReadStatus column of the Content table + if category in readstatuslist.keys(): + debug_print("Category: ", category, " id = ", readstatuslist.get(category)) for book in books: -# debug_print('Title:', book.title, 'lpath:', book.path) - if 'Im_Reading' not in book.device_collections: - book.device_collections.append('Im_Reading') + debug_print(' Title:', book.title, 'category: ', category) + if category not in book.device_collections: + book.device_collections.append(category) extension = os.path.splitext(book.path)[1] ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path) ContentID = self.contentid_from_path(book.path, ContentType) - - t = (ContentID,) - cursor.execute('select DateLastRead from Content where BookID is Null and ContentID = ?', t) - result = cursor.fetchone() - if result is None: - datelastread = '1970-01-01T00:00:00' - else: - datelastread = result[0] if result[0] is not None else '1970-01-01T00:00:00' - - t = (datelastread,ContentID,) - - try: - cursor.execute('update content set ReadStatus=1,FirstTimeReading=\'false\',DateLastRead=? where BookID is Null and ContentID = ?', t) - except: - debug_print('Database Exception: Unable create Im_Reading list') - raise - else: - connection.commit() - # debug_print('Database: Commit create Im_Reading list') - if category == 'Read': - # Reset Im_Reading list in the database - if oncard == 'carda': - query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 2 and ContentID like \'file:///mnt/sd/%\'' - elif oncard != 'carda' and oncard != 'cardb': - query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 2 and ContentID not like \'file:///mnt/sd/%\'' - - try: - cursor.execute (query) - except: - debug_print('Database Exception: Unable to reset Im_Reading list') - raise - else: -# debug_print('Commit: Reset Im_Reading list') - connection.commit() - - for book in books: -# debug_print('Title:', book.title, 'lpath:', book.path) - if 'Read' not in book.device_collections: - book.device_collections.append('Read') - - extension = os.path.splitext(book.path)[1] - ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path) - - ContentID = self.contentid_from_path(book.path, ContentType) -# datelastread = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime()) - - t = (ContentID,) - - try: - cursor.execute('update content set ReadStatus=2,FirstTimeReading=\'true\' where BookID is Null and ContentID = ?', t) - except: - debug_print('Database Exception: Unable set book as Finished') - raise - else: - connection.commit() -# debug_print('Database: Commit set ReadStatus as Finished') - if category == 'Closed': - # Reset Im_Reading list in the database - if oncard == 'carda': - query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 3 and ContentID like \'file:///mnt/sd/%\'' - elif oncard != 'carda' and oncard != 'cardb': - query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 3 and ContentID not like \'file:///mnt/sd/%\'' - - try: - cursor.execute (query) - except: - debug_print('Database Exception: Unable to reset Closed list') - raise - else: -# debug_print('Commit: Reset Closed list') - connection.commit() - - for book in books: -# debug_print('Title:', book.title, 'lpath:', book.path) - if 'Closed' not in book.device_collections: - book.device_collections.append('Closed') - - extension = os.path.splitext(book.path)[1] - ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path) - - ContentID = self.contentid_from_path(book.path, ContentType) -# datelastread = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime()) - - t = (ContentID,) - - try: - cursor.execute('update content set ReadStatus=3,FirstTimeReading=\'true\' where BookID is Null and ContentID = ?', t) - except: - debug_print('Database Exception: Unable set book as Closed') - raise - else: - connection.commit() -# debug_print('Database: Commit set ReadStatus as Closed') + self.set_readstatus(connection, ContentID, readstatuslist.get(category)) if category == 'Shortlist': - # Reset FavouritesIndex list in the database - if oncard == 'carda': - query= 'update content set FavouritesIndex=-1 where BookID is Null and ContentID like \'file:///mnt/sd/%\'' - elif oncard != 'carda' and oncard != 'cardb': - query= 'update content set FavouritesIndex=-1 where BookID is Null and ContentID not like \'file:///mnt/sd/%\'' - - try: - cursor.execute (query) - except: - debug_print('Database Exception: Unable to reset Shortlist list') - raise - else: -# debug_print('Commit: Reset Shortlist list') - connection.commit() - + debug_print("Category: ", category) + cursor = connection.cursor() for book in books: -# debug_print('Title:', book.title, 'lpath:', book.path) + debug_print(' Title:', book.title, 'category: ', category) if 'Shortlist' not in book.device_collections: book.device_collections.append('Shortlist') # debug_print ("Shortlist found for: ", book.title) @@ -726,23 +678,11 @@ class KOBO(USBMS): else: # No collections # Since no collections exist the ReadStatus needs to be reset to 0 (Unread) - print "Reseting ReadStatus to 0" - # Reset Im_Reading list in the database - if oncard == 'carda': - query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID like \'file:///mnt/sd/%\'' - elif oncard != 'carda' and oncard != 'cardb': - query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID not like \'file:///mnt/sd/%\'' + debug_print("No Collections - reseting ReadStatus to 0") + self.reset_readstatus(connection, oncard) + debug_print("No Collections - reseting FavouritesIndex") + self.reset_favouritesindex(connection, oncard) - try: - cursor.execute (query) - except: - debug_print('Database Exception: Unable to reset Im_Reading list') - raise - else: -# debug_print('Commit: Reset Im_Reading list') - connection.commit() - - cursor.close() connection.close() # debug_print('Finished update_device_database_collections', collections_attributes) diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index 1767019972..6527dfb855 100644 --- a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -176,7 +176,7 @@ def add_pipeline_options(parser, plumber): [ 'level1_toc', 'level2_toc', 'level3_toc', 'toc_threshold', 'max_toc_links', 'no_chapters_in_toc', - 'use_auto_toc', 'toc_filter', + 'use_auto_toc', 'toc_filter', 'duplicate_links_in_toc', ] ), diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 9ec474e60f..d0d427bf74 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -265,6 +265,14 @@ OptionRecommendation(name='toc_filter', ) ), +OptionRecommendation(name='duplicate_links_in_toc', + recommended_value=False, level=OptionRecommendation.LOW, + help=_('When creating a TOC from links in the input document, ' + 'allow duplicate entries, i.e. allow more than one entry ' + 'with the same text, provided that they point to a ' + 'different location.') + ), + OptionRecommendation(name='chapter', recommended_value="//*[((name()='h1' or name()='h2') and " diff --git a/src/calibre/ebooks/html/to_zip.py b/src/calibre/ebooks/html/to_zip.py new file mode 100644 index 0000000000..5c6aba010f --- /dev/null +++ b/src/calibre/ebooks/html/to_zip.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import textwrap, os, glob + +from calibre.customize import FileTypePlugin +from calibre.constants import numeric_version + +class HTML2ZIP(FileTypePlugin): + name = 'HTML to ZIP' + author = 'Kovid Goyal' + description = textwrap.dedent(_('''\ +Follow all local links in an HTML file and create a ZIP \ +file containing all linked files. This plugin is run \ +every time you add an HTML file to the library.\ +''')) + version = numeric_version + file_types = set(['html', 'htm', 'xhtml', 'xhtm', 'shtm', 'shtml']) + supported_platforms = ['windows', 'osx', 'linux'] + on_import = True + + def run(self, htmlfile): + from calibre.ptempfile import TemporaryDirectory + from calibre.gui2.convert.gui_conversion import gui_convert + from calibre.customize.conversion import OptionRecommendation + from calibre.ebooks.epub import initialize_container + + with TemporaryDirectory('_plugin_html2zip') as tdir: + recs =[('debug_pipeline', tdir, OptionRecommendation.HIGH)] + recs.append(['keep_ligatures', True, OptionRecommendation.HIGH]) + if self.site_customization and self.site_customization.strip(): + sc = self.site_customization.strip() + enc, _, bf = sc.partition('|') + if enc: + recs.append(['input_encoding', enc, + OptionRecommendation.HIGH]) + if bf == 'bf': + recs.append(['breadth_first', True, + OptionRecommendation.HIGH]) + gui_convert(htmlfile, tdir, recs, abort_after_input_dump=True) + of = self.temporary_file('_plugin_html2zip.zip') + tdir = os.path.join(tdir, 'input') + opf = glob.glob(os.path.join(tdir, '*.opf'))[0] + ncx = glob.glob(os.path.join(tdir, '*.ncx')) + if ncx: + os.remove(ncx[0]) + epub = initialize_container(of.name, os.path.basename(opf)) + epub.add_dir(tdir) + epub.close() + + return of.name + + def customization_help(self, gui=False): + return _('Character encoding for the input HTML files. Common choices ' + 'include: cp1252, cp1251, latin1 and utf-8.') + + def do_user_config(self, parent=None): + ''' + This method shows a configuration dialog for this plugin. It returns + True if the user clicks OK, False otherwise. The changes are + automatically applied. + ''' + from PyQt4.Qt import (QDialog, QDialogButtonBox, QVBoxLayout, + QLabel, Qt, QLineEdit, QCheckBox) + + config_dialog = QDialog(parent) + button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel) + v = QVBoxLayout(config_dialog) + + def size_dialog(): + config_dialog.resize(config_dialog.sizeHint()) + + button_box.accepted.connect(config_dialog.accept) + button_box.rejected.connect(config_dialog.reject) + config_dialog.setWindowTitle(_('Customize') + ' ' + self.name) + from calibre.customize.ui import (plugin_customization, + customize_plugin) + help_text = self.customization_help(gui=True) + help_text = QLabel(help_text, config_dialog) + help_text.setWordWrap(True) + help_text.setTextInteractionFlags(Qt.LinksAccessibleByMouse + | Qt.LinksAccessibleByKeyboard) + help_text.setOpenExternalLinks(True) + v.addWidget(help_text) + bf = QCheckBox(_('Add linked files in breadth first order')) + bf.setToolTip(_('Normally, when following links in HTML files' + ' calibre does it depth first, i.e. if file A links to B and ' + ' C, but B links to D, the files are added in the order A, B, D, C. ' + ' With this option, they will instead be added as A, B, C, D')) + sc = plugin_customization(self) + if not sc: + sc = '' + sc = sc.strip() + enc = sc.partition('|')[0] + bfs = sc.partition('|')[-1] + bf.setChecked(bfs == 'bf') + sc = QLineEdit(enc, config_dialog) + v.addWidget(sc) + v.addWidget(bf) + v.addWidget(button_box) + size_dialog() + config_dialog.exec_() + + if config_dialog.result() == QDialog.Accepted: + sc = unicode(sc.text()).strip() + if bf.isChecked(): + sc += '|bf' + customize_plugin(self, sc) + + return config_dialog.result() + diff --git a/src/calibre/ebooks/metadata/sources/douban.py b/src/calibre/ebooks/metadata/sources/douban.py index 8f8f5b80c4..06e874e8ca 100644 --- a/src/calibre/ebooks/metadata/sources/douban.py +++ b/src/calibre/ebooks/metadata/sources/douban.py @@ -153,7 +153,8 @@ class Douban(Source): author = 'Li Fanxi' version = (2, 0, 0) - description = _('Downloads metadata and covers from Douban.com') + description = _('Downloads metadata and covers from Douban.com. ' + 'Useful only for chinese language books.') capabilities = frozenset(['identify', 'cover']) touched_fields = frozenset(['title', 'authors', 'tags', diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py index 849dbd1555..872a3c7b87 100644 --- a/src/calibre/ebooks/metadata/sources/identify.py +++ b/src/calibre/ebooks/metadata/sources/identify.py @@ -19,7 +19,7 @@ from calibre.customize.ui import metadata_plugins, all_metadata_plugins from calibre.ebooks.metadata.sources.base import create_log, msprefs from calibre.ebooks.metadata.xisbn import xisbn from calibre.ebooks.metadata.book.base import Metadata -from calibre.utils.date import utc_tz +from calibre.utils.date import utc_tz, as_utc from calibre.utils.html2text import html2text from calibre.utils.icu import lower @@ -57,11 +57,34 @@ def is_worker_alive(workers): # Merge results from different sources {{{ +class xISBN(Thread): + + def __init__(self, isbn): + Thread.__init__(self) + self.isbn = isbn + self.isbns = frozenset() + self.min_year = None + self.daemon = True + self.exception = self.tb = None + + def run(self): + try: + self.isbns, self.min_year = xisbn.get_isbn_pool(self.isbn) + except Exception as e: + import traceback + self.exception = e + self.tb = traceback.format_exception() + + + class ISBNMerge(object): - def __init__(self): + def __init__(self, log): self.pools = {} self.isbnless_results = [] + self.results = [] + self.log = log + self.use_xisbn = True def isbn_in_pool(self, isbn): if isbn: @@ -82,7 +105,20 @@ class ISBNMerge(object): if isbn: pool = self.isbn_in_pool(isbn) if pool is None: - isbns, min_year = xisbn.get_isbn_pool(isbn) + isbns = min_year = None + if self.use_xisbn: + xw = xISBN(isbn) + xw.start() + xw.join(10) + if xw.is_alive(): + self.log.error('Query to xISBN timed out') + self.use_xisbn = False + else: + if xw.exception: + self.log.error('Query to xISBN failed:') + self.log.debug(xw.tb) + else: + isbns, min_year = xw.isbns, xw.min_year if not isbns: isbns = frozenset([isbn]) if isbns in self.pools: @@ -102,15 +138,19 @@ class ISBNMerge(object): if results: has_isbn_result = True break - self.has_isbn_result = has_isbn_result + isbn_sources = frozenset() if has_isbn_result: - self.merge_isbn_results() - else: - results = sorted(self.isbnless_results, - key=attrgetter('relevance_in_source')) + isbn_sources = self.merge_isbn_results() + + # Now handle results that have no ISBNs + results = sorted(self.isbnless_results, + key=attrgetter('relevance_in_source')) + # Only use results that are from sources that have not also returned a + # result with an ISBN + results = [r for r in results if r.identify_plugin not in isbn_sources] + if results: # Pick only the most relevant result from each source - self.results = [] seen = set() for result in results: if result.identify_plugin not in seen: @@ -190,11 +230,15 @@ class ISBNMerge(object): def merge_isbn_results(self): self.results = [] + sources = set() for min_year, results in self.pools.itervalues(): if results: + for r in results: + sources.add(r.identify_plugin) self.results.append(self.merge(results, min_year)) self.results.sort(key=attrgetter('average_source_relevance')) + return sources def length_merge(self, attr, results, null_value=None, shortest=True): values = [getattr(x, attr) for x in results if not x.is_null(attr)] @@ -254,13 +298,23 @@ class ISBNMerge(object): # Published date if min_year: - min_date = datetime(min_year, 1, 2, tzinfo=utc_tz) + for r in results: + year = getattr(r.pubdate, 'year', None) + if year == min_year: + ans.pubdate = r.pubdate + break + if getattr(ans.pubdate, 'year', None) == min_year: + min_date = datetime(min_year, ans.pubdate.month, ans.pubdate.day) + else: + min_date = datetime(min_year, 1, 2, tzinfo=utc_tz) ans.pubdate = min_date else: min_date = datetime(3001, 1, 1, tzinfo=utc_tz) for r in results: - if r.pubdate is not None and r.pubdate < min_date: - min_date = r.pubdate + if r.pubdate is not None: + candidate = as_utc(r.pubdate) + if candidate < min_date: + min_date = candidate if min_date.year < 3000: ans.pubdate = min_date @@ -293,7 +347,7 @@ class ISBNMerge(object): def merge_identify_results(result_map, log): - isbn_merge = ISBNMerge() + isbn_merge = ISBNMerge(log) for plugin, results in result_map.iteritems(): for result in results: isbn_merge.add_result(result) diff --git a/src/calibre/ebooks/oeb/transforms/structure.py b/src/calibre/ebooks/oeb/transforms/structure.py index 613429c3ec..f2a61ba6e1 100644 --- a/src/calibre/ebooks/oeb/transforms/structure.py +++ b/src/calibre/ebooks/oeb/transforms/structure.py @@ -121,14 +121,16 @@ class DetectStructure(object): if not self.oeb.toc.has_href(href): text = xml2text(a) text = text[:100].strip() - if not self.oeb.toc.has_text(text): - num += 1 - self.oeb.toc.add(text, href, - play_order=self.oeb.toc.next_play_order()) - if self.opts.max_toc_links > 0 and \ - num >= self.opts.max_toc_links: - self.log('Maximum TOC links reached, stopping.') - return + if (not self.opts.duplicate_links_in_toc and + self.oeb.toc.has_text(text)): + continue + num += 1 + self.oeb.toc.add(text, href, + play_order=self.oeb.toc.next_play_order()) + if self.opts.max_toc_links > 0 and \ + num >= self.opts.max_toc_links: + self.log('Maximum TOC links reached, stopping.') + return diff --git a/src/calibre/gui2/convert/toc.py b/src/calibre/gui2/convert/toc.py index 8121f23192..dab7559f1b 100644 --- a/src/calibre/gui2/convert/toc.py +++ b/src/calibre/gui2/convert/toc.py @@ -22,7 +22,7 @@ class TOCWidget(Widget, Ui_Form): Widget.__init__(self, parent, ['level1_toc', 'level2_toc', 'level3_toc', 'toc_threshold', 'max_toc_links', 'no_chapters_in_toc', - 'use_auto_toc', 'toc_filter', + 'use_auto_toc', 'toc_filter', 'duplicate_links_in_toc', ] ) self.db, self.book_id = db, book_id diff --git a/src/calibre/gui2/convert/toc.ui b/src/calibre/gui2/convert/toc.ui index 342e44f5d8..da15551f80 100644 --- a/src/calibre/gui2/convert/toc.ui +++ b/src/calibre/gui2/convert/toc.ui @@ -21,7 +21,7 @@ - + Number of &links to add to Table of Contents @@ -31,14 +31,14 @@ - + 10000 - + Chapter &threshold @@ -48,7 +48,7 @@ - + @@ -58,7 +58,7 @@ - + TOC &Filter: @@ -68,19 +68,19 @@ - + - + - + - + - + Qt::Vertical @@ -93,6 +93,13 @@ + + + + Allow &duplicate links when creating the Table of Contents + + + diff --git a/src/calibre/gui2/dialogs/scheduler.py b/src/calibre/gui2/dialogs/scheduler.py index 7d1d87b472..7531d3f6bb 100644 --- a/src/calibre/gui2/dialogs/scheduler.py +++ b/src/calibre/gui2/dialogs/scheduler.py @@ -336,7 +336,12 @@ class SchedulerDialog(QDialog, Ui_Dialog): self.download_button.setVisible(True) self.detail_box.setCurrentIndex(0) recipe = self.recipe_model.recipe_from_urn(urn) - schedule_info = self.recipe_model.schedule_info_from_urn(urn) + try: + schedule_info = self.recipe_model.schedule_info_from_urn(urn) + except: + # Happens if user does something stupid like unchecking all the + # days of the week + schedule_info = None account_info = self.recipe_model.account_info_from_urn(urn) customize_info = self.recipe_model.get_customize_info(urn) diff --git a/src/calibre/gui2/metadata/bulk_download.py b/src/calibre/gui2/metadata/bulk_download.py index 2a307fc902..3461da8666 100644 --- a/src/calibre/gui2/metadata/bulk_download.py +++ b/src/calibre/gui2/metadata/bulk_download.py @@ -20,6 +20,7 @@ from calibre.ebooks.metadata.sources.covers import download_cover from calibre.ebooks.metadata.book.base import Metadata from calibre.customize.ui import metadata_plugins from calibre.ptempfile import PersistentTemporaryFile +from calibre.utils.date import as_utc # Start download {{{ def show_config(gui, parent): @@ -124,10 +125,18 @@ def merge_result(oldmi, newmi): for plugin in metadata_plugins(['identify']): fields |= plugin.touched_fields + def is_equal(x, y): + if hasattr(x, 'tzinfo'): + x = as_utc(x) + if hasattr(y, 'tzinfo'): + y = as_utc(y) + return x == y + for f in fields: # Optimize so that set_metadata does not have to do extra work later if not f.startswith('identifier:'): - if (not newmi.is_null(f) and getattr(newmi, f) == getattr(oldmi, f)): + if (not newmi.is_null(f) and is_equal(getattr(newmi, f), + getattr(oldmi, f))): setattr(newmi, f, getattr(dummy, f)) newmi.last_modified = oldmi.last_modified diff --git a/src/calibre/gui2/metadata/single_download.py b/src/calibre/gui2/metadata/single_download.py index ff4b9feac8..2dcf767a38 100644 --- a/src/calibre/gui2/metadata/single_download.py +++ b/src/calibre/gui2/metadata/single_download.py @@ -254,6 +254,10 @@ class ResultsView(QTableView): # {{{ '

%s

'%book.title, '
%s
'%authors_to_string(book.authors), ] + if not book.is_null('series'): + series = book.format_field('series') + if series[1]: + parts.append('
%s: %s
'%series) if not book.is_null('rating'): parts.append('
%s
'%('\u2605'*int(book.rating))) parts.append('') diff --git a/src/calibre/gui2/preferences/metadata_sources.py b/src/calibre/gui2/preferences/metadata_sources.py index ae882ce255..6c9122abf7 100644 --- a/src/calibre/gui2/preferences/metadata_sources.py +++ b/src/calibre/gui2/preferences/metadata_sources.py @@ -17,12 +17,13 @@ from calibre.gui2.preferences.metadata_sources_ui import Ui_Form from calibre.ebooks.metadata.sources.base import msprefs from calibre.customize.ui import (all_metadata_plugins, is_disabled, enable_plugin, disable_plugin, default_disabled_plugins) -from calibre.gui2 import NONE, error_dialog +from calibre.gui2 import NONE, error_dialog, question_dialog class SourcesModel(QAbstractTableModel): # {{{ def __init__(self, parent=None): QAbstractTableModel.__init__(self, parent) + self.gui_parent = parent self.plugins = [] self.enabled_overrides = {} @@ -87,6 +88,15 @@ class SourcesModel(QAbstractTableModel): # {{{ if col == 0 and role == Qt.CheckStateRole: val, ok = val.toInt() if ok: + if val == Qt.Checked and 'Douban' in plugin.name: + if not question_dialog(self.gui_parent, + _('Are you sure?'), '

'+ + _('This plugin is useful only for Chinese' + ' language books. It can return incorrect' + ' results for books in English. Are you' + ' sure you want to enable it?'), + show_copy_button=False): + return ret self.enabled_overrides[plugin] = val ret = True if col == 1 and role == Qt.EditRole: diff --git a/src/calibre/gui2/preferences/plugboard.py b/src/calibre/gui2/preferences/plugboard.py index cf632c04c0..f0a1a5fd04 100644 --- a/src/calibre/gui2/preferences/plugboard.py +++ b/src/calibre/gui2/preferences/plugboard.py @@ -241,7 +241,8 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): if self.current_format != plugboard_any_format_value and \ self.current_device in self.device_to_formats_map: allowable_formats = self.device_to_formats_map[self.current_device] - if self.current_format not in allowable_formats: + if (self.current_format not in allowable_formats and + self.current_format != 'device_db'): error_dialog(self, '', _('The {0} device does not support the {1} format.'). format(self.current_device, self.current_format), @@ -358,5 +359,5 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): if __name__ == '__main__': from PyQt4.Qt import QApplication app = QApplication([]) - test_widget('Import/Export', 'plugboards') + test_widget('Import/Export', 'Plugboard') diff --git a/src/calibre/gui2/store/search/models.py b/src/calibre/gui2/store/search/models.py index 1fb0e5327b..1a2327fc45 100644 --- a/src/calibre/gui2/store/search/models.py +++ b/src/calibre/gui2/store/search/models.py @@ -22,6 +22,7 @@ from calibre.utils.icu import sort_key from calibre.utils.search_query_parser import SearchQueryParser def comparable_price(text): + text = re.sub(r'[^0-9.,]', '', text) if len(text) < 3 or text[-3] not in ('.', ','): text += '00' text = re.sub(r'\D', '', text) @@ -293,6 +294,7 @@ class SearchFilter(SearchQueryParser): return self.srs def get_matches(self, location, query): + query = query.strip() location = location.lower().strip() if location == 'authors': location = 'author' diff --git a/src/calibre/gui2/store/search/search.py b/src/calibre/gui2/store/search/search.py index f6fa423e23..9e78f75b4a 100644 --- a/src/calibre/gui2/store/search/search.py +++ b/src/calibre/gui2/store/search/search.py @@ -22,6 +22,7 @@ from calibre.gui2.store.search.adv_search_builder import AdvSearchBuilderDialog from calibre.gui2.store.search.download_thread import SearchThreadPool, \ CacheUpdateThreadPool from calibre.gui2.store.search.search_ui import Ui_Dialog +from calibre.utils.filenames import ascii_filename class SearchDialog(QDialog, Ui_Dialog): @@ -350,6 +351,7 @@ class SearchDialog(QDialog, Ui_Dialog): if d.exec_() == d.Accepted: ext = d.format() fname = result.title + '.' + ext.lower() + fname = ascii_filename(fname) self.gui.download_ebook(result.downloads[ext], filename=fname) def open_store(self, result): diff --git a/src/calibre/gui2/store/stores/smashwords_plugin.py b/src/calibre/gui2/store/stores/smashwords_plugin.py index 73700ed546..7a7e756a05 100644 --- a/src/calibre/gui2/store/stores/smashwords_plugin.py +++ b/src/calibre/gui2/store/stores/smashwords_plugin.py @@ -77,9 +77,12 @@ class SmashwordsStore(BasicStoreConfig, StorePlugin): title = ''.join(data.xpath('//a[@class="bookTitle"]/text()')) subnote = ''.join(data.xpath('//span[@class="subnote"]/text()')) author = ''.join(data.xpath('//span[@class="subnote"]/a/text()')) - price = subnote.partition('$')[2] - price = price.split(u'\xa0')[0] - price = '$' + price + if '$' in subnote: + price = subnote.partition('$')[2] + price = price.split(u'\xa0')[0] + price = '$' + price + else: + price = '$0.00' counter -= 1 diff --git a/src/calibre/manual/develop.rst b/src/calibre/manual/develop.rst index 506615914c..acf7cf1a6f 100644 --- a/src/calibre/manual/develop.rst +++ b/src/calibre/manual/develop.rst @@ -9,7 +9,7 @@ Setting up a |app| development environment This means that you are free to download and modify the program to your hearts content. In this section, you will learn how to get a |app| development environment setup on the operating system of your choice. |app| is written primarily in `Python `_ with some C/C++ code for speed and system interfacing. -Note that |app| is not compatible with Python 3 and requires at least Python 2.6. +Note that |app| is not compatible with Python 3 and requires at least Python 2.7. .. contents:: Contents :depth: 2 @@ -122,7 +122,7 @@ Setting this environment variable means that |app| will now load all its Python That's it! You are now ready to start hacking on the |app| code. For example, open the file :file:`src\\calibre\\__init__.py` in your favorite editor and add the line:: - print "Hello, world!" + print ("Hello, world!") near the top of the file. Now run the command :command:`calibredb`. The very first line of output should be ``Hello, world!``. @@ -151,7 +151,7 @@ Setting this environment variable means that |app| will now load all its Python That's it! You are now ready to start hacking on the |app| code. For example, open the file :file:`src/calibre/__init__.py` in your favorite editor and add the line:: - print "Hello, world!" + print ("Hello, world!") near the top of the file. Now run the command :command:`calibredb`. The very first line of output should be ``Hello, world!``. @@ -161,7 +161,8 @@ Linux development environment |app| is primarily developed on linux. You have two choices in setting up the development environment. You can install the |app| binary as normal and use that as a runtime environment to do your development. This approach is similar to that used in windows and OS X. Alternatively, you can install |app| from source. Instructions for setting up a development -environment from source are in the INSTALL file in the source tree. Here we will address using the binary a runtime. +environment from source are in the INSTALL file in the source tree. Here we will address using the binary a runtime, which is the +recommended method. Install the |app| using the binary installer. Then open a terminal and change to the previously checked out |app| code directory, for example:: @@ -183,7 +184,7 @@ Setting this environment variable means that |app| will now load all its Python That's it! You are now ready to start hacking on the |app| code. For example, open the file :file:`src/calibre/__init__.py` in your favorite editor and add the line:: - print "Hello, world!" + print ("Hello, world!") near the top of the file. Now run the command :command:`calibredb`. The very first line of output should be ``Hello, world!``. @@ -193,9 +194,9 @@ Having separate "normal" and "development" |app| installs on the same computer The calibre source tree is very stable, it rarely breaks, but if you feel the need to run from source on a separate test library and run the released calibre version with your everyday library, you can achieve this easily using .bat files or shell scripts to launch |app|. The example below shows how to do this on windows using .bat files (the -instructions for other platforms are the same, just use a BASh script instead of a .bat file) +instructions for other platforms are the same, just use a BASH script instead of a .bat file) -To launch the relase version of |app| with your everyday library: +To launch the release version of |app| with your everyday library: calibre-normal.bat:: diff --git a/src/calibre/ptempfile.py b/src/calibre/ptempfile.py index 01e8f18339..af34408a05 100644 --- a/src/calibre/ptempfile.py +++ b/src/calibre/ptempfile.py @@ -72,8 +72,19 @@ class PersistentTemporaryFile(object): prefix = "" if dir is None: dir = base_dir() - fd, name = tempfile.mkstemp(suffix, __appname__+"_"+ __version__+"_" + prefix, + try: + fd, name = tempfile.mkstemp(suffix, __appname__+"_"+ __version__+"_" + prefix, dir=dir) + except UnicodeDecodeError: + global _base_dir + from calibre.constants import filesystem_encoding + base_dir() + if not isinstance(_base_dir, unicode): + _base_dir = _base_dir.decode(filesystem_encoding) + dir = dir.decode(filesystem_encoding) + fd, name = tempfile.mkstemp(suffix, __appname__+"_"+ __version__+"_" + prefix, + dir=dir) + self._file = os.fdopen(fd, mode) self._name = name self._fd = fd diff --git a/src/calibre/utils/date.py b/src/calibre/utils/date.py index 2c973da224..99be4af47c 100644 --- a/src/calibre/utils/date.py +++ b/src/calibre/utils/date.py @@ -132,6 +132,14 @@ def as_local_time(date_time, assume_utc=True): _local_tz) return date_time.astimezone(_local_tz) +def as_utc(date_time, assume_utc=True): + if not hasattr(date_time, 'tzinfo'): + return date_time + if date_time.tzinfo is None: + date_time = date_time.replace(tzinfo=_utc_tz if assume_utc else + _local_tz) + return date_time.astimezone(_utc_tz) + def now(): return datetime.now().replace(tzinfo=_local_tz)