diff --git a/resources/images/news/zerohedge.png b/resources/images/news/zerohedge.png new file mode 100644 index 0000000000..a2bc6cde14 Binary files /dev/null and b/resources/images/news/zerohedge.png differ diff --git a/resources/recipes/expansion_spanish.recipe b/resources/recipes/expansion_spanish.recipe index 31a1504eb0..f2229e90e6 100644 --- a/resources/recipes/expansion_spanish.recipe +++ b/resources/recipes/expansion_spanish.recipe @@ -1,59 +1,79 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- - __license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' +__author__ = 'Gerardo Diez' +__copyright__ = 'Gerardo Diez' +description = 'Main daily newspaper from Spain - v1.00 (05, Enero 2011)' +__docformat__ = 'restructuredtext en' + ''' -www.expansion.com +expansion.es ''' +from calibre.web.feeds.recipes import BasicNewsRecipe +class Publico(BasicNewsRecipe): + title =u'Expansion.com' + __author__ ='Gerardo Diez' + publisher =u'Unidad Editorial Información Económica, S.L.' + category ='finances, catalunya' + oldest_article =1 + max_articles_per_feed =100 + simultaneous_downloads =10 + cover_url =u'http://estaticos01.expansion.com/iconos/v2.x/v2.0/cabeceras/logo_expansion.png' + timefmt ='[%A, %d %B, %Y]' + encoding ='latin' + language ='es' + remove_javascript =True + no_stylesheets =True + keep_only_tags =dict(name='div', attrs={'class':['noticia primer_elemento']}) + remove_tags =[ + dict(name='div', attrs={'class':['compartir', 'metadata_desarrollo_noticia', 'relacionadas', 'mas_info','publicidad publicidad_textlink', 'ampliarfoto']}), + dict(name='ul', attrs={'class':['bolos_desarrollo_noticia']}), + dict(name='span', attrs={'class':['comentarios']}), + dict(name='p', attrs={'class':['cintillo_comentarios', 'cintillo_comentarios formulario']}), + dict(name='div', attrs={'id':['comentarios_lectores_listado']}) + ] + feeds =[ + (u'Portada', u'http://estaticos.expansion.com/rss/portada.xml'), + (u'Portada: Bolsas', u'http://estaticos.expansion.com/rss/mercados.xml'), + (u'Divisas', u'http://estaticos.expansion.com/rss/mercadosdivisas.xml'), + (u'Euribor', u'http://estaticos.expansion.com/rss/mercadoseuribor.xml'), + (u'Materias Primas', u'http://estaticos.expansion.com/rss/mercadosmateriasprimas.xml'), + (u'Renta Fija', u'http://estaticos.expansion.com/rss/mercadosrentafija.xml'), -from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag + (u'Portada: Mi Dinero', u'http://estaticos.expansion.com/rss/midinero.xml'), + (u'Hipotecas', u'http://estaticos.expansion.com/rss/midinerohipotecas.xml'), + (u'Créditos', u'http://estaticos.expansion.com/rss/midinerocreditos.xml'), + (u'Pensiones', u'http://estaticos.expansion.com/rss/midineropensiones.xml'), + (u'Fondos de Inversión', u'http://estaticos.expansion.com/rss/midinerofondos.xml'), + (u'Motor', u'http://estaticos.expansion.com/rss/midineromotor.xml'), -class Expansion(BasicNewsRecipe): - title = 'Diario Expansion' - __author__ = 'Darko Miletic' - description = 'Lider de informacion de mercados, economica y politica' - publisher = 'expansion.com' - category = 'news, politics, Spain' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - delay = 1 - encoding = 'iso-8859-15' - language = 'es' + (u'Portada: Empresas', u'http://estaticos.expansion.com/rss/empresas.xml'), + (u'Banca', u'http://estaticos.expansion.com/rss/empresasbanca.xml'), + (u'TMT', u'http://estaticos.expansion.com/rss/empresastmt.xml'), + (u'Energía', u'http://estaticos.expansion.com/rss/empresasenergia.xml'), + (u'Inmobiliario y Construcción', u'http://estaticos.expansion.com/rss/empresasinmobiliario.xml'), + (u'Transporte y Turismo', u'http://estaticos.expansion.com/rss/empresastransporte.xml'), + (u'Automoción e Industria', u'http://estaticos.expansion.com/rss/empresasauto-industria.xml'), + (u'Distribución', u'http://estaticos.expansion.com/rss/empresasdistribucion.xml'), + (u'Deporte y Negocio', u' http://estaticos.expansion.com/rss/empresasdeporte.xml'), + (u'Mi Negocio', u'http://estaticos.expansion.com/rss/empresasminegocio.xml'), + (u'Interiores', u'http://estaticos.expansion.com/rss/empresasinteriores.xml'), + (u'Digitech', u'http://estaticos.expansion.com/rss/empresasdigitech.xml'), - direction = 'ltr' + (u'Portada: Economía y Política', u'http://estaticos.expansion.com/rss/economiapolitica.xml'), + (u'Política', u'http://estaticos.expansion.com/rss/economia.xml'), + (u'Portada: Sociedad', u'http://estaticos.expansion.com/rss/entorno.xml'), - html2lrf_options = [ - '--comment' , description - , '--category' , category - , '--publisher', publisher - ] + (u'Portada: Opinión', u'http://estaticos.expansion.com/rss/opinion.xml'), + (u'Llaves y editoriales', u'http://estaticos.expansion.com/rss/opinioneditorialyllaves.xml'), + (u'Tribunas', u'http://estaticos.expansion.com/rss/opiniontribunas.xml'), - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + (u'Portada: Jurídico', u'http://estaticos.expansion.com/rss/juridico.xml'), + (u'Entrevistas', u'http://estaticos.expansion.com/rss/juridicoentrevistas.xml'), + (u'Opinión', u'http://estaticos.expansion.com/rss/juridicoopinion.xml'), + (u'Sentencias', u'http://estaticos.expansion.com/rss/juridicosentencias.xml'), - feeds = [ - (u'Ultimas noticias', u'http://rss.expansion.com/rss/descarga.htm?data2=178') - ,(u'Temas del dia' , u'http://rss.expansion.com/rss/descarga.htm?data2=178') - ] - - - keep_only_tags = [dict(name='div', attrs={'id':'principal'})] - - remove_tags = [ - dict(name=['object','link','script']) - ,dict(name='div', attrs={'class':['utilidades','tit_relacionadas']}) - ] - - remove_tags_after = [dict(name='div', attrs={'class':'tit_relacionadas'})] - - def preprocess_html(self, soup): - soup.html['dir' ] = self.direction - mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]) - soup.head.insert(0,mcharset) - for item in soup.findAll(style=True): - del item['style'] - return soup + (u'Mujer', u'http://estaticos.expansion.com/rss/mujer-empresa.xml'), + (u'Cataluña', u'http://estaticos.expansion.com/rss/catalunya.xml'), + (u'Función pública', u'http://estaticos.expansion.com/rss/funcion-publica.xml') + ] diff --git a/resources/recipes/msnbc.recipe b/resources/recipes/msnbc.recipe index 6e2fc50aaa..6e58585341 100644 --- a/resources/recipes/msnbc.recipe +++ b/resources/recipes/msnbc.recipe @@ -1,10 +1,9 @@ __license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' +__copyright__ = '2010-2011, Darko Miletic ' ''' msnbc.msn.com ''' -import re from calibre.web.feeds.recipes import BasicNewsRecipe class MsNBC(BasicNewsRecipe): @@ -19,7 +18,16 @@ class MsNBC(BasicNewsRecipe): publisher = 'msnbc.com' category = 'news, USA, world' language = 'en' - extra_css = ' body{ font-family: sans-serif } .head{font-family: serif; font-size: xx-large; font-weight: bold; color: #CC0000} .abstract{font-weight: bold} .source{font-size: small} .updateTime{font-size: small} ' + extra_css = """ + body{ font-family: Georgia,Times,serif } + .hide{display: none} + .caption{font-family: Arial,sans-serif; font-size: x-small} + .entry-summary{font-family: Arial,sans-serif} + .copyright{font-size: 0.95em; font-style: italic} + .source-org{font-size: small; font-family: Arial,sans-serif} + img{display: block; margin-bottom: 0.5em} + span.byline{display: none} + """ conversion_options = { 'comments' : description @@ -28,14 +36,20 @@ class MsNBC(BasicNewsRecipe): ,'publisher': publisher } - preprocess_regexps = [ - (re.compile(r'', re.DOTALL|re.IGNORECASE),lambda match: '') - ,(re.compile(r'
', re.DOTALL|re.IGNORECASE),lambda match: '
'), - ] + remove_tags_before = dict(name='h1', attrs={'id':'headline'}) + remove_tags_after = dict(name='span', attrs={'class':['copyright','Linear copyright']}) + keep_only_tags=[ + dict(attrs={'id':['headline','deck','byline','source','intelliTXT']}) + ,dict(attrs={'class':['gl_headline','articleText','drawer-content Linear','v-center3','byline','textBodyBlack']}) + ] + remove_attributes=['property','lang','rel','xmlns:fb','xmlns:v','xmlns:dc','xmlns:dcmitype','xmlns:og','xmlns:media','xmlns:vcard','typeof','itemscope','itemtype','itemprop','about','type','size','width','height','onreadystatechange','data','border','hspace','vspace'] + + remove_tags = [ + dict(name=['iframe','object','link','embed','meta','table']) + ,dict(name='span', attrs={'class':['copyright','Linear copyright']}) + ,dict(name='div', attrs={'class':'social'}) + ] - remove_tags_before = dict(name='div', attrs={'class':'head'}) - remove_tags_after = dict(name='div', attrs={'class':'copyright'}) - remove_tags = [dict(name=['iframe','object','link','script','form'])] feeds = [ (u'US News' , u'http://rss.msnbc.msn.com/id/3032524/device/rss/rss.xml' ) @@ -48,11 +62,26 @@ class MsNBC(BasicNewsRecipe): ,(u'Tech & Science', u'http://rss.msnbc.msn.com/id/3032117/device/rss/rss.xml' ) ] - def print_version(self, url): - return url + 'print/1/displaymode/1098/' - def preprocess_html(self, soup): - for item in soup.head.findAll('div'): - item.extract() + for item in soup.body.findAll('html'): + item.name='div' + for item in soup.body.findAll('div'): + if item.has_key('id') and item['id'].startswith('vine-'): + item.extract() + if item.has_key('class') and ( item['class'].startswith('ad') or item['class'].startswith('vine')): + item.extract() + for item in soup.body.findAll('img'): + if not item.has_key('alt'): + item['alt'] = 'image' + for item in soup.body.findAll('ol'): + if item.has_key('class') and item['class'].startswith('grid'): + item.extract() + for item in soup.body.findAll('span'): + if ( item.has_key('id') and item['id'].startswith('byLine') and item.string is None) or ( item.has_key('class') and item['class'].startswith('inline') ): + item.extract() + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) return soup diff --git a/resources/recipes/technology_review.recipe b/resources/recipes/technology_review.recipe index cc8f13733e..e7cc6700d7 100644 --- a/resources/recipes/technology_review.recipe +++ b/resources/recipes/technology_review.recipe @@ -35,7 +35,6 @@ class TechnologyReview(BasicNewsRecipe): def get_article_url(self, article): return article.get('guid', article.get('id', None)) - def print_version(self, url): baseurl='http://www.technologyreview.com/printer_friendly_article.aspx?id=' split1 = string.split(url,"/") @@ -43,3 +42,25 @@ class TechnologyReview(BasicNewsRecipe): split2= string.split(xxx,"/") s = baseurl + split2[0] return s + + + def postprocess_html(self,soup, True): + #remove picture + headerhtml = soup.find(True, {'class':'header'}) + headerhtml.replaceWith("") + + #remove close button + closehtml = soup.find(True, {'class':'close'}) + closehtml.replaceWith("") + + #remove banner advertisement + bannerhtml = soup.find(True, {'class':'bannerad'}) + bannerhtml.replaceWith("") + + #thanks kiklop74! This code removes all links from the text + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) + + return soup diff --git a/resources/recipes/wired_daily.recipe b/resources/recipes/wired_daily.recipe index f06d28796e..df59c7c826 100644 --- a/resources/recipes/wired_daily.recipe +++ b/resources/recipes/wired_daily.recipe @@ -2,8 +2,10 @@ __license__ = 'GPL v3' __docformat__ = 'restructuredtext en' +import re from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.chardet import xml_to_unicode class Wired_Daily(BasicNewsRecipe): @@ -15,30 +17,43 @@ class Wired_Daily(BasicNewsRecipe): no_stylesheets = True + preprocess_regexps = [(re.compile(r'', re.DOTALL), lambda m: + '')] + remove_tags_before = dict(name='div', id='content') - remove_tags = [dict(id=['social_tools', 'outerWrapper', 'sidebar', - 'footer', 'advertisement', 'blog_subscription_unit', - 'brightcove_component']), - {'class':'entryActions'}, - dict(name=['noscript', 'script'])] + remove_tags = [dict(id=['header', 'commenting_module', 'post_nav', + 'social_tools', 'sidebar', 'footer', 'social_wishlist', 'pgwidget', + 'outerWrapper', 'inf_widget']), + {'class':['entryActions', 'advertisement', 'entryTags']}, + dict(name=['noscript', 'script']), + dict(name='h4', attrs={'class':re.compile(r'rat\d+')}), + {'class':lambda x: x and x.startswith('contentjump')}, + dict(name='li', attrs={'class':['entryCategories', 'entryEdit']})] + feeds = [ ('Top News', 'http://feeds.wired.com/wired/index'), - ('Culture', 'http://feeds.wired.com/wired/culture'), - ('Software', 'http://feeds.wired.com/wired/software'), - ('Mac', 'http://feeds.feedburner.com/cultofmac/bFow'), - ('Gadgets', 'http://feeds.wired.com/wired/gadgets'), - ('Cars', 'http://feeds.wired.com/wired/cars'), - ('Entertainment', 'http://feeds.wired.com/wired/entertainment'), - ('Gaming', 'http://feeds.wired.com/wired/gaming'), - ('Science', 'http://feeds.wired.com/wired/science'), - ('Med Tech', 'http://feeds.wired.com/wired/medtech'), - ('Politics', 'http://feeds.wired.com/wired/politics'), - ('Tech Biz', 'http://feeds.wired.com/wired/techbiz'), - ('Commentary', 'http://feeds.wired.com/wired/commentary'), + ('Product Reviews', + 'http://www.wired.com/reviews/feeds/latestProductsRss'), + ('Autopia', 'http://www.wired.com/autopia/feed/'), + ('Danger Room', 'http://www.wired.com/dangerroom/feed/'), + ('Epicenter', 'http://www.wired.com/epicenter/feed/'), + ('Gadget Lab', 'http://www.wired.com/gadgetlab/feed/'), + ('Geek Dad', 'http://www.wired.com/geekdad/feed/'), + ('Playbook', 'http://www.wired.com/playbook/feed/'), + ('Rawfile', 'http://www.wired.com/rawfile/feed/'), + ('This Day in Tech', 'http://www.wired.com/thisdayintech/feed/'), + ('Threat Level', 'http://www.wired.com/threatlevel/feed/'), + ('Underwire', 'http://www.wired.com/underwire/feed/'), + ('Web Monkey', 'http://www.webmonkey.com/feed/'), + ('Science', 'http://www.wired.com/wiredscience/feed/'), ] + def populate_article_metadata(self, article, soup, first): + if article.text_summary: + article.text_summary = xml_to_unicode(article.text_summary, + resolve_entities=True)[0] + def print_version(self, url): - return url.replace('http://www.wired.com/', 'http://www.wired.com/print/') - + return url + '/all/1' diff --git a/resources/recipes/zerohedge.recipe b/resources/recipes/zerohedge.recipe new file mode 100644 index 0000000000..09f62e5b52 --- /dev/null +++ b/resources/recipes/zerohedge.recipe @@ -0,0 +1,33 @@ +__license__ = 'GPL v3' +__copyright__ = '2011, Darko Miletic ' +''' +www.zerohedge.com +''' + +from calibre.web.feeds.recipes import BasicNewsRecipe + +class ZeroHedge(BasicNewsRecipe): + title = 'Zero Hedge' + __author__ = 'Darko Miletic' + description = 'On a long enough timeline the survival rate for everyone drops to zero' + oldest_article = 10 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = True + encoding = 'utf8' + publisher = 'zero hedge' + category = 'news, USA, world, economy, politics' + language = 'en' + masthead_url = 'http://www.zerohedge.com/themes/newsflash/logo.png' + publication_type = 'blog' + extra_css = 'body{ font-family: sans-serif }' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher': publisher + } + + + feeds = [(u'Articles', u'http://feeds.feedburner.com/zerohedge/feed')] diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index 2585b5d081..a4f7439405 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -459,6 +459,18 @@ def force_unicode(obj, enc=preferred_encoding): obj = obj.decode('utf-8') return obj +def as_unicode(obj, enc=preferred_encoding): + if not isbytestring(obj): + try: + obj = unicode(obj) + except: + try: + obj = str(obj) + except: + obj = repr(obj) + return force_unicode(obj, enc=enc) + + def human_readable(size): """ Convert a size in bytes into a human readable form """ diff --git a/src/calibre/ebooks/metadata/rtf.py b/src/calibre/ebooks/metadata/rtf.py index ad41125575..c20d880a2f 100644 --- a/src/calibre/ebooks/metadata/rtf.py +++ b/src/calibre/ebooks/metadata/rtf.py @@ -10,7 +10,8 @@ from calibre.ebooks.metadata import MetaInformation, string_to_authors title_pat = re.compile(r'\{\\info.*?\{\\title(.*?)(? 6: - md += '}' + md.append(r'{\subject %s}'%(comment,)) + if options.publisher: + publisher = options.publisher.encode('ascii', 'ignore') + md.append(r'{\manager %s}'%(publisher,)) + if options.tags: + tags = u', '.join(options.tags) + tags = tags.encode('ascii', 'ignore') + md.append(r'{\category %s}'%(tags,)) + if len(md) > 1: + md.append('}') stream.seek(0) src = stream.read() - ans = src[:6] + md + src[6:] + ans = src[:6] + u''.join(md) + src[6:] stream.seek(0) stream.write(ans) @@ -156,7 +169,7 @@ def set_metadata(stream, options): base_pat = r'\{\\name(.*?)(?' +__docformat__ = 'restructuredtext en' + ''' Read content from txt file. @@ -10,10 +14,7 @@ from calibre import prepare_string_for_xml, isbytestring from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.txt.heuristicprocessor import TXTHeuristicProcessor from calibre.ebooks.conversion.preprocess import DocAnalysis - -__license__ = 'GPL v3' -__copyright__ = '2009, John Schember ' -__docformat__ = 'restructuredtext en' +from calibre.utils.cleantext import clean_ascii_chars HTML_TEMPLATE = u'%s\n%s\n' @@ -33,9 +34,7 @@ def clean_txt(txt): # Remove excessive line breaks. txt = re.sub('\n{3,}', '\n\n', txt) #remove ASCII invalid chars : 0 to 8 and 11-14 to 24 - chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19)) - illegal_chars = re.compile(u'|'.join(map(unichr, chars))) - txt = illegal_chars.sub('', txt) + txt = clean_ascii_chars(txt) return txt diff --git a/src/calibre/gui2/catalog/catalog_bibtex.py b/src/calibre/gui2/catalog/catalog_bibtex.py index 5030cf6ec8..7b7739bb46 100644 --- a/src/calibre/gui2/catalog/catalog_bibtex.py +++ b/src/calibre/gui2/catalog/catalog_bibtex.py @@ -27,14 +27,17 @@ class PluginWidget(QWidget, Ui_Form): def __init__(self, parent=None): QWidget.__init__(self, parent) self.setupUi(self) - from calibre.library.catalog import FIELDS - self.all_fields = [] - for x in FIELDS : - if x != 'all': - self.all_fields.append(x) - QListWidgetItem(x, self.db_fields) def initialize(self, name, db): #not working properly to update + from calibre.library.catalog import FIELDS + + self.all_fields = [x for x in FIELDS if x != 'all'] + #add custom columns + self.all_fields.extend([x for x in sorted(db.custom_field_keys())]) + #populate + for x in self.all_fields: + QListWidgetItem(x, self.db_fields) + self.name = name fields = gprefs.get(name+'_db_fields', self.all_fields) # Restore the activated db_fields from last use diff --git a/src/calibre/gui2/dialogs/metadata_bulk.py b/src/calibre/gui2/dialogs/metadata_bulk.py index e1ee4327f3..5ea8f00148 100644 --- a/src/calibre/gui2/dialogs/metadata_bulk.py +++ b/src/calibre/gui2/dialogs/metadata_bulk.py @@ -15,7 +15,7 @@ from calibre.ebooks.metadata import string_to_authors, authors_to_string from calibre.ebooks.metadata.book.base import composite_formatter from calibre.ebooks.metadata.meta import get_metadata from calibre.gui2.custom_column_widgets import populate_metadata_page -from calibre.gui2 import error_dialog +from calibre.gui2 import error_dialog, ResizableDialog from calibre.gui2.progress_indicator import ProgressIndicator from calibre.utils.config import dynamic from calibre.utils.titlecase import titlecase @@ -49,7 +49,7 @@ def get_cover_data(path): -class MyBlockingBusy(QDialog): +class MyBlockingBusy(QDialog): # {{{ do_one_signal = pyqtSignal() @@ -241,8 +241,9 @@ class MyBlockingBusy(QDialog): self.current_index += 1 self.do_one_signal.emit() + # }}} -class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog): +class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog): s_r_functions = { '' : lambda x: x, _('Lower Case') : lambda x: icu_lower(x), @@ -261,9 +262,8 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog): ] def __init__(self, window, rows, model, tab): - QDialog.__init__(self, window) + ResizableDialog.__init__(self, window) Ui_MetadataBulkDialog.__init__(self) - self.setupUi(self) self.model = model self.db = model.db self.ids = [self.db.id(r) for r in rows] diff --git a/src/calibre/gui2/dialogs/metadata_bulk.ui b/src/calibre/gui2/dialogs/metadata_bulk.ui index 41858b099b..9240cd1af8 100644 --- a/src/calibre/gui2/dialogs/metadata_bulk.ui +++ b/src/calibre/gui2/dialogs/metadata_bulk.ui @@ -6,8 +6,8 @@ 0 0 - 752 - 633 + 850 + 650 @@ -17,8 +17,8 @@ :/images/edit_input.png:/images/edit_input.png - - + + @@ -28,818 +28,836 @@ - - - - 6 + + + + QFrame::NoFrame - + 0 - - - + + true + + + + + 0 + 0 + 842 + 589 + + + + 0 - - - &Basic metadata - - - - - - &Author(s): - - - Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter - - - authors - - - - - - - A&utomatically set author sort - - - - - - - Author s&ort: - - - Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter - - - author_sort - - - - - - - Specify how the author(s) of this book should be sorted. For example Charles Dickens should be sorted as Dickens, Charles. - - - - - - - &Rating: - - - Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter - - - rating - - - - - - - Rating of this book. 0-5 stars - - - Rating of this book. 0-5 stars - - - QAbstractSpinBox::PlusMinus - - - No change - - - stars - - - -1 - - - 5 - - - -1 - - - - - - - &Publisher: - - - Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter - - - publisher - - - - - - - true - - - - - - - Add ta&gs: - - - Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter - - - tags - - - - - - - Tags categorize the book. This is particularly useful while searching. <br><br>They can be any words or phrases, separated by commas. - - - - - - - Open Tag Editor - - - Open Tag Editor - - - - :/images/chapters.png:/images/chapters.png - - - - - - - &Remove tags: - - - remove_tags - - - - - - - Comma separated list of tags to remove from the books. - - - - - - - Check this box to remove all tags from the books. - - - Remove all - - - - - - - &Series: - - - Qt::PlainText - - - Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter - - - series - - - - - - - + + + + 0 + + + + &Basic metadata + + + + + + &Author(s): + + + Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter + + + authors + + + + + + + A&utomatically set author sort + + + + + + + Author s&ort: + + + Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter + + + author_sort + + + + + - List of known series. You can add new series. + Specify how the author(s) of this book should be sorted. For example Charles Dickens should be sorted as Dickens, Charles. + + + + + + + &Rating: + + + Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter + + + rating + + + + + + + Rating of this book. 0-5 stars - List of known series. You can add new series. + Rating of this book. 0-5 stars + + QAbstractSpinBox::PlusMinus + + + No change + + + stars + + + -1 + + + 5 + + + -1 + + + + + + + &Publisher: + + + Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter + + + publisher + + + + + true - - QComboBox::InsertAlphabetically + + + + + + Add ta&gs: - - QComboBox::AdjustToContents + + Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter + + + tags - - + + - If checked, the series will be cleared + Tags categorize the book. This is particularly useful while searching. <br><br>They can be any words or phrases, separated by commas. + + + + + + + Open Tag Editor - Clear series + Open Tag Editor + + + + :/images/chapters.png:/images/chapters.png - - - - Qt::Horizontal + + + + &Remove tags: - - - 20 - 0 - + + remove_tags - + - - - - - - + + - If not checked, the series number for the books will be set to 1. + Comma separated list of tags to remove from the books. + + + + + + + Check this box to remove all tags from the books. + + + Remove all + + + + + + + &Series: + + + Qt::PlainText + + + Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter + + + series + + + + + + + + + List of known series. You can add new series. + + + List of known series. You can add new series. + + + true + + + QComboBox::InsertAlphabetically + + + QComboBox::AdjustToContents + + + + + + + If checked, the series will be cleared + + + Clear series + + + + + + + Qt::Horizontal + + + + 20 + 0 + + + + + + + + + + + + If not checked, the series number for the books will be set to 1. If checked, selected books will be automatically numbered, in the order you selected them. So if you selected Book A and then Book B, Book A will have series number 1 and Book B series number 2. - - - Automatically number books in this series - - - - - - - false - - - Series will normally be renumbered from the highest number in the database + + + Automatically number books in this series + + + + + + + false + + + Series will normally be renumbered from the highest number in the database for that series. Checking this box will tell calibre to start numbering from the value in the box + + + Force numbers to start with + + + + + + + false + + + 1 + + + 990000 + + + 1 + + + + + + + Qt::Horizontal + + + + 20 + 10 + + + + + + + + + + Remove &format: + + + remove_format + + + + + + + + + + true + + + + + + + &Swap title and author + + + + + + + Force the title to be in title case. If both this and swap authors are checked, +title and author are swapped before the title case is set - Force numbers to start with + Change title to title case - - - - false - - - 1 - - - 990000 - - - 1 - - - - - - - Qt::Horizontal - - - - 20 - 10 - - - - - - - - - - Remove &format: - - - remove_format - - - - - - - - - - true - - - - - - - &Swap title and author - - - - - - - Force the title to be in title case. If both this and swap authors are checked, -title and author are swapped before the title case is set - - - Change title to title case - - - - - - - Remove stored conversion settings for the selected books. + + + + Remove stored conversion settings for the selected books. Future conversion of these books will use the default settings. - - - Remove &stored conversion settings for the selected books - - - - - - - Qt::Vertical - - - - 20 - 40 - - - - - - - - Change &cover - - - - - - &Generate default cover - - - - - - - &Remove cover - - - - - - - Set from &ebook file(s) - - - - - - - - - - - &Custom metadata - - - - - &Search and replace - - - - QLayout::SetMinimumSize - - - - - true - - - true - - - - - - - - - - - - - - Search &field: - - - search_field - - - - - - - The name of the field that you want to search - - - - - - - + - Search &mode: - - - search_mode + Remove &stored conversion settings for the selected books - - - - Choose whether to use basic text matching or advanced regular expression matching - - - - - + + - Qt::Horizontal + Qt::Vertical 20 - 10 + 40 - - - - - - Te&mplate: - - - s_r_template - - - - - - - - 100 - 0 - - - - Enter a template to be used as the source for the search/replace - - - - - - - &Search for: - - - search_for - - - - - - - - 100 - 0 - - - - Enter the what you are looking for, either plain text or a regular expression, depending on the mode - - - - - - - Check this box if the search string must match exactly upper and lower case. Uncheck it if case is to be ignored - - - Cas&e sensitive - - - true - - - - - - - &Replace with: - - - replace_with - - - - - - - The replacement text. The matched search text will be replaced with this string - - - - - - - - - &Apply function after replace: - - - replace_func + + + + Change &cover + + + + + &Generate default cover + + + + + + + &Remove cover + + + + + + + Set from &ebook file(s) + + + + - - - - Specify how the text is to be processed after matching and replacement. In character mode, the entire -field is processed. In regular expression mode, only the matched text is processed - - - - - - - Qt::Horizontal - - - - 20 - 10 - - - - - - - - - &Destination field: + + + + &Custom metadata + + + + + &Search and replace + + + + QLayout::SetMinimumSize - - destination_field - - - - - - - The field that the text will be put into after all replacements. -If blank, the source field is used if the field is modifiable - - - - - - - + + + + true + + + true + + + + + - M&ode: + + + + + + + + Search &field: - replace_mode + search_field - - + + - Specify how the text should be copied into the destination. + The name of the field that you want to search - - + + + + + + Search &mode: + + + search_mode + + + + + + + Choose whether to use basic text matching or advanced regular expression matching + + + + + + + Qt::Horizontal + + + + 20 + 10 + + + + + + + + + + Te&mplate: + + + s_r_template + + + + + + + + 100 + 0 + + - Specifies whether result items should be split into multiple values or -left as single values. This option has the most effect when the source field is -not multiple and the destination field is multiple + Enter a template to be used as the source for the search/replace + + + + + + + &Search for: + + + search_for + + + + + + + + 100 + 0 + + + + Enter the what you are looking for, either plain text or a regular expression, depending on the mode + + + + + + + Check this box if the search string must match exactly upper and lower case. Uncheck it if case is to be ignored - Split &result + Cas&e sensitive true - - - - Qt::Horizontal - - - - 20 - 10 - - - - - - - - - - - - Qt::Horizontal - - - - 20 - 0 - - - - - - + + - For multiple-valued fields, sho&w + &Replace with: - results_count + replace_with - - - - true - - - 1 - - - 999 - - - 999 - - - - - - - values starting a&t - - - starting_from - - - - - - - true - - - 1 - - - 999 - - - 1 - - - - - - - with values separated b&y - - - multiple_separator - - - - - + + - Used when displaying test results to separate values in multiple-valued fields + The replacement text. The matched search text will be replaced with this string - - - - - - QFrame::NoFrame - - - true - - - - - 0 - 0 - 726 - 334 - - - - - + + + + - Test text + &Apply function after replace: + + + replace_func - - - - Test result + + + + Specify how the text is to be processed after matching and replacement. In character mode, the entire +field is processed. In regular expression mode, only the matched text is processed - - - - Your test: - - - - - - - - - - - + + - Qt::Vertical + Qt::Horizontal 20 - 5 + 10 - - - - - - - - + + + + + &Destination field: + + + destination_field + + + + + + + The field that the text will be put into after all replacements. +If blank, the source field is used if the field is modifiable + + + + + + + + + M&ode: + + + replace_mode + + + + + + + Specify how the text should be copied into the destination. + + + + + + + Specifies whether result items should be split into multiple values or +left as single values. This option has the most effect when the source field is +not multiple and the destination field is multiple + + + Split &result + + + true + + + + + + + Qt::Horizontal + + + + 20 + 10 + + + + + + + + + + + + Qt::Horizontal + + + + 20 + 0 + + + + + + + + For multiple-valued fields, sho&w + + + results_count + + + + + + + true + + + 1 + + + 999 + + + 999 + + + + + + + values starting a&t + + + starting_from + + + + + + + true + + + 1 + + + 999 + + + 1 + + + + + + + with values separated b&y + + + multiple_separator + + + + + + + Used when displaying test results to separate values in multiple-valued fields + + + + + + + + + QFrame::NoFrame + + + true + + + + + 0 + 0 + 197 + 60 + + + + + + + Test text + + + + + + + Test result + + + + + + + Your test: + + + + + + + + + + + + + Qt::Vertical + + + + 20 + 5 + + + + + + + + + + + + + + + - + Qt::Horizontal @@ -893,7 +911,6 @@ not multiple and the destination field is multiple swap_title_and_author change_title_to_title_case button_box - central_widget search_field search_mode s_r_template diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py index c2588f57a8..a4e8bb6972 100644 --- a/src/calibre/gui2/dialogs/metadata_single.py +++ b/src/calibre/gui2/dialogs/metadata_single.py @@ -823,7 +823,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): if book.series_index is not None: self.series_index.setValue(book.series_index) if book.has_cover: - if d.opt_auto_download_cover.isChecked() and book.has_cover: + if d.opt_auto_download_cover.isChecked(): self.fetch_cover() else: self.fetch_cover_button.setFocus(Qt.OtherFocusReason) diff --git a/src/calibre/gui2/layout.py b/src/calibre/gui2/layout.py index aaaf1b0267..2edf19d0c4 100644 --- a/src/calibre/gui2/layout.py +++ b/src/calibre/gui2/layout.py @@ -8,9 +8,9 @@ __docformat__ = 'restructuredtext en' from functools import partial from PyQt4.Qt import QIcon, Qt, QWidget, QToolBar, QSize, \ - pyqtSignal, QToolButton, QPushButton, \ - QObject, QVBoxLayout, QSizePolicy, QLabel, QHBoxLayout, QActionGroup, \ - QMenu + pyqtSignal, QToolButton, QMenu, QCheckBox, \ + QObject, QVBoxLayout, QSizePolicy, QLabel, QHBoxLayout, QActionGroup + from calibre.constants import __appname__ from calibre.gui2.search_box import SearchBox2, SavedSearchBox @@ -178,7 +178,9 @@ class SearchBar(QWidget): # {{{ x.setToolTip(_("

Search the list of books by title, author, publisher, tags, comments, etc.

Words separated by spaces are ANDed")) l.addWidget(x) - self.search_button = QPushButton(_('&Go!')) + self.search_button = QToolButton() + self.search_button.setToolButtonStyle(Qt.ToolButtonTextOnly) + self.search_button.setText(_('&Go!')) l.addWidget(self.search_button) self.search_button.setSizePolicy(QSizePolicy.Minimum, QSizePolicy.Minimum) @@ -192,6 +194,12 @@ class SearchBar(QWidget): # {{{ l.addWidget(x) x.setToolTip(_("Reset Quick Search")) + x = parent.search_highlight_only = QCheckBox() + x.setText(_('&Highlight')) + x.setToolTip(_('Highlight matched books in the book list, instead ' + 'of restricting the book list to the matches.')) + l.addWidget(x) + x = parent.saved_search = SavedSearchBox(self) x.setMaximumSize(QSize(150, 16777215)) x.setMinimumContentsLength(15) diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py index 49cb1ce182..eea452c238 100644 --- a/src/calibre/gui2/library/models.py +++ b/src/calibre/gui2/library/models.py @@ -10,7 +10,7 @@ from contextlib import closing from operator import attrgetter from PyQt4.Qt import QAbstractTableModel, Qt, pyqtSignal, QIcon, QImage, \ - QModelIndex, QVariant, QDate + QModelIndex, QVariant, QDate, QColor from calibre.gui2 import NONE, config, UNDEFINED_QDATE from calibre.utils.pyparsing import ParseException @@ -93,6 +93,9 @@ class BooksModel(QAbstractTableModel): # {{{ self.bool_no_icon = QIcon(I('list_remove.png')) self.bool_blank_icon = QIcon(I('blank.png')) self.device_connected = False + self.rows_matching = set() + self.lowest_row_matching = None + self.highlight_only = False self.read_config() def change_alignment(self, colname, alignment): @@ -229,9 +232,27 @@ class BooksModel(QAbstractTableModel): # {{{ self.endInsertRows() self.count_changed() + def set_highlight_only(self, toWhat): + self.highlight_only = toWhat + if self.last_search: + self.research() + def search(self, text, reset=True): try: - self.db.search(text) + if self.highlight_only: + self.db.search('') + if not text: + self.rows_matching = set() + self.lowest_row_matching = None + else: + self.rows_matching = self.db.search(text, return_matches=True) + if self.rows_matching: + self.lowest_row_matching = self.db.row(self.rows_matching[0]) + self.rows_matching = set(self.rows_matching) + else: + self.rows_matching = set() + self.lowest_row_matching = None + self.db.search(text) except ParseException as e: self.searched.emit(e.msg) return @@ -337,8 +358,9 @@ class BooksModel(QAbstractTableModel): # {{{ name, val = mi.format_field(key) if mi.metadata_for_field(key)['datatype'] == 'comments': name += ':html' - if val: + if val and name not in data: data[name] = val + return data @@ -651,6 +673,9 @@ class BooksModel(QAbstractTableModel): # {{{ return NONE if role in (Qt.DisplayRole, Qt.EditRole): return self.column_to_dc_map[col](index.row()) + elif role == Qt.BackgroundColorRole: + if self.id(index) in self.rows_matching: + return QColor('lightgreen') elif role == Qt.DecorationRole: if self.column_to_dc_decorator_map[col] is not None: return self.column_to_dc_decorator_map[index.column()](index.row()) diff --git a/src/calibre/gui2/library/views.py b/src/calibre/gui2/library/views.py index e1e9cf4456..ea2e03fdad 100644 --- a/src/calibre/gui2/library/views.py +++ b/src/calibre/gui2/library/views.py @@ -680,8 +680,14 @@ class BooksView(QTableView): # {{{ def set_editable(self, editable, supports_backloading): self._model.set_editable(editable) + def search_proxy(self, txt): + self._model.search(txt) + if self._model.lowest_row_matching is not None: + self.select_rows([self._model.lowest_row_matching], using_ids=False) + self.setFocus(Qt.OtherFocusReason) + def connect_to_search_box(self, sb, search_done): - sb.search.connect(self._model.search) + sb.search.connect(self.search_proxy) self._search_done = search_done self._model.searched.connect(self.search_done) diff --git a/src/calibre/gui2/preferences/toolbar.py b/src/calibre/gui2/preferences/toolbar.py index c13d956aea..26cdea19d3 100644 --- a/src/calibre/gui2/preferences/toolbar.py +++ b/src/calibre/gui2/preferences/toolbar.py @@ -37,7 +37,10 @@ class BaseModel(QAbstractListModel): dont_remove_from=set(['toolbar-device'])) if name is None: return FakeAction('--- '+_('Separator')+' ---', None) - return gui.iactions[name] + try: + return gui.iactions[name] + except: + return None def rowCount(self, parent): return len(self._data) @@ -124,7 +127,8 @@ class CurrentModel(BaseModel): BaseModel.__init__(self) self.gprefs_name = 'action-layout-'+key current = gprefs[self.gprefs_name] - self._data = [self.name_to_action(x, gui) for x in current] + self._data = [self.name_to_action(x, gui) for x in current] + self._data = [x for x in self._data if x is not None] self.key = key self.gui = gui diff --git a/src/calibre/gui2/search_box.py b/src/calibre/gui2/search_box.py index 9f74abfc86..e4073a01c9 100644 --- a/src/calibre/gui2/search_box.py +++ b/src/calibre/gui2/search_box.py @@ -16,6 +16,7 @@ from calibre.gui2 import config from calibre.gui2.dialogs.confirm_delete import confirm from calibre.gui2.dialogs.saved_search_editor import SavedSearchEditor from calibre.gui2.dialogs.search import SearchDialog +from calibre.utils.config import dynamic from calibre.utils.search_query_parser import saved_searches from calibre.utils.icu import sort_key @@ -375,6 +376,9 @@ class SearchBoxMixin(object): # {{{ unicode(self.search.toolTip()))) self.advanced_search_button.setStatusTip(self.advanced_search_button.toolTip()) self.clear_button.setStatusTip(self.clear_button.toolTip()) + self.search_highlight_only.stateChanged.connect(self.highlight_only_changed) + self.search_highlight_only.setChecked( + dynamic.get('search_highlight_only', False)) def focus_search_box(self, *args): self.search.setFocus(Qt.OtherFocusReason) @@ -401,6 +405,11 @@ class SearchBoxMixin(object): # {{{ def focus_to_library(self): self.current_view().setFocus(Qt.OtherFocusReason) + def highlight_only_changed(self, toWhat): + dynamic.set('search_highlight_only', toWhat) + self.current_view().model().set_highlight_only(toWhat) + self.focus_to_library() + # }}} class SavedSearchBoxMixin(object): # {{{ diff --git a/src/calibre/gui2/viewer/main.py b/src/calibre/gui2/viewer/main.py index 25f69b1558..6468cd88c6 100644 --- a/src/calibre/gui2/viewer/main.py +++ b/src/calibre/gui2/viewer/main.py @@ -26,6 +26,7 @@ from calibre.gui2.search_box import SearchBox2 from calibre.ebooks.metadata import MetaInformation from calibre.customize.ui import available_input_formats from calibre.gui2.viewer.dictionary import Lookup +from calibre import as_unicode class TOCItem(QStandardItem): @@ -632,7 +633,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer): else: r = getattr(worker.exception, 'reason', worker.exception) error_dialog(self, _('Could not open ebook'), - unicode(r), det_msg=worker.traceback, show=True) + as_unicode(r), det_msg=worker.traceback, show=True) self.close_progress_indicator() else: self.metadata.show_opf(self.iterator.opf, os.path.splitext(pathtoebook)[1][1:]) diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index 6aef45dbbd..4168360d3a 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -411,7 +411,8 @@ class ResultCache(SearchQueryParser): # {{{ if isinstance(location, list): if allow_recursion: for loc in location: - matches |= self.get_matches(loc, query, allow_recursion=False) + matches |= self.get_matches(loc, query, candidates, + allow_recursion=False) return matches raise ParseException(query, len(query), 'Recursive query group detected', self) @@ -419,11 +420,11 @@ class ResultCache(SearchQueryParser): # {{{ fm = self.field_metadata[location] # take care of dates special case if fm['datatype'] == 'datetime': - return self.get_dates_matches(location, query.lower()) + return self.get_dates_matches(location, query.lower(), candidates) # take care of numbers special case if fm['datatype'] in ('rating', 'int', 'float'): - return self.get_numeric_matches(location, query.lower()) + return self.get_numeric_matches(location, query.lower(), candidates) # take care of the 'count' operator for is_multiples if fm['is_multiple'] and \ @@ -431,7 +432,8 @@ class ResultCache(SearchQueryParser): # {{{ query[1:1] in '=<>!': vf = lambda item, loc=fm['rec_index'], ms=fm['is_multiple']:\ len(item[loc].split(ms)) if item[loc] is not None else 0 - return self.get_numeric_matches(location, query[1:], val_func=vf) + return self.get_numeric_matches(location, query[1:], + candidates, val_func=vf) # everything else, or 'all' matches matchkind = CONTAINS_MATCH diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index 98cc4b7ecd..d2913c8bf0 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -1531,10 +1531,23 @@ class EPUB_MOBI(CatalogPlugin): self.opts.header_note_source_field, index_is_id=True) if notes: - if field_md['datatype'] == 'text' and isinstance(notes,list): - notes = ' · '.join(notes) + if field_md['datatype'] == 'text': + print "\n inner notes: %s\n" % repr(notes) + if isinstance(notes,list): + notes = ' · '.join(notes) elif field_md['datatype'] == 'datetime': notes = format_date(notes,'dd MMM yyyy') + elif field_md['datatype'] == 'composite': + if re.match(r'\[(.*)\]$', notes): + # Sniff for special pseudo-list string "[]" + bracketed_content = re.match(r'\[(.*)\]$', notes).group(1) + if re.search(',',bracketed_content): + # Recast the comma-separated items as a list + items = bracketed_content.split(',') + items = [i.strip() for i in items] + notes = ' · '.join(items) + else: + notes = bracketed_content this_title['notes'] = {'source':field_md['name'], 'content':notes} diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index aa491aff28..5f66297322 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -341,10 +341,6 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): self.has_id = self.data.has_id self.count = self.data.count - # Count times get_metadata is called, and how many times in the cache - self.gm_count = 0 - self.gm_missed = 0 - for prop in ('author_sort', 'authors', 'comment', 'comments', 'isbn', 'publisher', 'rating', 'series', 'series_index', 'tags', 'title', 'timestamp', 'uuid', 'pubdate', 'ondevice'): @@ -710,6 +706,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): formats = row[fm['formats']] if not formats: formats = None + else: + formats = formats.split(',') mi.formats = formats tags = row[fm['tags']] if tags: diff --git a/src/calibre/trac/bzr_commit_plugin.py b/src/calibre/trac/bzr_commit_plugin.py index df6bf699d1..6c36115cae 100644 --- a/src/calibre/trac/bzr_commit_plugin.py +++ b/src/calibre/trac/bzr_commit_plugin.py @@ -110,6 +110,7 @@ class cmd_commit(_cmd_commit): suffix = 'The fix will be in the next release.' action = action+'ed' msg = '%s in branch %s. %s'%(action, nick, suffix) + msg = msg.replace('Fixesed', 'Fixed') server = xmlrpclib.ServerProxy(url) server.ticket.update(int(bug), msg, {'status':'closed', 'resolution':'fixed'}, diff --git a/src/calibre/utils/cleantext.py b/src/calibre/utils/cleantext.py index b4afe7576d..938960df93 100644 --- a/src/calibre/utils/cleantext.py +++ b/src/calibre/utils/cleantext.py @@ -3,7 +3,7 @@ __license__ = 'GPL 3' __copyright__ = '2010, sengian ' __docformat__ = 'restructuredtext en' -import re +import re, htmlentitydefs _ascii_pat = None @@ -21,3 +21,32 @@ def clean_ascii_chars(txt, charlist=None): pat = re.compile(u'|'.join(map(unichr, charlist))) return pat.sub('', txt) +## +# Fredrik Lundh: http://effbot.org/zone/re-sub.htm#unescape-html +# Removes HTML or XML character references and entities from a text string. +# +# @param text The HTML (or XML) source text. +# @return The plain text, as a Unicode string, if necessary. + +def unescape(text, rm=False, rchar=u''): + def fixup(m, rm=rm, rchar=rchar): + text = m.group(0) + if text[:2] == "&#": + # character reference + try: + if text[:3] == "&#x": + return unichr(int(text[3:-1], 16)) + else: + return unichr(int(text[2:-1])) + except ValueError: + pass + else: + # named entity + try: + text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]) + except KeyError: + pass + if rm: + return rchar #replace by char + return text # leave as is + return re.sub("&#?\w+;", fixup, text) diff --git a/src/calibre/utils/formatter.py b/src/calibre/utils/formatter.py index 40760bf91b..0b5f1d1f52 100644 --- a/src/calibre/utils/formatter.py +++ b/src/calibre/utils/formatter.py @@ -18,6 +18,24 @@ class _Parser(object): LEX_NUM = 4 LEX_EOF = 5 + def _python(self, func): + locals = {} + exec func in locals + if 'evaluate' not in locals: + self.error('no evaluate function in python') + try: + result = locals['evaluate'](self.parent.kwargs) + if isinstance(result, (float, int)): + result = unicode(result) + elif isinstance(result, list): + result = ','.join(result) + elif isinstance(result, str): + result = unicode(result) + return result + except Exception as e: + self.error('python function threw exception: ' + e.msg) + + def _strcmp(self, x, y, lt, eq, gt): v = strcmp(x, y) if v < 0: @@ -79,6 +97,7 @@ class _Parser(object): 'field' : (1, lambda s, x: s.parent.get_value(x, [], s.parent.kwargs)), 'multiply' : (2, partial(_math, op='*')), 'print' : (-1, _print), + 'python' : (1, _python), 'strcat' : (-1, _concat), 'strcmp' : (5, _strcmp), 'substr' : (3, lambda s, x, y, z: x[int(y): len(x) if int(z) == 0 else int(z)]), @@ -362,7 +381,7 @@ class TemplateFormatter(string.Formatter): (r'\'.*?((?