diff --git a/resources/images/document-encrypt.png b/resources/images/document-encrypt.png new file mode 100644 index 0000000000..0774342024 Binary files /dev/null and b/resources/images/document-encrypt.png differ diff --git a/resources/images/news/zerohedge.png b/resources/images/news/zerohedge.png new file mode 100644 index 0000000000..a2bc6cde14 Binary files /dev/null and b/resources/images/news/zerohedge.png differ diff --git a/resources/recipes/expansion_spanish.recipe b/resources/recipes/expansion_spanish.recipe index 31a1504eb0..f2229e90e6 100644 --- a/resources/recipes/expansion_spanish.recipe +++ b/resources/recipes/expansion_spanish.recipe @@ -1,59 +1,79 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- - __license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' +__author__ = 'Gerardo Diez' +__copyright__ = 'Gerardo Diez' +description = 'Main daily newspaper from Spain - v1.00 (05, Enero 2011)' +__docformat__ = 'restructuredtext en' + ''' -www.expansion.com +expansion.es ''' +from calibre.web.feeds.recipes import BasicNewsRecipe +class Publico(BasicNewsRecipe): + title =u'Expansion.com' + __author__ ='Gerardo Diez' + publisher =u'Unidad Editorial Información Económica, S.L.' + category ='finances, catalunya' + oldest_article =1 + max_articles_per_feed =100 + simultaneous_downloads =10 + cover_url =u'http://estaticos01.expansion.com/iconos/v2.x/v2.0/cabeceras/logo_expansion.png' + timefmt ='[%A, %d %B, %Y]' + encoding ='latin' + language ='es' + remove_javascript =True + no_stylesheets =True + keep_only_tags =dict(name='div', attrs={'class':['noticia primer_elemento']}) + remove_tags =[ + dict(name='div', attrs={'class':['compartir', 'metadata_desarrollo_noticia', 'relacionadas', 'mas_info','publicidad publicidad_textlink', 'ampliarfoto']}), + dict(name='ul', attrs={'class':['bolos_desarrollo_noticia']}), + dict(name='span', attrs={'class':['comentarios']}), + dict(name='p', attrs={'class':['cintillo_comentarios', 'cintillo_comentarios formulario']}), + dict(name='div', attrs={'id':['comentarios_lectores_listado']}) + ] + feeds =[ + (u'Portada', u'http://estaticos.expansion.com/rss/portada.xml'), + (u'Portada: Bolsas', u'http://estaticos.expansion.com/rss/mercados.xml'), + (u'Divisas', u'http://estaticos.expansion.com/rss/mercadosdivisas.xml'), + (u'Euribor', u'http://estaticos.expansion.com/rss/mercadoseuribor.xml'), + (u'Materias Primas', u'http://estaticos.expansion.com/rss/mercadosmateriasprimas.xml'), + (u'Renta Fija', u'http://estaticos.expansion.com/rss/mercadosrentafija.xml'), -from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag + (u'Portada: Mi Dinero', u'http://estaticos.expansion.com/rss/midinero.xml'), + (u'Hipotecas', u'http://estaticos.expansion.com/rss/midinerohipotecas.xml'), + (u'Créditos', u'http://estaticos.expansion.com/rss/midinerocreditos.xml'), + (u'Pensiones', u'http://estaticos.expansion.com/rss/midineropensiones.xml'), + (u'Fondos de Inversión', u'http://estaticos.expansion.com/rss/midinerofondos.xml'), + (u'Motor', u'http://estaticos.expansion.com/rss/midineromotor.xml'), -class Expansion(BasicNewsRecipe): - title = 'Diario Expansion' - __author__ = 'Darko Miletic' - description = 'Lider de informacion de mercados, economica y politica' - publisher = 'expansion.com' - category = 'news, politics, Spain' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - delay = 1 - encoding = 'iso-8859-15' - language = 'es' + (u'Portada: Empresas', u'http://estaticos.expansion.com/rss/empresas.xml'), + (u'Banca', u'http://estaticos.expansion.com/rss/empresasbanca.xml'), + (u'TMT', u'http://estaticos.expansion.com/rss/empresastmt.xml'), + (u'Energía', u'http://estaticos.expansion.com/rss/empresasenergia.xml'), + (u'Inmobiliario y Construcción', u'http://estaticos.expansion.com/rss/empresasinmobiliario.xml'), + (u'Transporte y Turismo', u'http://estaticos.expansion.com/rss/empresastransporte.xml'), + (u'Automoción e Industria', u'http://estaticos.expansion.com/rss/empresasauto-industria.xml'), + (u'Distribución', u'http://estaticos.expansion.com/rss/empresasdistribucion.xml'), + (u'Deporte y Negocio', u' http://estaticos.expansion.com/rss/empresasdeporte.xml'), + (u'Mi Negocio', u'http://estaticos.expansion.com/rss/empresasminegocio.xml'), + (u'Interiores', u'http://estaticos.expansion.com/rss/empresasinteriores.xml'), + (u'Digitech', u'http://estaticos.expansion.com/rss/empresasdigitech.xml'), - direction = 'ltr' + (u'Portada: Economía y Política', u'http://estaticos.expansion.com/rss/economiapolitica.xml'), + (u'Política', u'http://estaticos.expansion.com/rss/economia.xml'), + (u'Portada: Sociedad', u'http://estaticos.expansion.com/rss/entorno.xml'), - html2lrf_options = [ - '--comment' , description - , '--category' , category - , '--publisher', publisher - ] + (u'Portada: Opinión', u'http://estaticos.expansion.com/rss/opinion.xml'), + (u'Llaves y editoriales', u'http://estaticos.expansion.com/rss/opinioneditorialyllaves.xml'), + (u'Tribunas', u'http://estaticos.expansion.com/rss/opiniontribunas.xml'), - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + (u'Portada: Jurídico', u'http://estaticos.expansion.com/rss/juridico.xml'), + (u'Entrevistas', u'http://estaticos.expansion.com/rss/juridicoentrevistas.xml'), + (u'Opinión', u'http://estaticos.expansion.com/rss/juridicoopinion.xml'), + (u'Sentencias', u'http://estaticos.expansion.com/rss/juridicosentencias.xml'), - feeds = [ - (u'Ultimas noticias', u'http://rss.expansion.com/rss/descarga.htm?data2=178') - ,(u'Temas del dia' , u'http://rss.expansion.com/rss/descarga.htm?data2=178') - ] - - - keep_only_tags = [dict(name='div', attrs={'id':'principal'})] - - remove_tags = [ - dict(name=['object','link','script']) - ,dict(name='div', attrs={'class':['utilidades','tit_relacionadas']}) - ] - - remove_tags_after = [dict(name='div', attrs={'class':'tit_relacionadas'})] - - def preprocess_html(self, soup): - soup.html['dir' ] = self.direction - mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]) - soup.head.insert(0,mcharset) - for item in soup.findAll(style=True): - del item['style'] - return soup + (u'Mujer', u'http://estaticos.expansion.com/rss/mujer-empresa.xml'), + (u'Cataluña', u'http://estaticos.expansion.com/rss/catalunya.xml'), + (u'Función pública', u'http://estaticos.expansion.com/rss/funcion-publica.xml') + ] diff --git a/resources/recipes/globe_and_mail.recipe b/resources/recipes/globe_and_mail.recipe index 4cc76688c1..22cb6fa5bb 100644 --- a/resources/recipes/globe_and_mail.recipe +++ b/resources/recipes/globe_and_mail.recipe @@ -8,12 +8,13 @@ __docformat__ = 'restructuredtext en' globeandmail.com ''' +import re + from calibre.web.feeds.news import BasicNewsRecipe class AdvancedUserRecipe1287083651(BasicNewsRecipe): title = u'Globe & Mail' - __license__ = 'GPL v3' - __author__ = 'Szing' + __author__ = 'Kovid Goyal' oldest_article = 2 no_stylesheets = True max_articles_per_feed = 100 @@ -38,24 +39,19 @@ class AdvancedUserRecipe1287083651(BasicNewsRecipe): (u'Sports', u'http://www.theglobeandmail.com/auto/?service=rss') ] - keep_only_tags = [ - dict(name='h1'), - dict(name='h2', attrs={'id':'articletitle'}), - dict(name='p', attrs={'class':['leadText', 'meta', 'leadImage', 'redtext byline', 'bodyText']}), - dict(name='div', attrs={'class':['news','articlemeta','articlecopy']}), - dict(name='id', attrs={'class':'article'}), - dict(name='table', attrs={'class':'todays-market'}), - dict(name='header', attrs={'id':'leadheader'}) - ] + preprocess_regexps = [ + (re.compile(r'', re.DOTALL), lambda m: ''), + (re.compile(r'', re.DOTALL), lambda m: ''), + ] + remove_tags_before = dict(name='h1') remove_tags = [ - dict(name='div', attrs={'id':['tabInside', 'ShareArticles', 'topStories']}) - ] - - #this has to be here or the text in the article appears twice. - remove_tags_after = [dict(id='article')] + dict(name='div', attrs={'id':['ShareArticles', 'topStories']}), + dict(href=lambda x: x and 'tracking=' in x), + {'class':['articleTools', 'pagination', 'Ads', 'topad', + 'breadcrumbs', 'footerNav', 'footerUtil', 'downloadlinks']}] #Use the mobile version rather than the web version def print_version(self, url): - return url + '&service=mobile' + return url.rpartition('?')[0] + '?service=mobile' diff --git a/resources/recipes/msnbc.recipe b/resources/recipes/msnbc.recipe index 6e2fc50aaa..6e58585341 100644 --- a/resources/recipes/msnbc.recipe +++ b/resources/recipes/msnbc.recipe @@ -1,10 +1,9 @@ __license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' +__copyright__ = '2010-2011, Darko Miletic ' ''' msnbc.msn.com ''' -import re from calibre.web.feeds.recipes import BasicNewsRecipe class MsNBC(BasicNewsRecipe): @@ -19,7 +18,16 @@ class MsNBC(BasicNewsRecipe): publisher = 'msnbc.com' category = 'news, USA, world' language = 'en' - extra_css = ' body{ font-family: sans-serif } .head{font-family: serif; font-size: xx-large; font-weight: bold; color: #CC0000} .abstract{font-weight: bold} .source{font-size: small} .updateTime{font-size: small} ' + extra_css = """ + body{ font-family: Georgia,Times,serif } + .hide{display: none} + .caption{font-family: Arial,sans-serif; font-size: x-small} + .entry-summary{font-family: Arial,sans-serif} + .copyright{font-size: 0.95em; font-style: italic} + .source-org{font-size: small; font-family: Arial,sans-serif} + img{display: block; margin-bottom: 0.5em} + span.byline{display: none} + """ conversion_options = { 'comments' : description @@ -28,14 +36,20 @@ class MsNBC(BasicNewsRecipe): ,'publisher': publisher } - preprocess_regexps = [ - (re.compile(r'', re.DOTALL|re.IGNORECASE),lambda match: '') - ,(re.compile(r'
', re.DOTALL|re.IGNORECASE),lambda match: '
'), - ] + remove_tags_before = dict(name='h1', attrs={'id':'headline'}) + remove_tags_after = dict(name='span', attrs={'class':['copyright','Linear copyright']}) + keep_only_tags=[ + dict(attrs={'id':['headline','deck','byline','source','intelliTXT']}) + ,dict(attrs={'class':['gl_headline','articleText','drawer-content Linear','v-center3','byline','textBodyBlack']}) + ] + remove_attributes=['property','lang','rel','xmlns:fb','xmlns:v','xmlns:dc','xmlns:dcmitype','xmlns:og','xmlns:media','xmlns:vcard','typeof','itemscope','itemtype','itemprop','about','type','size','width','height','onreadystatechange','data','border','hspace','vspace'] + + remove_tags = [ + dict(name=['iframe','object','link','embed','meta','table']) + ,dict(name='span', attrs={'class':['copyright','Linear copyright']}) + ,dict(name='div', attrs={'class':'social'}) + ] - remove_tags_before = dict(name='div', attrs={'class':'head'}) - remove_tags_after = dict(name='div', attrs={'class':'copyright'}) - remove_tags = [dict(name=['iframe','object','link','script','form'])] feeds = [ (u'US News' , u'http://rss.msnbc.msn.com/id/3032524/device/rss/rss.xml' ) @@ -48,11 +62,26 @@ class MsNBC(BasicNewsRecipe): ,(u'Tech & Science', u'http://rss.msnbc.msn.com/id/3032117/device/rss/rss.xml' ) ] - def print_version(self, url): - return url + 'print/1/displaymode/1098/' - def preprocess_html(self, soup): - for item in soup.head.findAll('div'): - item.extract() + for item in soup.body.findAll('html'): + item.name='div' + for item in soup.body.findAll('div'): + if item.has_key('id') and item['id'].startswith('vine-'): + item.extract() + if item.has_key('class') and ( item['class'].startswith('ad') or item['class'].startswith('vine')): + item.extract() + for item in soup.body.findAll('img'): + if not item.has_key('alt'): + item['alt'] = 'image' + for item in soup.body.findAll('ol'): + if item.has_key('class') and item['class'].startswith('grid'): + item.extract() + for item in soup.body.findAll('span'): + if ( item.has_key('id') and item['id'].startswith('byLine') and item.string is None) or ( item.has_key('class') and item['class'].startswith('inline') ): + item.extract() + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) return soup diff --git a/resources/recipes/technology_review.recipe b/resources/recipes/technology_review.recipe index cc8f13733e..e7cc6700d7 100644 --- a/resources/recipes/technology_review.recipe +++ b/resources/recipes/technology_review.recipe @@ -35,7 +35,6 @@ class TechnologyReview(BasicNewsRecipe): def get_article_url(self, article): return article.get('guid', article.get('id', None)) - def print_version(self, url): baseurl='http://www.technologyreview.com/printer_friendly_article.aspx?id=' split1 = string.split(url,"/") @@ -43,3 +42,25 @@ class TechnologyReview(BasicNewsRecipe): split2= string.split(xxx,"/") s = baseurl + split2[0] return s + + + def postprocess_html(self,soup, True): + #remove picture + headerhtml = soup.find(True, {'class':'header'}) + headerhtml.replaceWith("") + + #remove close button + closehtml = soup.find(True, {'class':'close'}) + closehtml.replaceWith("") + + #remove banner advertisement + bannerhtml = soup.find(True, {'class':'bannerad'}) + bannerhtml.replaceWith("") + + #thanks kiklop74! This code removes all links from the text + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) + + return soup diff --git a/resources/recipes/tyzden.recipe b/resources/recipes/tyzden.recipe index c206244ff6..b8d7389fbe 100644 --- a/resources/recipes/tyzden.recipe +++ b/resources/recipes/tyzden.recipe @@ -28,7 +28,7 @@ class TyzdenRecipe(BasicNewsRecipe): if (weeknum > 1): weeknum -= 1 - title = u'.tyzden ' + str(weeknum) + '/' + str(year) + title = u'tyzden' base_url_path = 'http://www.tyzden.sk/casopis/' + str(year) + '/' + str(weeknum) base_url = base_url_path + '.html' diff --git a/resources/recipes/wired_daily.recipe b/resources/recipes/wired_daily.recipe index f06d28796e..df59c7c826 100644 --- a/resources/recipes/wired_daily.recipe +++ b/resources/recipes/wired_daily.recipe @@ -2,8 +2,10 @@ __license__ = 'GPL v3' __docformat__ = 'restructuredtext en' +import re from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.chardet import xml_to_unicode class Wired_Daily(BasicNewsRecipe): @@ -15,30 +17,43 @@ class Wired_Daily(BasicNewsRecipe): no_stylesheets = True + preprocess_regexps = [(re.compile(r'', re.DOTALL), lambda m: + '')] + remove_tags_before = dict(name='div', id='content') - remove_tags = [dict(id=['social_tools', 'outerWrapper', 'sidebar', - 'footer', 'advertisement', 'blog_subscription_unit', - 'brightcove_component']), - {'class':'entryActions'}, - dict(name=['noscript', 'script'])] + remove_tags = [dict(id=['header', 'commenting_module', 'post_nav', + 'social_tools', 'sidebar', 'footer', 'social_wishlist', 'pgwidget', + 'outerWrapper', 'inf_widget']), + {'class':['entryActions', 'advertisement', 'entryTags']}, + dict(name=['noscript', 'script']), + dict(name='h4', attrs={'class':re.compile(r'rat\d+')}), + {'class':lambda x: x and x.startswith('contentjump')}, + dict(name='li', attrs={'class':['entryCategories', 'entryEdit']})] + feeds = [ ('Top News', 'http://feeds.wired.com/wired/index'), - ('Culture', 'http://feeds.wired.com/wired/culture'), - ('Software', 'http://feeds.wired.com/wired/software'), - ('Mac', 'http://feeds.feedburner.com/cultofmac/bFow'), - ('Gadgets', 'http://feeds.wired.com/wired/gadgets'), - ('Cars', 'http://feeds.wired.com/wired/cars'), - ('Entertainment', 'http://feeds.wired.com/wired/entertainment'), - ('Gaming', 'http://feeds.wired.com/wired/gaming'), - ('Science', 'http://feeds.wired.com/wired/science'), - ('Med Tech', 'http://feeds.wired.com/wired/medtech'), - ('Politics', 'http://feeds.wired.com/wired/politics'), - ('Tech Biz', 'http://feeds.wired.com/wired/techbiz'), - ('Commentary', 'http://feeds.wired.com/wired/commentary'), + ('Product Reviews', + 'http://www.wired.com/reviews/feeds/latestProductsRss'), + ('Autopia', 'http://www.wired.com/autopia/feed/'), + ('Danger Room', 'http://www.wired.com/dangerroom/feed/'), + ('Epicenter', 'http://www.wired.com/epicenter/feed/'), + ('Gadget Lab', 'http://www.wired.com/gadgetlab/feed/'), + ('Geek Dad', 'http://www.wired.com/geekdad/feed/'), + ('Playbook', 'http://www.wired.com/playbook/feed/'), + ('Rawfile', 'http://www.wired.com/rawfile/feed/'), + ('This Day in Tech', 'http://www.wired.com/thisdayintech/feed/'), + ('Threat Level', 'http://www.wired.com/threatlevel/feed/'), + ('Underwire', 'http://www.wired.com/underwire/feed/'), + ('Web Monkey', 'http://www.webmonkey.com/feed/'), + ('Science', 'http://www.wired.com/wiredscience/feed/'), ] + def populate_article_metadata(self, article, soup, first): + if article.text_summary: + article.text_summary = xml_to_unicode(article.text_summary, + resolve_entities=True)[0] + def print_version(self, url): - return url.replace('http://www.wired.com/', 'http://www.wired.com/print/') - + return url + '/all/1' diff --git a/resources/recipes/zerohedge.recipe b/resources/recipes/zerohedge.recipe new file mode 100644 index 0000000000..09f62e5b52 --- /dev/null +++ b/resources/recipes/zerohedge.recipe @@ -0,0 +1,33 @@ +__license__ = 'GPL v3' +__copyright__ = '2011, Darko Miletic ' +''' +www.zerohedge.com +''' + +from calibre.web.feeds.recipes import BasicNewsRecipe + +class ZeroHedge(BasicNewsRecipe): + title = 'Zero Hedge' + __author__ = 'Darko Miletic' + description = 'On a long enough timeline the survival rate for everyone drops to zero' + oldest_article = 10 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = True + encoding = 'utf8' + publisher = 'zero hedge' + category = 'news, USA, world, economy, politics' + language = 'en' + masthead_url = 'http://www.zerohedge.com/themes/newsflash/logo.png' + publication_type = 'blog' + extra_css = 'body{ font-family: sans-serif }' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher': publisher + } + + + feeds = [(u'Articles', u'http://feeds.feedburner.com/zerohedge/feed')] diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index 2585b5d081..a4f7439405 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -459,6 +459,18 @@ def force_unicode(obj, enc=preferred_encoding): obj = obj.decode('utf-8') return obj +def as_unicode(obj, enc=preferred_encoding): + if not isbytestring(obj): + try: + obj = unicode(obj) + except: + try: + obj = str(obj) + except: + obj = repr(obj) + return force_unicode(obj, enc=enc) + + def human_readable(size): """ Convert a size in bytes into a human readable form """ diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index b1d760ea2d..9b22fb46ec 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -88,6 +88,7 @@ class Plumber(object): self.ui_reporter = report_progress self.abort_after_input_dump = abort_after_input_dump + # Pipeline options {{{ # Initialize the conversion options that are independent of input and # output formats. The input and output plugins can still disable these # options via recommendations. @@ -527,6 +528,7 @@ OptionRecommendation(name='timestamp', help=_('Set the book timestamp (used by the date column in calibre).')), ] + # }}} input_fmt = os.path.splitext(self.input)[1] if not input_fmt: diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index 4dd6e7c7ae..796a94533a 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -16,7 +16,6 @@ import uuid from lxml import etree -from calibre import guess_type from calibre import prepare_string_for_xml from calibre.constants import __appname__, __version__ from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace @@ -41,7 +40,7 @@ class FB2MLizer(object): # in different directories. FB2 images are all in a flat layout so we rename all images # into a sequential numbering system to ensure there are no collisions between image names. self.image_hrefs = {} - # Mapping of toc items and their + # Mapping of toc items and their self.toc = {} # Used to see whether a new
needs to be opened self.section_level = 0 @@ -51,7 +50,7 @@ class FB2MLizer(object): self.oeb_book = oeb_book self.opts = opts self.reset_state() - + # Used for adding
s and s to allow readers # to generate toc from the document. if self.opts.sectionize == 'toc': @@ -75,20 +74,20 @@ class FB2MLizer(object): text = re.sub(r'(?miu)<p>\s*</p>', '', text) text = re.sub(r'(?miu)\s*</p>', '</p>', text) text = re.sub(r'(?miu)</p>\s*<p>', '</p>\n\n<p>', text) - + text = re.sub(r'(?miu)<title>\s*', '', text) text = re.sub(r'(?miu)\s+', '', text) - + text = re.sub(r'(?miu)
\s*
', '', text) text = re.sub(r'(?miu)\s*
', '\n
', text) text = re.sub(r'(?miu)\s*', '\n\n', text) text = re.sub(r'(?miu)\s*
', '\n
', text) text = re.sub(r'(?miu)
\s*', '
\n', text) text = re.sub(r'(?miu)
', '
\n\n
', text) - + if self.opts.insert_blank_line: text = re.sub(r'(?miu)

', '

', text) - + return text def fb2_header(self): @@ -102,6 +101,7 @@ class FB2MLizer(object): metadata['date'] = '%i.%i.%i' % (datetime.now().day, datetime.now().month, datetime.now().year) metadata['lang'] = u''.join(self.oeb_book.metadata.lang) if self.oeb_book.metadata.lang else 'en' metadata['id'] = None + metadata['cover'] = self.get_cover() author_parts = self.oeb_book.metadata.creator[0].value.split(' ') if len(author_parts) == 1: @@ -121,10 +121,11 @@ class FB2MLizer(object): break if metadata['id'] is None: self.log.warn('No UUID identifier found') - metadata['id'] = str(uuid.uuid4()) + metadata['id'] = str(uuid.uuid4()) for key, value in metadata.items(): - metadata[key] = prepare_string_for_xml(value) + if not key == 'cover': + metadata[key] = prepare_string_for_xml(value) return u'' \ '' \ @@ -136,6 +137,7 @@ class FB2MLizer(object): '%(author_last)s' \ '' \ '%(title)s' \ + '%(cover)s' \ '%(lang)s' \ '' \ '' \ @@ -154,48 +156,64 @@ class FB2MLizer(object): def fb2_footer(self): return u'' + def get_cover(self): + cover_href = None + + # Get the raster cover if it's available. + if self.oeb_book.metadata.cover and unicode(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids: + id = unicode(self.oeb_book.metadata.cover[0]) + cover_item = self.oeb_book.manifest.ids[id] + if cover_item.media_type in OEB_RASTER_IMAGES: + cover_href = cover_item.href + else: + # Figure out if we have a title page or a cover page + page_name = '' + if 'titlepage' in self.oeb_book.guide: + page_name = 'titlepage' + elif 'cover' in self.oeb_book.guide: + page_name = 'cover' + + if page_name: + cover_item = self.oeb_book.manifest.hrefs[self.oeb_book.guide[page_name].href] + # Get the first image in the page + for img in cover_item.xpath('//img'): + cover_href = cover_item.abshref(img.get('src')) + break + + if cover_href: + # Only write the image tag if it is in the manifest. + if cover_href in self.oeb_book.manifest.hrefs.keys(): + if cover_href not in self.image_hrefs.keys(): + self.image_hrefs[cover_href] = '_%s.jpg' % len(self.image_hrefs.keys()) + return u'' % self.image_hrefs[cover_href] + + return u'' + def get_text(self): text = [''] - + # Create main section if there are no others to create if self.opts.sectionize == 'nothing': text.append('
') self.section_level += 1 - - # Insert the title page / cover into the spine if it is not already referenced. - title_name = u'' - if 'titlepage' in self.oeb_book.guide: - title_name = 'titlepage' - elif 'cover' in self.oeb_book.guide: - title_name = 'cover' - if title_name: - title_item = self.oeb_book.manifest.hrefs[self.oeb_book.guide[title_name].href] - if title_item.spine_position is None and title_item.media_type == 'application/xhtml+xml': - self.oeb_book.spine.insert(0, title_item, True) - # Create xhtml page to reference cover image so it can be used. - if not title_name and self.oeb_book.metadata.cover and unicode(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids: - id = unicode(self.oeb_book.metadata.cover[0]) - cover_item = self.oeb_book.manifest.ids[id] - if cover_item.media_type in OEB_RASTER_IMAGES: - self.insert_image_cover(cover_item.href) - + for item in self.oeb_book.spine: self.log.debug('Converting %s to FictionBook2 XML' % item.href) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile) - + # Start a
if we must sectionize each file or if the TOC references this page page_section_open = False if self.opts.sectionize == 'files' or self.toc.get(item.href) == 'page': text.append('
') page_section_open = True self.section_level += 1 - + text += self.dump_text(item.data.find(XHTML('body')), stylizer, item) - + if page_section_open: text.append('
') self.section_level -= 1 - + # Close any open sections while self.section_level > 0: text.append('
') @@ -203,17 +221,6 @@ class FB2MLizer(object): return ''.join(text) + '' - def insert_image_cover(self, image_href): - from calibre.ebooks.oeb.base import RECOVER_PARSER - try: - root = etree.fromstring(u'' % (XHTML_NS, image_href), parser=RECOVER_PARSER) - except: - root = etree.fromstring(u'', parser=RECOVER_PARSER) - - id, href = self.oeb_book.manifest.generate('fb2_cover', 'fb2_cover.xhtml') - item = self.oeb_book.manifest.add(id, href, guess_type(href)[0], data=root) - self.oeb_book.spine.insert(0, item, True) - def fb2mlize_images(self): ''' This function uses the self.image_hrefs dictionary mapping. It is populated by the dump_text function. @@ -345,7 +352,7 @@ class FB2MLizer(object): self.toc[page.href] = None elif toc_entry and elem_tree.attrib.get('id', None): newlevel = toc_entry.get(elem_tree.attrib.get('id', None), None) - + # Start a new section if necessary if newlevel: if not (newlevel > self.section_level): diff --git a/src/calibre/ebooks/metadata/rtf.py b/src/calibre/ebooks/metadata/rtf.py index 3e316ee430..70f6effcda 100644 --- a/src/calibre/ebooks/metadata/rtf.py +++ b/src/calibre/ebooks/metadata/rtf.py @@ -85,42 +85,42 @@ def get_metadata(stream): """ Return metadata as a L{MetaInfo} object """ stream.seek(0) if stream.read(5) != r'{\rtf': - return MetaInformation(_('Unknown'), None) + return MetaInformation(_('Unknown')) block = get_document_info(stream)[0] if not block: - return MetaInformation(_('Unknown'), None) + return MetaInformation(_('Unknown')) stream.seek(0) cpg = detect_codepage(stream) stream.seek(0) title_match = title_pat.search(block) - if title_match: + if title_match is not None: title = decode(title_match.group(1).strip(), cpg) else: title = _('Unknown') author_match = author_pat.search(block) - if author_match: + if author_match is not None: author = decode(author_match.group(1).strip(), cpg) else: author = None - mi = MetaInformation(title, author) + mi = MetaInformation(title) if author: mi.authors = string_to_authors(author) - + comment_match = comment_pat.search(block) - if comment_match: + if comment_match is not None: comment = decode(comment_match.group(1).strip(), cpg) mi.comments = comment tags_match = tags_pat.search(block) - if tags_match: + if tags_match is not None: tags = decode(tags_match.group(1).strip(), cpg) mi.tags = tags publisher_match = publisher_pat.search(block) - if publisher_match: + if publisher_match is not None: publisher = decode(publisher_match.group(1).strip(), cpg) mi.publisher = publisher - + return mi def create_metadata(stream, options): @@ -149,7 +149,7 @@ def create_metadata(stream, options): md.append('}') stream.seek(0) src = stream.read() - ans = src[:6] + ''.join(md) + src[6:] + ans = src[:6] + u''.join(md) + src[6:] stream.seek(0) stream.write(ans) @@ -197,7 +197,7 @@ def set_metadata(stream, options): tags = options.tags if tags is not None: tags = ', '.join(tags) - tags = tags.encode('ascii', 'ignore') + tags = tags.encode('ascii', 'replace') pat = re.compile(base_pat.replace('name', 'category'), re.DOTALL) if pat.search(src): src = pat.sub(r'{\\category ' + tags + r'}', src) diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py index 915ca55fc1..e979f9949e 100644 --- a/src/calibre/ebooks/rtf/input.py +++ b/src/calibre/ebooks/rtf/input.py @@ -77,19 +77,16 @@ class RTFInput(InputFormatPlugin): def generate_xml(self, stream): from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf - ofile = 'dataxml.xml' - run_lev = 1 - if hasattr(self.opts, 'debug_pipeline'): + ofile = 'dataxml.xml' + run_lev, debug_dir = 1, None + #just to check if the debug process is lauched, no need of this directory in fact + if getattr(self.opts, 'debug_pipeline', None) is not None: try: + os.mkdir('rtfdebug') debug_dir = 'rtfdebug' - os.mkdir(debug_dir) run_lev = 4 - except OSError, ( errno, strerror ): - print strerror - print errno - debug_dir = None - else: - debug_dir = None + except: + pass parser = ParseRtf( in_file = stream, out_file = ofile, @@ -127,32 +124,38 @@ class RTFInput(InputFormatPlugin): # Write or do not write paragraphs. Default is 0. empty_paragraphs = 1, - + #debug deb_dir = debug_dir, run_level = run_lev, ) parser.parse_rtf() - ans = open('dataxml.xml').read() - return ans + with open(ofile, 'rb') as f: + return f.read() def extract_images(self, picts): + import imghdr self.log('Extracting images...') - - raw = open(picts, 'rb').read() + + with open(picts, 'rb') as f: + raw = f.read() picts = filter(len, re.findall(r'\{\\pict([^}]+)\}', raw)) - hex = re.compile(r'[^a-zA-Z0-9]') + hex = re.compile(r'[^a-fA-F0-9]') encs = [hex.sub('', pict) for pict in picts] - + count = 0 imap = {} for enc in encs: if len(enc) % 2 == 1: enc = enc[:-1] data = enc.decode('hex') + fmt = imghdr.what(None, data) + if fmt is None: + fmt = 'wmf' count += 1 - name = '%04d.wmf' % count - open(name, 'wb').write(data) + name = '%04d.%s' % (count, fmt) + with open(name, 'wb') as f: + f.write(data) imap[count] = name #open(name+'.hex', 'wb').write(enc) return self.convert_images(imap) @@ -183,6 +186,7 @@ class RTFInput(InputFormatPlugin): # return self.convert_images(imap) def convert_images(self, imap): + self.default_img = None for count, val in imap.iteritems(): try: imap[count] = self.convert_image(val) @@ -191,11 +195,35 @@ class RTFInput(InputFormatPlugin): return imap def convert_image(self, name): - from calibre.utils.magick import Image - img = Image() - img.open(name) + if not name.endswith('.wmf'): + return name + try: + return self.rasterize_wmf(name) + except: + self.log.exception('Failed to convert WMF image %r'%name) + return self.replace_wmf(name) + + def replace_wmf(self, name): + from calibre.ebooks import calibre_cover + if self.default_img is None: + self.default_img = calibre_cover('Conversion of WMF images is not supported', + 'Use Microsoft Word or OpenOffice to save this RTF file' + ' as HTML and convert that in calibre.', title_size=36, + author_size=20) name = name.replace('.wmf', '.jpg') - img.save(name) + with open(name, 'wb') as f: + f.write(self.default_img) + return name + + def rasterize_wmf(self, name): + raise ValueError('Conversion of WMF images not supported') + from calibre.utils.wmf import extract_raster_image + with open(name, 'rb') as f: + data = f.read() + data = extract_raster_image(data) + name = name.replace('.wmf', '.jpg') + with open(name, 'wb') as f: + f.write(data) return name @@ -285,6 +313,7 @@ class RTFInput(InputFormatPlugin): try: xml = self.generate_xml(stream.name) except RtfInvalidCodeException, e: + raise raise ValueError(_('This RTF file has a feature calibre does not ' 'support. Convert it to HTML first and then try it.\n%s')%e) diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py index e994513c68..902ad09c30 100755 --- a/src/calibre/ebooks/rtf2xml/ParseRtf.py +++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py @@ -107,7 +107,7 @@ class ParseRtf: no_dtd = 0, char_data = '', ): - + """ Requires: 'file' --file to parse @@ -124,7 +124,7 @@ class ParseRtf: through a file. Only for debugging. Returns: Nothing """ - + self.__file = in_file self.__out_file = out_file self.__out_dir = out_dir @@ -155,12 +155,12 @@ class ParseRtf: if hasattr(the_file, 'read'): return if the_file == None: if type == "file_to_parse": - msg = _("\nYou must provide a file for the script to work") + msg = "\nYou must provide a file for the script to work" raise RtfInvalidCodeException, msg elif os.path.exists(the_file): pass # do nothing else: - msg = _("\nThe file '%s' cannot be found") % the_file + msg = "\nThe file '%s' cannot be found" % the_file raise RtfInvalidCodeException, msg def __check_dir(self, the_dir): @@ -169,7 +169,7 @@ class ParseRtf: return dir_exists = os.path.isdir(the_dir) if not dir_exists: - msg = _("\n%s is not a directory") % the_dir + msg = "\n%s is not a directory" % the_dir raise RtfInvalidCodeException, msg return 1 @@ -247,7 +247,7 @@ class ParseRtf: if check_encoding_obj.check_encoding(self.__file, enc): file_name = self.__file if isinstance(self.__file, str) \ else self.__file.encode('utf-8') - msg = _('File %s does not appear to be correctly encoded.\n') % file_name + msg = 'File %s does not appear to be correctly encoded.\n' % file_name raise InvalidRtfException, msg delete_info_obj = delete_info.DeleteInfo( in_file = self.__temp_file, @@ -542,7 +542,7 @@ class ParseRtf: pass #sys.stderr.write( msg + ' in ' + file_name + "\n") else: - msg = _('%s in file %s\n') % (msg, file_name) + msg = '%s in file %s\n' % (msg, file_name) raise RtfInvalidCodeException, msg def __return_code(self, num): @@ -558,4 +558,4 @@ class ParseRtf: with open(write_file, 'wb') as write_obj: for line in read_obj: write_obj.write(line) - return write_file \ No newline at end of file + return write_file diff --git a/src/calibre/ebooks/rtf2xml/check_brackets.py b/src/calibre/ebooks/rtf2xml/check_brackets.py index 40f9f7e13f..8089620999 100755 --- a/src/calibre/ebooks/rtf2xml/check_brackets.py +++ b/src/calibre/ebooks/rtf2xml/check_brackets.py @@ -54,7 +54,7 @@ class CheckBrackets: return (False, "closed bracket doesn't match, line %s" % line_count) if self.__bracket_count != 0: - msg = _('At end of file open and closed brackets don\'t match\n' \ + msg = ('At end of file open and closed brackets don\'t match\n' \ 'total number of brackets is %s') % self.__bracket_count return (False, msg) - return (True, _("Brackets match!")) + return (True, "Brackets match!") diff --git a/src/calibre/ebooks/rtf2xml/check_encoding.py b/src/calibre/ebooks/rtf2xml/check_encoding.py index 7a7b842db6..0f52320aea 100755 --- a/src/calibre/ebooks/rtf2xml/check_encoding.py +++ b/src/calibre/ebooks/rtf2xml/check_encoding.py @@ -13,10 +13,10 @@ class CheckEncoding: try: char.decode(encoding) except UnicodeError, msg: - sys.stderr.write(_('line: %s char: %s\n') % (line_num, char_position)) + sys.stderr.write('line: %s char: %s\n' % (line_num, char_position)) sys.stderr.write(str(msg) + '\n') - def check_encoding(self, path, encoding='us-ascii', verbose = True): + def check_encoding(self, path, encoding='us-ascii', verbose=True): line_num = 0 with open(path, 'r') as read_obj: for line in read_obj: @@ -28,7 +28,7 @@ class CheckEncoding: if len(line) < 1000: self.__get_position_error(line, encoding, line_num) else: - sys.stderr.write(_('line: %d has bad encoding\n') % line_num) + sys.stderr.write('line: %d has bad encoding\n' % line_num) return True return False diff --git a/src/calibre/ebooks/rtf2xml/combine_borders.py b/src/calibre/ebooks/rtf2xml/combine_borders.py index a0bc77e7ad..eaf09d0842 100755 --- a/src/calibre/ebooks/rtf2xml/combine_borders.py +++ b/src/calibre/ebooks/rtf2xml/combine_borders.py @@ -78,14 +78,14 @@ class CombineBorders: self.add_to_border_desc(line) def combine_borders(self): - with open(self.__file, 'r') as read_obj, \ - open(self.__write_to, 'w') as write_obj: - for line in read_obj: - self.__first_five = line[0:5] - if self.__state == 'border': - self.__border_func(line, write_obj) - else: - write_obj.write(self.__default_func(line)) + with open(self.__file, 'r') as read_obj: + with open(self.__write_to, 'w') as write_obj: + for line in read_obj: + self.__first_five = line[0:5] + if self.__state == 'border': + self.__border_func(line, write_obj) + else: + write_obj.write(self.__default_func(line)) copy_obj = copy.Copy(bug_handler = self.__bug_handler) if self.__copy: copy_obj.copy_file(self.__write_to, "combine_borders.data") diff --git a/src/calibre/ebooks/rtf2xml/convert_to_tags.py b/src/calibre/ebooks/rtf2xml/convert_to_tags.py index 67689eb2d1..6927537474 100755 --- a/src/calibre/ebooks/rtf2xml/convert_to_tags.py +++ b/src/calibre/ebooks/rtf2xml/convert_to_tags.py @@ -1,4 +1,4 @@ -import os, tempfile +import os, tempfile, sys from calibre.ebooks.rtf2xml import copy, check_encoding @@ -208,15 +208,16 @@ class ConvertToTags: """ #keep maximum compatibility with previous version check_encoding_obj = check_encoding.CheckEncoding( - bug_handler = self.__bug_handler, - ) - if not check_encoding_obj.check_encoding(self.__file, verbose = False): + bug_handler=self.__bug_handler) + + if not check_encoding_obj.check_encoding(self.__file, verbose=False): self.__write_obj.write('') elif not check_encoding_obj.check_encoding(self.__file, self.__encoding): self.__write_obj.write('' % self.__encoding) else: self.__write_obj.write('') - sys.stderr.write(_('Bad RTF encoding, revert to US-ASCII chars and hope for the best')) + sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and' + ' hope for the best') self.__new_line = 0 self.__write_new_line() if self.__no_dtd: diff --git a/src/calibre/ebooks/rtf2xml/default_encoding.py b/src/calibre/ebooks/rtf2xml/default_encoding.py index c7e030e48b..aec33943a9 100755 --- a/src/calibre/ebooks/rtf2xml/default_encoding.py +++ b/src/calibre/ebooks/rtf2xml/default_encoding.py @@ -3,7 +3,6 @@ # copyright 2002 Paul Henry Tremblay # # # ######################################################################### - ''' Codepages as to RTF 1.9.1: 437 United States IBM @@ -79,7 +78,7 @@ class DefaultEncoding: else: code_page = 'ansicpg' + self.__code_page return self.__platform, code_page, self.__default_num - + def get_codepage(self): if not self.__datafetched: self._encoding() @@ -91,7 +90,7 @@ class DefaultEncoding: self._encoding() self.__datafetched = True return self.__platform - + def _encoding(self): with open(self.__file, 'r') as read_obj: if not self.__fetchraw: diff --git a/src/calibre/ebooks/rtf2xml/delete_info.py b/src/calibre/ebooks/rtf2xml/delete_info.py index 3a7442addc..3ffff7d73a 100755 --- a/src/calibre/ebooks/rtf2xml/delete_info.py +++ b/src/calibre/ebooks/rtf2xml/delete_info.py @@ -128,7 +128,7 @@ class DeleteInfo: # not sure what happens here! # believe I have a '{\*} if self.__run_level > 3: - msg = _('flag problem\n') + msg = 'flag problem\n' raise self.__bug_handler, msg return True elif self.__token_info in self.__allowable : @@ -144,14 +144,14 @@ class DeleteInfo: self.__found_list_func(line) elif self.__token_info in self.__not_allowable: if not self.__ob: - self.__write_cb = False + self.__write_cb = True self.__ob = 0 self.__state = 'delete' self.__cb_count = 0 return False else: if self.__run_level > 5: - msg = _('After an asterisk, and found neither an allowable or non-allowable token\n\ + msg = ('After an asterisk, and found neither an allowable or non-allowable token\n\ token is "%s"\n') % self.__token_info raise self.__bug_handler, msg if not self.__ob: @@ -187,32 +187,31 @@ class DeleteInfo: def delete_info(self): """Main method for handling other methods. Read one line in at - a time, and determine wheter to print the line based on the state.""" - with open(self.__file, 'r') as read_obj, \ - open(self.__write_to, 'w') as self.__write_obj: - for line in read_obj: - #ob 3: - msg = _('self.__inline_list is %s\n') % self.__inline_list + msg = 'self.__inline_list is %s\n' % self.__inline_list raise self.__bug_handler, msg self.__write_obj.write('error\n') self.__groups_in_waiting[0] = 0 @@ -393,27 +393,27 @@ class Inline: the state. """ self.__initiate_values() - with open(self.__file, 'r') as read_obj, \ - open(self.__write_to, 'w') as self.__write_obj: - for line in read_obj: - token = line[0:-1] - self.__token_info = '' - if token == 'tx 1: - sys.stderr.write(_('Removing files from old pict directory...\n')) + sys.stderr.write('Removing files from old pict directory...\n') all_files = os.listdir(self.__dir_name) for the_file in all_files: the_file = os.path.join(self.__dir_name, the_file) @@ -90,7 +90,7 @@ class Pict: except OSError: pass if self.__run_level > 1: - sys.stderr.write(_('Files removed.\n')) + sys.stderr.write('Files removed.\n') def __create_pict_file(self): """Create a file for all the pict data to be written to. @@ -146,25 +146,25 @@ class Pict: def process_pict(self): self.__make_dir() - with open(self.__file) as read_obj, \ - open(self.__write_to, 'w') as write_obj: - for line in read_obj: - self.__token_info = line[:16] - if self.__token_info == 'ob 3: - msg = _('Number "%s" cannot be converted to integer\n') % num + msg = 'Number "%s" cannot be converted to integer\n' % num raise self.__bug_handler, msg type = self.__number_type_dict.get(num) if type is None: if self.__run_level > 3: - msg = _('No type for "%s" in self.__number_type_dict\n') + msg = 'No type for "%s" in self.__number_type_dict\n' raise self.__bug_handler type = 'Arabic' return 'cw<%s<%s 3: - msg = _('No entry for number "%s"') % num + msg = 'No entry for number "%s"' % num raise self.__bug_handler, msg return 'cw<%s<%sfalse<%s\n' % (token, token) else: - msg = _("boolean should have some value module process tokens\ntoken is %s\n'%s'\n") % (token, num) + msg = "boolean should have some value module process tokens\ntoken is %s\n'%s'\n" % (token, num) raise self.__bug_handler, msg def __no_sup_sub_func(self, pre, token, num): @@ -703,7 +703,7 @@ class ProcessTokens: numerator = float(re.search('[0-9.\-]+', numerator).group()) except TypeError, msg: if self.__run_level > 3: - msg = _('No number to process?\nthis indicates that the token \(\\li\) \ + msg = ('No number to process?\nthis indicates that the token \(\\li\) \ should have a number and does not\nnumerator is \ "%s"\ndenominator is "%s"\n') % (numerator, denominator) raise self.__bug_handler, msg @@ -724,12 +724,12 @@ class ProcessTokens: second = match_obj.group(2) if not second: if self.__run_level > 3: - msg = _("token is '%s' \n") % token + msg = "token is '%s' \n" % token raise self.__bug_handler, msg return first, 0 else: if self.__run_level > 3: - msg = _("token is '%s' \n") % token + msg = "token is '%s' \n" % token raise self.__bug_handler return token, 0 return first, second @@ -758,7 +758,7 @@ class ProcessTokens: pre, token, action = self.dict_token.get(token, (None, None, None)) if action: return action(pre, token, num) - + def __check_brackets(self, in_file): self.__check_brack_obj = check_brackets.CheckBrackets\ (file = in_file) @@ -769,53 +769,54 @@ class ProcessTokens: def process_tokens(self): """Main method for handling other methods. """ line_count = 0 - with open(self.__file, 'r') as read_obj, open(self.__write_to, 'wb') as write_obj: - for line in read_obj: - token = line.replace("\n","") - line_count += 1 - if line_count == 1 and token != '\\{': - msg = _('Invalid RTF: document doesn\'t start with {\n') + with open(self.__file, 'r') as read_obj: + with open(self.__write_to, 'wb') as write_obj: + for line in read_obj: + token = line.replace("\n","") + line_count += 1 + if line_count == 1 and token != '\\{': + msg = 'Invalid RTF: document doesn\'t start with {\n' + raise self.__exception_handler, msg + elif line_count == 2 and token[0:4] != '\\rtf': + msg = 'Invalid RTF: document doesn\'t start with \\rtf \n' + raise self.__exception_handler, msg + + the_index = token.find('\\ ') + if token is not None and the_index > -1: + msg = 'Invalid RTF: token "\\ " not valid.\n' raise self.__exception_handler, msg - elif line_count == 2 and token[0:4] != '\\rtf': - msg =_('Invalid RTF: document doesn\'t start with \\rtf \n') - raise self.__exception_handler, msg - - the_index = token.find('\\ ') - if token is not None and the_index > -1: - msg =_('Invalid RTF: token "\\ " not valid.\n') - raise self.__exception_handler, msg - elif token[:1] == "\\": - try: - token.decode('us-ascii') - except UnicodeError, msg: - msg = _('Invalid RTF: Tokens not ascii encoded.\n%s') % str(msg) - raise self.__exception_handler, msg - line = self.process_cw(token) - if line is not None: - write_obj.write(line) - else: - fields = re.split(self.__utf_exp, token) - for field in fields: - if not field: - continue - if field[0:1] == '&': - write_obj.write('tx +All rights reserved. + +Thanks to Carlo Zottmann for refactoring +Textile's procedural code into a class framework + +Additions and fixes Copyright (c) 2006 Alex Shiels http://thresholdstate.com/ + +""" + +__license__ = """ +L I C E N S E +============= +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name Textile nor the names of its contributors may be used to + endorse or promote products derived from this software without specific + prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +""" + +import re +import uuid +from urlparse import urlparse + +def _normalize_newlines(string): + out = re.sub(r'\r\n', '\n', string) + out = re.sub(r'\n{3,}', '\n\n', out) + out = re.sub(r'\n\s*\n', '\n\n', out) + out = re.sub(r'"$', '" ', out) + return out + +def getimagesize(url): + """ + Attempts to determine an image's width and height, and returns a string + suitable for use in an tag, or None in case of failure. + Requires that PIL is installed. + + >>> getimagesize("http://www.google.com/intl/en_ALL/images/logo.gif") + ... #doctest: +ELLIPSIS, +SKIP + 'width="..." height="..."' + + """ + + try: + import ImageFile + import urllib2 + except ImportError: + return None + + try: + p = ImageFile.Parser() + f = urllib2.urlopen(url) + while True: + s = f.read(1024) + if not s: + break + p.feed(s) + if p.image: + return 'width="%i" height="%i"' % p.image.size + except (IOError, ValueError): + return None + +class Textile(object): + hlgn = r'(?:\<(?!>)|(?|\<\>|\=|[()]+(?! ))' + vlgn = r'[\-^~]' + clas = r'(?:\([^)]+\))' + lnge = r'(?:\[[^\]]+\])' + styl = r'(?:\{[^}]+\})' + cspn = r'(?:\\\d+)' + rspn = r'(?:\/\d+)' + a = r'(?:%s|%s)*' % (hlgn, vlgn) + s = r'(?:%s|%s)*' % (cspn, rspn) + c = r'(?:%s)*' % '|'.join([clas, styl, lnge, hlgn]) + + pnct = r'[-!"#$%&()*+,/:;<=>?@\'\[\\\]\.^_`{|}~]' + # urlch = r'[\w"$\-_.+!*\'(),";/?:@=&%#{}|\\^~\[\]`]' + urlch = '[\w"$\-_.+*\'(),";\/?:@=&%#{}|\\^~\[\]`]' + + url_schemes = ('http', 'https', 'ftp', 'mailto') + + btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', 'fn\d+', 'p') + btag_lite = ('bq', 'bc', 'p') + + glyph_defaults = ( + ('txt_quote_single_open', '‘'), + ('txt_quote_single_close', '’'), + ('txt_quote_double_open', '“'), + ('txt_quote_double_close', '”'), + ('txt_apostrophe', '’'), + ('txt_prime', '′'), + ('txt_prime_double', '″'), + ('txt_ellipsis', '…'), + ('txt_emdash', '—'), + ('txt_endash', '–'), + ('txt_dimension', '×'), + ('txt_trademark', '™'), + ('txt_registered', '®'), + ('txt_copyright', '©'), + ) + + def __init__(self, restricted=False, lite=False, noimage=False): + """docstring for __init__""" + self.restricted = restricted + self.lite = lite + self.noimage = noimage + self.get_sizes = False + self.fn = {} + self.urlrefs = {} + self.shelf = {} + self.rel = '' + self.html_type = 'xhtml' + + def textile(self, text, rel=None, head_offset=0, html_type='xhtml'): + """ + >>> import textile + >>> textile.textile('some textile') + u'\\t

some textile

' + """ + self.html_type = html_type + + # text = unicode(text) + text = _normalize_newlines(text) + + if self.restricted: + text = self.encode_html(text, quotes=False) + + if rel: + self.rel = ' rel="%s"' % rel + + text = self.getRefs(text) + + text = self.block(text, int(head_offset)) + + text = self.retrieve(text) + + return text + + def pba(self, input, element=None): + """ + Parse block attributes. + + >>> t = Textile() + >>> t.pba(r'\3') + '' + >>> t.pba(r'\\3', element='td') + ' colspan="3"' + >>> t.pba(r'/4', element='td') + ' rowspan="4"' + >>> t.pba(r'\\3/4', element='td') + ' colspan="3" rowspan="4"' + + >>> t.vAlign('^') + 'top' + + >>> t.pba('^', element='td') + ' style="vertical-align:top;"' + + >>> t.pba('{line-height:18px}') + ' style="line-height:18px;"' + + >>> t.pba('(foo-bar)') + ' class="foo-bar"' + + >>> t.pba('(#myid)') + ' id="myid"' + + >>> t.pba('(foo-bar#myid)') + ' class="foo-bar" id="myid"' + + >>> t.pba('((((') + ' style="padding-left:4em;"' + + >>> t.pba(')))') + ' style="padding-right:3em;"' + + >>> t.pba('[fr]') + ' lang="fr"' + + """ + style = [] + aclass = '' + lang = '' + colspan = '' + rowspan = '' + id = '' + + if not input: + return '' + + matched = input + if element == 'td': + m = re.search(r'\\(\d+)', matched) + if m: + colspan = m.group(1) + + m = re.search(r'/(\d+)', matched) + if m: + rowspan = m.group(1) + + if element == 'td' or element == 'tr': + m = re.search(r'(%s)' % self.vlgn, matched) + if m: + style.append("vertical-align:%s;" % self.vAlign(m.group(1))) + + m = re.search(r'\{([^}]*)\}', matched) + if m: + style.append(m.group(1).rstrip(';') + ';') + matched = matched.replace(m.group(0), '') + + m = re.search(r'\[([^\]]+)\]', matched, re.U) + if m: + lang = m.group(1) + matched = matched.replace(m.group(0), '') + + m = re.search(r'\(([^()]+)\)', matched, re.U) + if m: + aclass = m.group(1) + matched = matched.replace(m.group(0), '') + + m = re.search(r'([(]+)', matched) + if m: + style.append("padding-left:%sem;" % len(m.group(1))) + matched = matched.replace(m.group(0), '') + + m = re.search(r'([)]+)', matched) + if m: + style.append("padding-right:%sem;" % len(m.group(1))) + matched = matched.replace(m.group(0), '') + + m = re.search(r'(%s)' % self.hlgn, matched) + if m: + style.append("text-align:%s;" % self.hAlign(m.group(1))) + + m = re.search(r'^(.*)#(.*)$', aclass) + if m: + id = m.group(2) + aclass = m.group(1) + + if self.restricted: + if lang: + return ' lang="%s"' + else: + return '' + + result = [] + if style: + result.append(' style="%s"' % "".join(style)) + if aclass: + result.append(' class="%s"' % aclass) + if lang: + result.append(' lang="%s"' % lang) + if id: + result.append(' id="%s"' % id) + if colspan: + result.append(' colspan="%s"' % colspan) + if rowspan: + result.append(' rowspan="%s"' % rowspan) + return ''.join(result) + + def hasRawText(self, text): + """ + checks whether the text has text not already enclosed by a block tag + + >>> t = Textile() + >>> t.hasRawText('

foo bar biz baz

') + False + + >>> t.hasRawText(' why yes, yes it does') + True + + """ + r = re.compile(r'<(p|blockquote|div|form|table|ul|ol|pre|h\d)[^>]*?>.*', re.S).sub('', text.strip()).strip() + r = re.compile(r'<(hr|br)[^>]*?/>').sub('', r) + return '' != r + + def table(self, text): + r""" + >>> t = Textile() + >>> t.table('|one|two|three|\n|a|b|c|') + '\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
onetwothree
abc
\n\n' + """ + text = text + "\n\n" + pattern = re.compile(r'^(?:table(_?%(s)s%(a)s%(c)s)\. ?\n)?^(%(a)s%(c)s\.? ?\|.*\|)\n\n' % {'s':self.s, 'a':self.a, 'c':self.c}, re.S|re.M|re.U) + return pattern.sub(self.fTable, text) + + def fTable(self, match): + tatts = self.pba(match.group(1), 'table') + rows = [] + for row in [ x for x in match.group(2).split('\n') if x]: + rmtch = re.search(r'^(%s%s\. )(.*)' % (self.a, self.c), row.lstrip()) + if rmtch: + ratts = self.pba(rmtch.group(1), 'tr') + row = rmtch.group(2) + else: + ratts = '' + + cells = [] + for cell in row.split('|')[1:-1]: + ctyp = 'd' + if re.search(r'^_', cell): + ctyp = "h" + cmtch = re.search(r'^(_?%s%s%s\. )(.*)' % (self.s, self.a, self.c), cell) + if cmtch: + catts = self.pba(cmtch.group(1), 'td') + cell = cmtch.group(2) + else: + catts = '' + + cell = self.graf(self.span(cell)) + cells.append('\t\t\t%s' % (ctyp, catts, cell, ctyp)) + rows.append("\t\t\n%s\n\t\t" % (ratts, '\n'.join(cells))) + cells = [] + catts = None + return "\t\n%s\n\t\n\n" % (tatts, '\n'.join(rows)) + + def lists(self, text): + """ + >>> t = Textile() + >>> t.lists("* one\\n* two\\n* three") + '\\t
    \\n\\t\\t
  • one
  • \\n\\t\\t
  • two
  • \\n\\t\\t
  • three
  • \\n\\t
' + """ + pattern = re.compile(r'^([#*]+%s .*)$(?![^#*])' % self.c, re.U|re.M|re.S) + return pattern.sub(self.fList, text) + + def fList(self, match): + text = match.group(0).split("\n") + result = [] + lists = [] + for i, line in enumerate(text): + try: + nextline = text[i+1] + except IndexError: + nextline = '' + + m = re.search(r"^([#*]+)(%s%s) (.*)$" % (self.a, self.c), line, re.S) + if m: + tl, atts, content = m.groups() + nl = '' + nm = re.search(r'^([#*]+)\s.*', nextline) + if nm: + nl = nm.group(1) + if tl not in lists: + lists.append(tl) + atts = self.pba(atts) + line = "\t<%sl%s>\n\t\t
  • %s" % (self.lT(tl), atts, self.graf(content)) + else: + line = "\t\t
  • " + self.graf(content) + + if len(nl) <= len(tl): + line = line + "
  • " + for k in reversed(lists): + if len(k) > len(nl): + line = line + "\n\t" % self.lT(k) + if len(k) > 1: + line = line + "" + lists.remove(k) + + result.append(line) + return "\n".join(result) + + def lT(self, input): + if re.search(r'^#+', input): + return 'o' + else: + return 'u' + + def doPBr(self, in_): + return re.compile(r'<(p)([^>]*?)>(.*)()', re.S).sub(self.doBr, in_) + + def doBr(self, match): + if self.html_type == 'html': + content = re.sub(r'(.+)(?:(?)|(?))\n(?![#*\s|])', '\\1
    ', match.group(3)) + else: + content = re.sub(r'(.+)(?:(?)|(?))\n(?![#*\s|])', '\\1
    ', match.group(3)) + return '<%s%s>%s%s' % (match.group(1), match.group(2), content, match.group(4)) + + def block(self, text, head_offset = 0): + """ + >>> t = Textile() + >>> t.block('h1. foobar baby') + '\\t

    foobar baby

    ' + """ + if not self.lite: + tre = '|'.join(self.btag) + else: + tre = '|'.join(self.btag_lite) + text = text.split('\n\n') + + tag = 'p' + atts = cite = graf = ext = c1 = '' + + out = [] + + anon = False + for line in text: + pattern = r'^(%s)(%s%s)\.(\.?)(?::(\S+))? (.*)$' % (tre, self.a, self.c) + match = re.search(pattern, line, re.S) + if match: + if ext: + out.append(out.pop() + c1) + + tag, atts, ext, cite, graf = match.groups() + h_match = re.search(r'h([1-6])', tag) + if h_match: + head_level, = h_match.groups() + tag = 'h%i' % max(1, + min(int(head_level) + head_offset, + 6)) + o1, o2, content, c2, c1 = self.fBlock(tag, atts, ext, + cite, graf) + # leave off c1 if this block is extended, + # we'll close it at the start of the next block + + if ext: + line = "%s%s%s%s" % (o1, o2, content, c2) + else: + line = "%s%s%s%s%s" % (o1, o2, content, c2, c1) + + else: + anon = True + if ext or not re.search(r'^\s', line): + o1, o2, content, c2, c1 = self.fBlock(tag, atts, ext, + cite, line) + # skip $o1/$c1 because this is part of a continuing + # extended block + if tag == 'p' and not self.hasRawText(content): + line = content + else: + line = "%s%s%s" % (o2, content, c2) + else: + line = self.graf(line) + + line = self.doPBr(line) + if self.html_type == 'xhtml': + line = re.sub(r'
    ', '
    ', line) + + if ext and anon: + out.append(out.pop() + "\n" + line) + else: + out.append(line) + + if not ext: + tag = 'p' + atts = '' + cite = '' + graf = '' + + if ext: + out.append(out.pop() + c1) + return '\n\n'.join(out) + + def fBlock(self, tag, atts, ext, cite, content): + """ + >>> t = Textile() + >>> t.fBlock("bq", "", None, "", "Hello BlockQuote") + ('\\t
    \\n', '\\t\\t

    ', 'Hello BlockQuote', '

    ', '\\n\\t
    ') + + >>> t.fBlock("bq", "", None, "http://google.com", "Hello BlockQuote") + ('\\t
    \\n', '\\t\\t

    ', 'Hello BlockQuote', '

    ', '\\n\\t
    ') + + >>> t.fBlock("bc", "", None, "", 'printf "Hello, World";') # doctest: +ELLIPSIS + ('
    ', '', ..., '', '
    ') + + >>> t.fBlock("h1", "", None, "", "foobar") + ('', '\\t

    ', 'foobar', '

    ', '') + """ + atts = self.pba(atts) + o1 = o2 = c2 = c1 = '' + + m = re.search(r'fn(\d+)', tag) + if m: + tag = 'p' + if m.group(1) in self.fn: + fnid = self.fn[m.group(1)] + else: + fnid = m.group(1) + atts = atts + ' id="fn%s"' % fnid + if atts.find('class=') < 0: + atts = atts + ' class="footnote"' + content = ('%s' % m.group(1)) + content + + if tag == 'bq': + cite = self.checkRefs(cite) + if cite: + cite = ' cite="%s"' % cite + else: + cite = '' + o1 = "\t\n" % (cite, atts) + o2 = "\t\t" % atts + c2 = "

    " + c1 = "\n\t" + + elif tag == 'bc': + o1 = "" % atts + o2 = "" % atts + c2 = "" + c1 = "" + content = self.shelve(self.encode_html(content.rstrip("\n") + "\n")) + + elif tag == 'notextile': + content = self.shelve(content) + o1 = o2 = '' + c1 = c2 = '' + + elif tag == 'pre': + content = self.shelve(self.encode_html(content.rstrip("\n") + "\n")) + o1 = "" % atts + o2 = c2 = '' + c1 = '' + + else: + o2 = "\t<%s%s>" % (tag, atts) + c2 = "" % tag + + content = self.graf(content) + return o1, o2, content, c2, c1 + + def footnoteRef(self, text): + """ + >>> t = Textile() + >>> t.footnoteRef('foo[1] ') # doctest: +ELLIPSIS + 'foo1 ' + """ + return re.sub(r'\b\[([0-9]+)\](\s)?', self.footnoteID, text) + + def footnoteID(self, match): + id, t = match.groups() + if id not in self.fn: + self.fn[id] = str(uuid.uuid4()) + fnid = self.fn[id] + if not t: + t = '' + return '%s%s' % (fnid, id, t) + + def glyphs(self, text): + """ + >>> t = Textile() + + >>> t.glyphs("apostrophe's") + 'apostrophe’s' + + >>> t.glyphs("back in '88") + 'back in ’88' + + >>> t.glyphs('foo ...') + 'foo …' + + >>> t.glyphs('--') + '—' + + >>> t.glyphs('FooBar[tm]') + 'FooBar™' + + >>> t.glyphs("

    Cat's Cradle by Vonnegut

    ") + '

    Cat’s Cradle by Vonnegut

    ' + + """ + # fix: hackish + text = re.sub(r'"\Z', '\" ', text) + + glyph_search = ( + re.compile(r"(\w)\'(\w)"), # apostrophe's + re.compile(r'(\s)\'(\d+\w?)\b(?!\')'), # back in '88 + re.compile(r'(\S)\'(?=\s|'+self.pnct+'|<|$)'), # single closing + re.compile(r'\'/'), # single opening + re.compile(r'(\S)\"(?=\s|'+self.pnct+'|<|$)'), # double closing + re.compile(r'"'), # double opening + re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'), # 3+ uppercase acronym + re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'), # 3+ uppercase + re.compile(r'\b(\s{0,1})?\.{3}'), # ellipsis + re.compile(r'(\s?)--(\s?)'), # em dash + re.compile(r'\s-(?:\s|$)'), # en dash + re.compile(r'(\d+)( ?)x( ?)(?=\d+)'), # dimension sign + re.compile(r'\b ?[([]TM[])]', re.I), # trademark + re.compile(r'\b ?[([]R[])]', re.I), # registered + re.compile(r'\b ?[([]C[])]', re.I), # copyright + ) + + glyph_replace = [x % dict(self.glyph_defaults) for x in ( + r'\1%(txt_apostrophe)s\2', # apostrophe's + r'\1%(txt_apostrophe)s\2', # back in '88 + r'\1%(txt_quote_single_close)s', # single closing + r'%(txt_quote_single_open)s', # single opening + r'\1%(txt_quote_double_close)s', # double closing + r'%(txt_quote_double_open)s', # double opening + r'\1', # 3+ uppercase acronym + r'\1', # 3+ uppercase + r'\1%(txt_ellipsis)s', # ellipsis + r'\1%(txt_emdash)s\2', # em dash + r' %(txt_endash)s ', # en dash + r'\1\2%(txt_dimension)s\3', # dimension sign + r'%(txt_trademark)s', # trademark + r'%(txt_registered)s', # registered + r'%(txt_copyright)s', # copyright + )] + + result = [] + for line in re.compile(r'(<.*?>)', re.U).split(text): + if not re.search(r'<.*>', line): + for s, r in zip(glyph_search, glyph_replace): + line = s.sub(r, line) + result.append(line) + return ''.join(result) + + def vAlign(self, input): + d = {'^':'top', '-':'middle', '~':'bottom'} + return d.get(input, '') + + def hAlign(self, input): + d = {'<':'left', '=':'center', '>':'right', '<>': 'justify'} + return d.get(input, '') + + def getRefs(self, text): + """ + what is this for? + """ + pattern = re.compile(r'(?:(?<=^)|(?<=\s))\[(.+)\]((?:http(?:s?):\/\/|\/)\S+)(?=\s|$)', re.U) + text = pattern.sub(self.refs, text) + return text + + def refs(self, match): + flag, url = match.groups() + self.urlrefs[flag] = url + return '' + + def checkRefs(self, url): + return self.urlrefs.get(url, url) + + def isRelURL(self, url): + """ + Identify relative urls. + + >>> t = Textile() + >>> t.isRelURL("http://www.google.com/") + False + >>> t.isRelURL("/foo") + True + + """ + (scheme, netloc) = urlparse(url)[0:2] + return not scheme and not netloc + + def relURL(self, url): + scheme = urlparse(url)[0] + if self.restricted and scheme and scheme not in self.url_schemes: + return '#' + return url + + def shelve(self, text): + id = str(uuid.uuid4()) + self.shelf[id] = text + return id + + def retrieve(self, text): + """ + >>> t = Textile() + >>> id = t.shelve("foobar") + >>> t.retrieve(id) + 'foobar' + """ + while True: + old = text + for k, v in self.shelf.items(): + text = text.replace(k, v) + if text == old: + break + return text + + def encode_html(self, text, quotes=True): + a = ( + ('&', '&'), + ('<', '<'), + ('>', '>') + ) + + if quotes: + a = a + ( + ("'", '''), + ('"', '"') + ) + + for k, v in a: + text = text.replace(k, v) + return text + + def graf(self, text): + if not self.lite: + text = self.noTextile(text) + text = self.code(text) + + text = self.links(text) + + if not self.noimage: + text = self.image(text) + + if not self.lite: + text = self.lists(text) + text = self.table(text) + + text = self.span(text) + text = self.footnoteRef(text) + text = self.glyphs(text) + + return text.rstrip('\n') + + def links(self, text): + """ + >>> t = Textile() + >>> t.links('fooobar "Google":http://google.com/foobar/ and hello world "flickr":http://flickr.com/photos/jsamsa/ ') # doctest: +ELLIPSIS + 'fooobar ... and hello world ...' + """ + + punct = '!"#$%&\'*+,-./:;=?@\\^_`|~' + + pattern = r''' + (?P
        [\s\[{(]|[%s]   )?
    +            "                          # start
    +            (?P   %s       )
    +            (?P   [^"]+?   )
    +            \s?
    +            (?:   \(([^)]+?)\)(?=")   )?     # $title
    +            ":
    +            (?P    (?:ftp|https?)? (?: :// )? [-A-Za-z0-9+&@#/?=~_()|!:,.;]*[-A-Za-z0-9+&@#/=~_()|]   )
    +            (?P   [^\w\/;]*?   )
    +            (?=<|\s|$)
    +        ''' % (re.escape(punct), self.c)
    +
    +        text = re.compile(pattern, re.X).sub(self.fLink, text)
    +
    +        return text
    +
    +    def fLink(self, match):
    +        pre, atts, text, title, url, post = match.groups()
    +
    +        if pre == None:
    +            pre = ''
    +
    +        # assume ) at the end of the url is not actually part of the url
    +        # unless the url also contains a (
    +        if url.endswith(')') and not url.find('(') > -1:
    +            post = url[-1] + post
    +            url = url[:-1]
    +
    +        url = self.checkRefs(url)
    +
    +        atts = self.pba(atts)
    +        if title:
    +            atts = atts +  ' title="%s"' % self.encode_html(title)
    +
    +        if not self.noimage:
    +            text = self.image(text)
    +
    +        text = self.span(text)
    +        text = self.glyphs(text)
    +
    +        url = self.relURL(url)
    +        out = '%s' % (self.encode_html(url), atts, self.rel, text)
    +        out = self.shelve(out)
    +        return ''.join([pre, out, post])
    +
    +    def span(self, text):
    +        """
    +        >>> t = Textile()
    +        >>> t.span(r"hello %(bob)span *strong* and **bold**% goodbye")
    +        'hello span strong and bold goodbye'
    +        """
    +        qtags = (r'\*\*', r'\*', r'\?\?', r'\-', r'__', r'_', r'%', r'\+', r'~', r'\^')
    +        pnct = ".,\"'?!;:"
    +
    +        for qtag in qtags:
    +            pattern = re.compile(r"""
    +                (?:^|(?<=[\s>%(pnct)s])|([\]}]))
    +                (%(qtag)s)(?!%(qtag)s)
    +                (%(c)s)
    +                (?::(\S+))?
    +                ([^\s%(qtag)s]+|\S[^%(qtag)s\n]*[^\s%(qtag)s\n])
    +                ([%(pnct)s]*)
    +                %(qtag)s
    +                (?:$|([\]}])|(?=%(selfpnct)s{1,2}|\s))
    +            """ % {'qtag':qtag, 'c':self.c, 'pnct':pnct,
    +                   'selfpnct':self.pnct}, re.X)
    +            text = pattern.sub(self.fSpan, text)
    +        return text
    +
    +
    +    def fSpan(self, match):
    +        _, tag, atts, cite, content, end, _ = match.groups()
    +
    +        qtags = {
    +            '*': 'strong',
    +            '**': 'b',
    +            '??': 'cite',
    +            '_' : 'em',
    +            '__': 'i',
    +            '-' : 'del',
    +            '%' : 'span',
    +            '+' : 'ins',
    +            '~' : 'sub',
    +            '^' : 'sup'
    +        }
    +        tag = qtags[tag]
    +        atts = self.pba(atts)
    +        if cite:
    +            atts = atts + 'cite="%s"' % cite
    +
    +        content = self.span(content)
    +
    +        out = "<%s%s>%s%s" % (tag, atts, content, end, tag)
    +        return out
    +
    +    def image(self, text):
    +        """
    +        >>> t = Textile()
    +        >>> t.image('!/imgs/myphoto.jpg!:http://jsamsa.com')
    +        ''
    +        """
    +        pattern = re.compile(r"""
    +            (?:[\[{])?          # pre
    +            \!                 # opening !
    +            (%s)               # optional style,class atts
    +            (?:\. )?           # optional dot-space
    +            ([^\s(!]+)         # presume this is the src
    +            \s?                # optional space
    +            (?:\(([^\)]+)\))?  # optional title
    +            \!                 # closing
    +            (?::(\S+))?        # optional href
    +            (?:[\]}]|(?=\s|$)) # lookahead: space or end of string
    +        """ % self.c, re.U|re.X)
    +        return pattern.sub(self.fImage, text)
    +
    +    def fImage(self, match):
    +        # (None, '', '/imgs/myphoto.jpg', None, None)
    +        atts, url, title, href = match.groups()
    +        atts  = self.pba(atts)
    +
    +        if title:
    +            atts = atts + ' title="%s" alt="%s"' % (title, title)
    +        else:
    +            atts = atts + ' alt=""'
    +
    +        if not self.isRelURL(url) and self.get_sizes:
    +            size = getimagesize(url)
    +            if (size):
    +                atts += " %s" % size
    +
    +        if href:
    +            href = self.checkRefs(href)
    +
    +        url = self.checkRefs(url)
    +        url = self.relURL(url)
    +
    +        out = []
    +        if href:
    +            out.append('' % href)
    +        if self.html_type == 'html':
    +            out.append('' % (url, atts))
    +        else:
    +            out.append('' % (url, atts))
    +        if href:
    +            out.append('')
    +
    +        return ''.join(out)
    +
    +    def code(self, text):
    +        text = self.doSpecial(text, '', '', self.fCode)
    +        text = self.doSpecial(text, '@', '@', self.fCode)
    +        text = self.doSpecial(text, '
    ', '
    ', self.fPre) + return text + + def fCode(self, match): + before, text, after = match.groups() + if after == None: + after = '' + # text needs to be escaped + if not self.restricted: + text = self.encode_html(text) + return ''.join([before, self.shelve('%s' % text), after]) + + def fPre(self, match): + before, text, after = match.groups() + if after == None: + after = '' + # text needs to be escapedd + if not self.restricted: + text = self.encode_html(text) + return ''.join([before, '
    ', self.shelve(text), '
    ', after]) + + def doSpecial(self, text, start, end, method=None): + if method == None: + method = self.fSpecial + pattern = re.compile(r'(^|\s|[\[({>])%s(.*?)%s(\s|$|[\])}])?' % (re.escape(start), re.escape(end)), re.M|re.S) + return pattern.sub(method, text) + + def fSpecial(self, match): + """ + special blocks like notextile or code + """ + before, text, after = match.groups() + if after == None: + after = '' + return ''.join([before, self.shelve(self.encode_html(text)), after]) + + def noTextile(self, text): + text = self.doSpecial(text, '', '', self.fTextile) + return self.doSpecial(text, '==', '==', self.fTextile) + + def fTextile(self, match): + before, notextile, after = match.groups() + if after == None: + after = '' + return ''.join([before, self.shelve(notextile), after]) + + +def textile(text, head_offset=0, html_type='xhtml', encoding=None, output=None): + """ + this function takes additional parameters: + head_offset - offset to apply to heading levels (default: 0) + html_type - 'xhtml' or 'html' style tags (default: 'xhtml') + """ + return Textile().textile(text, head_offset=head_offset, + html_type=html_type) + +def textile_restricted(text, lite=True, noimage=True, html_type='xhtml'): + """ + Restricted version of Textile designed for weblog comments and other + untrusted input. + + Raw HTML is escaped. + Style attributes are disabled. + rel='nofollow' is added to external links. + + When lite=True is set (the default): + Block tags are restricted to p, bq, and bc. + Lists and tables are disabled. + + When noimage=True is set (the default): + Image tags are disabled. + + """ + return Textile(restricted=True, lite=lite, + noimage=noimage).textile(text, rel='nofollow', + html_type=html_type) + diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index aaff8b55c0..0b0bd6d570 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -12,7 +12,7 @@ from calibre.ebooks.chardet import detect from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \ separate_paragraphs_single_line, separate_paragraphs_print_formatted, \ preserve_spaces, detect_paragraph_type, detect_formatting_type, \ - convert_heuristic, normalize_line_endings + convert_heuristic, normalize_line_endings, convert_textile from calibre import _ent_pat, xml_entity_to_unicode class TXTInput(InputFormatPlugin): @@ -41,6 +41,7 @@ class TXTInput(InputFormatPlugin): 'paragraph and no styling is applied.\n' '* heuristic: Process using heuristics to determine formatting such ' 'as chapter headings and italic text.\n' + '* textile: Processing using textile formatting.\n' '* markdown: Processing using markdown formatting. ' 'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'), OptionRecommendation(name='preserve_spaces', recommended_value=False, @@ -91,6 +92,9 @@ class TXTInput(InputFormatPlugin): except RuntimeError: raise ValueError('This txt file has malformed markup, it cannot be' ' converted by calibre. See http://daringfireball.net/projects/markdown/syntax') + elif options.formatting_type == 'textile': + log.debug('Running text though textile conversion...') + html = convert_textile(txt) else: # Determine the paragraph type of the document. if options.paragraph_type == 'auto': diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py index 39f73c33ef..1cbf848706 100644 --- a/src/calibre/ebooks/txt/processor.py +++ b/src/calibre/ebooks/txt/processor.py @@ -1,4 +1,8 @@ # -*- coding: utf-8 -*- +__license__ = 'GPL v3' +__copyright__ = '2009, John Schember ' +__docformat__ = 'restructuredtext en' + ''' Read content from txt file. @@ -7,15 +11,11 @@ Read content from txt file. import os, re from calibre import prepare_string_for_xml, isbytestring -from calibre.ebooks.markdown import markdown from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.utils.cleantext import clean_ascii_chars from calibre.ebooks.txt.heuristicprocessor import TXTHeuristicProcessor from calibre.ebooks.conversion.preprocess import DocAnalysis - -__license__ = 'GPL v3' -__copyright__ = '2009, John Schember ' -__docformat__ = 'restructuredtext en' +from calibre.utils.cleantext import clean_ascii_chars HTML_TEMPLATE = u'%s\n%s\n' @@ -34,9 +34,9 @@ def clean_txt(txt): txt = re.sub('(?<=.)\s+$', '', txt) # Remove excessive line breaks. txt = re.sub('\n{3,}', '\n\n', txt) - #remove ASCII invalid chars + #remove ASCII invalid chars : 0 to 8 and 11-14 to 24 txt = clean_ascii_chars(txt) - + return txt def split_txt(txt, epub_split_size_kb=0): @@ -73,12 +73,18 @@ def convert_heuristic(txt, title='', epub_split_size_kb=0): return tp.convert(txt, title, epub_split_size_kb) def convert_markdown(txt, title='', disable_toc=False): + from calibre.ebooks.markdown import markdown md = markdown.Markdown( extensions=['footnotes', 'tables', 'toc'], extension_configs={"toc": {"disable_toc": disable_toc}}, safe_mode=False) return HTML_TEMPLATE % (title, md.convert(txt)) +def convert_textile(txt, title=''): + from calibre.ebooks.textile import textile + html = textile(txt, encoding='utf-8') + return HTML_TEMPLATE % (title, html) + def normalize_line_endings(txt): txt = txt.replace('\r\n', '\n') txt = txt.replace('\r', '\n') @@ -114,66 +120,75 @@ def split_string_separator(txt, size) : def detect_paragraph_type(txt): ''' Tries to determine the formatting of the document. - + block: Paragraphs are separated by a blank line. single: Each line is a paragraph. print: Each paragraph starts with a 2+ spaces or a tab and ends when a new paragraph is reached. unformatted: most lines have hard line breaks, few/no blank lines or indents - + returns block, single, print, unformatted ''' txt = txt.replace('\r\n', '\n') txt = txt.replace('\r', '\n') txt_line_count = len(re.findall('(?mu)^\s*.+$', txt)) - + # Check for hard line breaks - true if 55% of the doc breaks in the same region docanalysis = DocAnalysis('txt', txt) hardbreaks = docanalysis.line_histogram(.55) - + if hardbreaks: # Determine print percentage tab_line_count = len(re.findall('(?mu)^(\t|\s{2,}).+$', txt)) print_percent = tab_line_count / float(txt_line_count) - + # Determine block percentage empty_line_count = len(re.findall('(?mu)^\s*$', txt)) block_percent = empty_line_count / float(txt_line_count) - + # Compare the two types - the type with the larger number of instances wins # in cases where only one or the other represents the vast majority of the document neither wins if print_percent >= block_percent: if .15 <= print_percent <= .75: return 'print' elif .15 <= block_percent <= .75: - return 'block' + return 'block' - # Assume unformatted text with hardbreaks if nothing else matches + # Assume unformatted text with hardbreaks if nothing else matches return 'unformatted' - + # return single if hardbreaks is false return 'single' def detect_formatting_type(txt): + markdown_count = 0 + textile_count = 0 + # Check for markdown # Headings - if len(re.findall('(?mu)^#+', txt)) >= 5: - return 'markdown' - if len(re.findall('(?mu)^=+$', txt)) >= 5: - return 'markdown' - if len(re.findall('(?mu)^-+$', txt)) >= 5: - return 'markdown' + markdown_count += len(re.findall('(?mu)^#+', txt)) + markdown_count += len(re.findall('(?mu)^=+$', txt)) + markdown_count += len(re.findall('(?mu)^-+$', txt)) # Images - if len(re.findall('(?u)!\[.*?\]\(.+?\)', txt)) >= 5: - return 'markdown' + markdown_count += len(re.findall('(?u)!\[.*?\]\(.+?\)', txt)) # Links - if len(re.findall('(?u)(^|(?P
    [^!]))\[.*?\]\([^)]+\)', txt)) >= 5:
    -        return 'markdown'
    -    # Escaped characters
    -    md_escapted_characters = ['\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '#', '+', '-', '.', '!']
    -    for c in md_escapted_characters:
    -        if txt.count('\\'+c) > 10:
    +    markdown_count += len(re.findall('(?u)(^|(?P
    [^!]))\[.*?\]\([^)]+\)', txt))
    +
    +    # Check for textile
    +    # Headings
    +    textile_count += len(re.findall(r'(?mu)^h[1-6]\.', txt))
    +    # Block quote.
    +    textile_count += len(re.findall(r'(?mu)^bq\.', txt))
    +    # Images
    +    textile_count += len(re.findall(r'\![^\s]+(:[^\s]+)*', txt))
    +    # Links
    +    textile_count += len(re.findall(r'"(\(.+?\))*[^\(]+?(\(.+?\))*":[^\s]+', txt))
    +
    +    if markdown_count > 5 or textile_count > 5:
    +        if markdown_count > textile_count:
                 return 'markdown'
    -    
    +        else:
    +            return 'textile'
    +
         return 'heuristic'
    diff --git a/src/calibre/gui2/catalog/catalog_bibtex.py b/src/calibre/gui2/catalog/catalog_bibtex.py
    index f66b63bd58..a24f6b0f95 100644
    --- a/src/calibre/gui2/catalog/catalog_bibtex.py
    +++ b/src/calibre/gui2/catalog/catalog_bibtex.py
    @@ -28,17 +28,17 @@ class PluginWidget(QWidget, Ui_Form):
         def __init__(self, parent=None):
             QWidget.__init__(self, parent)
             self.setupUi(self)
    +
    +    def initialize(self, name, db): #not working properly to update
             from calibre.library.catalog import FIELDS
    -        
    +
             self.all_fields = [x for x in FIELDS if x != 'all']
             #add custom columns
    -        db = db_()
             self.all_fields.extend([x for x in sorted(db.custom_field_keys())])
             #populate
             for x in self.all_fields:
                 QListWidgetItem(x, self.db_fields)
     
    -    def initialize(self, name, db): #not working properly to update
             self.name = name
             fields = gprefs.get(name+'_db_fields', self.all_fields)
             # Restore the activated db_fields from last use
    diff --git a/src/calibre/gui2/dialogs/drm_error.py b/src/calibre/gui2/dialogs/drm_error.py
    new file mode 100644
    index 0000000000..5fbba47165
    --- /dev/null
    +++ b/src/calibre/gui2/dialogs/drm_error.py
    @@ -0,0 +1,21 @@
    +#!/usr/bin/env python
    +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
    +
    +__license__   = 'GPL v3'
    +__copyright__ = '2011, Kovid Goyal '
    +__docformat__ = 'restructuredtext en'
    +
    +
    +from PyQt4.Qt import QDialog
    +from calibre.gui2.dialogs.drm_error_ui import Ui_Dialog
    +
    +class DRMErrorMessage(QDialog, Ui_Dialog):
    +
    +    def __init__(self, parent=None, title=None):
    +        QDialog.__init__(self, parent)
    +        self.setupUi(self)
    +        if title is not None:
    +            t = unicode(self.msg.text())
    +            self.msg.setText('

    %s

    %s'%(title, t)) + self.resize(self.sizeHint()) + diff --git a/src/calibre/gui2/dialogs/drm_error.ui b/src/calibre/gui2/dialogs/drm_error.ui new file mode 100644 index 0000000000..842807c9bc --- /dev/null +++ b/src/calibre/gui2/dialogs/drm_error.ui @@ -0,0 +1,102 @@ + + + Dialog + + + + 0 + 0 + 417 + 235 + + + + This book is DRMed + + + + + + + 0 + 0 + + + + + 132 + 16777215 + + + + + + + :/images/document-encrypt.png + + + + + + + <p>This book is locked by <b>DRM</b>. To learn more about DRM and why you cannot read or convert this book in calibre, +<a href="http://bugs.calibre-ebook.com/wiki/DRM">click here</a>. + + + true + + + true + + + + + + + Qt::Horizontal + + + QDialogButtonBox::Close + + + + + + + + + + + buttonBox + accepted() + Dialog + accept() + + + 248 + 254 + + + 157 + 274 + + + + + buttonBox + rejected() + Dialog + reject() + + + 316 + 260 + + + 286 + 274 + + + + + diff --git a/src/calibre/gui2/dialogs/metadata_bulk.py b/src/calibre/gui2/dialogs/metadata_bulk.py index e1ee4327f3..5ea8f00148 100644 --- a/src/calibre/gui2/dialogs/metadata_bulk.py +++ b/src/calibre/gui2/dialogs/metadata_bulk.py @@ -15,7 +15,7 @@ from calibre.ebooks.metadata import string_to_authors, authors_to_string from calibre.ebooks.metadata.book.base import composite_formatter from calibre.ebooks.metadata.meta import get_metadata from calibre.gui2.custom_column_widgets import populate_metadata_page -from calibre.gui2 import error_dialog +from calibre.gui2 import error_dialog, ResizableDialog from calibre.gui2.progress_indicator import ProgressIndicator from calibre.utils.config import dynamic from calibre.utils.titlecase import titlecase @@ -49,7 +49,7 @@ def get_cover_data(path): -class MyBlockingBusy(QDialog): +class MyBlockingBusy(QDialog): # {{{ do_one_signal = pyqtSignal() @@ -241,8 +241,9 @@ class MyBlockingBusy(QDialog): self.current_index += 1 self.do_one_signal.emit() + # }}} -class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog): +class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog): s_r_functions = { '' : lambda x: x, _('Lower Case') : lambda x: icu_lower(x), @@ -261,9 +262,8 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog): ] def __init__(self, window, rows, model, tab): - QDialog.__init__(self, window) + ResizableDialog.__init__(self, window) Ui_MetadataBulkDialog.__init__(self) - self.setupUi(self) self.model = model self.db = model.db self.ids = [self.db.id(r) for r in rows] diff --git a/src/calibre/gui2/dialogs/metadata_bulk.ui b/src/calibre/gui2/dialogs/metadata_bulk.ui index 41858b099b..9240cd1af8 100644 --- a/src/calibre/gui2/dialogs/metadata_bulk.ui +++ b/src/calibre/gui2/dialogs/metadata_bulk.ui @@ -6,8 +6,8 @@ 0 0 - 752 - 633 + 850 + 650 @@ -17,8 +17,8 @@ :/images/edit_input.png:/images/edit_input.png - - + + @@ -28,818 +28,836 @@ - - - - 6 + + + + QFrame::NoFrame - + 0 - - - + + true + + + + + 0 + 0 + 842 + 589 + + + + 0 - - - &Basic metadata - - - - - - &Author(s): - - - Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter - - - authors - - - - - - - A&utomatically set author sort - - - - - - - Author s&ort: - - - Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter - - - author_sort - - - - - - - Specify how the author(s) of this book should be sorted. For example Charles Dickens should be sorted as Dickens, Charles. - - - - - - - &Rating: - - - Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter - - - rating - - - - - - - Rating of this book. 0-5 stars - - - Rating of this book. 0-5 stars - - - QAbstractSpinBox::PlusMinus - - - No change - - - stars - - - -1 - - - 5 - - - -1 - - - - - - - &Publisher: - - - Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter - - - publisher - - - - - - - true - - - - - - - Add ta&gs: - - - Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter - - - tags - - - - - - - Tags categorize the book. This is particularly useful while searching. <br><br>They can be any words or phrases, separated by commas. - - - - - - - Open Tag Editor - - - Open Tag Editor - - - - :/images/chapters.png:/images/chapters.png - - - - - - - &Remove tags: - - - remove_tags - - - - - - - Comma separated list of tags to remove from the books. - - - - - - - Check this box to remove all tags from the books. - - - Remove all - - - - - - - &Series: - - - Qt::PlainText - - - Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter - - - series - - - - - - - + + + + 0 + + + + &Basic metadata + + + + + + &Author(s): + + + Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter + + + authors + + + + + + + A&utomatically set author sort + + + + + + + Author s&ort: + + + Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter + + + author_sort + + + + + - List of known series. You can add new series. + Specify how the author(s) of this book should be sorted. For example Charles Dickens should be sorted as Dickens, Charles. + + + + + + + &Rating: + + + Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter + + + rating + + + + + + + Rating of this book. 0-5 stars - List of known series. You can add new series. + Rating of this book. 0-5 stars + + QAbstractSpinBox::PlusMinus + + + No change + + + stars + + + -1 + + + 5 + + + -1 + + + + + + + &Publisher: + + + Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter + + + publisher + + + + + true - - QComboBox::InsertAlphabetically + + + + + + Add ta&gs: - - QComboBox::AdjustToContents + + Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter + + + tags - - + + - If checked, the series will be cleared + Tags categorize the book. This is particularly useful while searching. <br><br>They can be any words or phrases, separated by commas. + + + + + + + Open Tag Editor - Clear series + Open Tag Editor + + + + :/images/chapters.png:/images/chapters.png - - - - Qt::Horizontal + + + + &Remove tags: - - - 20 - 0 - + + remove_tags - + - - - - - - + + - If not checked, the series number for the books will be set to 1. + Comma separated list of tags to remove from the books. + + + + + + + Check this box to remove all tags from the books. + + + Remove all + + + + + + + &Series: + + + Qt::PlainText + + + Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter + + + series + + + + + + + + + List of known series. You can add new series. + + + List of known series. You can add new series. + + + true + + + QComboBox::InsertAlphabetically + + + QComboBox::AdjustToContents + + + + + + + If checked, the series will be cleared + + + Clear series + + + + + + + Qt::Horizontal + + + + 20 + 0 + + + + + + + + + + + + If not checked, the series number for the books will be set to 1. If checked, selected books will be automatically numbered, in the order you selected them. So if you selected Book A and then Book B, Book A will have series number 1 and Book B series number 2. - - - Automatically number books in this series - - - - - - - false - - - Series will normally be renumbered from the highest number in the database + + + Automatically number books in this series + + + + + + + false + + + Series will normally be renumbered from the highest number in the database for that series. Checking this box will tell calibre to start numbering from the value in the box + + + Force numbers to start with + + + + + + + false + + + 1 + + + 990000 + + + 1 + + + + + + + Qt::Horizontal + + + + 20 + 10 + + + + + + + + + + Remove &format: + + + remove_format + + + + + + + + + + true + + + + + + + &Swap title and author + + + + + + + Force the title to be in title case. If both this and swap authors are checked, +title and author are swapped before the title case is set - Force numbers to start with + Change title to title case - - - - false - - - 1 - - - 990000 - - - 1 - - - - - - - Qt::Horizontal - - - - 20 - 10 - - - - - - - - - - Remove &format: - - - remove_format - - - - - - - - - - true - - - - - - - &Swap title and author - - - - - - - Force the title to be in title case. If both this and swap authors are checked, -title and author are swapped before the title case is set - - - Change title to title case - - - - - - - Remove stored conversion settings for the selected books. + + + + Remove stored conversion settings for the selected books. Future conversion of these books will use the default settings. - - - Remove &stored conversion settings for the selected books - - - - - - - Qt::Vertical - - - - 20 - 40 - - - - - - - - Change &cover - - - - - - &Generate default cover - - - - - - - &Remove cover - - - - - - - Set from &ebook file(s) - - - - - - - - - - - &Custom metadata - - - - - &Search and replace - - - - QLayout::SetMinimumSize - - - - - true - - - true - - - - - - - - - - - - - - Search &field: - - - search_field - - - - - - - The name of the field that you want to search - - - - - - - + - Search &mode: - - - search_mode + Remove &stored conversion settings for the selected books - - - - Choose whether to use basic text matching or advanced regular expression matching - - - - - + + - Qt::Horizontal + Qt::Vertical 20 - 10 + 40 - - - - - - Te&mplate: - - - s_r_template - - - - - - - - 100 - 0 - - - - Enter a template to be used as the source for the search/replace - - - - - - - &Search for: - - - search_for - - - - - - - - 100 - 0 - - - - Enter the what you are looking for, either plain text or a regular expression, depending on the mode - - - - - - - Check this box if the search string must match exactly upper and lower case. Uncheck it if case is to be ignored - - - Cas&e sensitive - - - true - - - - - - - &Replace with: - - - replace_with - - - - - - - The replacement text. The matched search text will be replaced with this string - - - - - - - - - &Apply function after replace: - - - replace_func + + + + Change &cover + + + + + &Generate default cover + + + + + + + &Remove cover + + + + + + + Set from &ebook file(s) + + + + - - - - Specify how the text is to be processed after matching and replacement. In character mode, the entire -field is processed. In regular expression mode, only the matched text is processed - - - - - - - Qt::Horizontal - - - - 20 - 10 - - - - - - - - - &Destination field: + + + + &Custom metadata + + + + + &Search and replace + + + + QLayout::SetMinimumSize - - destination_field - - - - - - - The field that the text will be put into after all replacements. -If blank, the source field is used if the field is modifiable - - - - - - - + + + + true + + + true + + + + + - M&ode: + + + + + + + + Search &field: - replace_mode + search_field - - + + - Specify how the text should be copied into the destination. + The name of the field that you want to search - - + + + + + + Search &mode: + + + search_mode + + + + + + + Choose whether to use basic text matching or advanced regular expression matching + + + + + + + Qt::Horizontal + + + + 20 + 10 + + + + + + + + + + Te&mplate: + + + s_r_template + + + + + + + + 100 + 0 + + - Specifies whether result items should be split into multiple values or -left as single values. This option has the most effect when the source field is -not multiple and the destination field is multiple + Enter a template to be used as the source for the search/replace + + + + + + + &Search for: + + + search_for + + + + + + + + 100 + 0 + + + + Enter the what you are looking for, either plain text or a regular expression, depending on the mode + + + + + + + Check this box if the search string must match exactly upper and lower case. Uncheck it if case is to be ignored - Split &result + Cas&e sensitive true - - - - Qt::Horizontal - - - - 20 - 10 - - - - - - - - - - - - Qt::Horizontal - - - - 20 - 0 - - - - - - + + - For multiple-valued fields, sho&w + &Replace with: - results_count + replace_with - - - - true - - - 1 - - - 999 - - - 999 - - - - - - - values starting a&t - - - starting_from - - - - - - - true - - - 1 - - - 999 - - - 1 - - - - - - - with values separated b&y - - - multiple_separator - - - - - + + - Used when displaying test results to separate values in multiple-valued fields + The replacement text. The matched search text will be replaced with this string - - - - - - QFrame::NoFrame - - - true - - - - - 0 - 0 - 726 - 334 - - - - - + + + + - Test text + &Apply function after replace: + + + replace_func - - - - Test result + + + + Specify how the text is to be processed after matching and replacement. In character mode, the entire +field is processed. In regular expression mode, only the matched text is processed - - - - Your test: - - - - - - - - - - - + + - Qt::Vertical + Qt::Horizontal 20 - 5 + 10 - - - - - - - - + + + + + &Destination field: + + + destination_field + + + + + + + The field that the text will be put into after all replacements. +If blank, the source field is used if the field is modifiable + + + + + + + + + M&ode: + + + replace_mode + + + + + + + Specify how the text should be copied into the destination. + + + + + + + Specifies whether result items should be split into multiple values or +left as single values. This option has the most effect when the source field is +not multiple and the destination field is multiple + + + Split &result + + + true + + + + + + + Qt::Horizontal + + + + 20 + 10 + + + + + + + + + + + + Qt::Horizontal + + + + 20 + 0 + + + + + + + + For multiple-valued fields, sho&w + + + results_count + + + + + + + true + + + 1 + + + 999 + + + 999 + + + + + + + values starting a&t + + + starting_from + + + + + + + true + + + 1 + + + 999 + + + 1 + + + + + + + with values separated b&y + + + multiple_separator + + + + + + + Used when displaying test results to separate values in multiple-valued fields + + + + + + + + + QFrame::NoFrame + + + true + + + + + 0 + 0 + 197 + 60 + + + + + + + Test text + + + + + + + Test result + + + + + + + Your test: + + + + + + + + + + + + + Qt::Vertical + + + + 20 + 5 + + + + + + + + + + + + + + + - + Qt::Horizontal @@ -893,7 +911,6 @@ not multiple and the destination field is multiple swap_title_and_author change_title_to_title_case button_box - central_widget search_field search_mode s_r_template diff --git a/src/calibre/gui2/dialogs/user_profiles.py b/src/calibre/gui2/dialogs/user_profiles.py index 71c9ebcd04..04c41f0c5e 100644 --- a/src/calibre/gui2/dialogs/user_profiles.py +++ b/src/calibre/gui2/dialogs/user_profiles.py @@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal ' import time, os from PyQt4.Qt import SIGNAL, QUrl, QAbstractListModel, Qt, \ - QVariant, QInputDialog + QVariant from calibre.web.feeds.recipes import compile_recipe from calibre.web.feeds.news import AutomaticNewsRecipe @@ -256,24 +256,61 @@ class %(classname)s(%(base_class)s): def add_builtin_recipe(self): from calibre.web.feeds.recipes.collection import \ - get_builtin_recipe_by_title, get_builtin_recipe_titles - items = sorted(get_builtin_recipe_titles(), key=sort_key) + get_builtin_recipe_collection, get_builtin_recipe_by_id + from PyQt4.Qt import QDialog, QVBoxLayout, QListWidgetItem, \ + QListWidget, QDialogButtonBox, QSize + d = QDialog(self) + d.l = QVBoxLayout() + d.setLayout(d.l) + d.list = QListWidget(d) + d.list.doubleClicked.connect(lambda x: d.accept()) + d.l.addWidget(d.list) + d.bb = QDialogButtonBox(QDialogButtonBox.Ok|QDialogButtonBox.Cancel, + Qt.Horizontal, d) + d.bb.accepted.connect(d.accept) + d.bb.rejected.connect(d.reject) + d.l.addWidget(d.bb) + d.setWindowTitle(_('Choose builtin recipe')) + items = [] + for r in get_builtin_recipe_collection(): + id_ = r.get('id', '') + title = r.get('title', '') + lang = r.get('language', '') + if id_ and title: + items.append((title + ' [%s]'%lang, id_)) - title, ok = QInputDialog.getItem(self, _('Pick recipe'), _('Pick the recipe to customize'), - items, 0, False) - if ok: - title = unicode(title) - profile = get_builtin_recipe_by_title(title) - if self._model.has_title(title): - if question_dialog(self, _('Replace recipe?'), - _('A custom recipe named %s already exists. Do you want to ' - 'replace it?')%title): - self._model.replace_by_title(title, profile) - else: - return + items.sort(key=lambda x:sort_key(x[0])) + for title, id_ in items: + item = QListWidgetItem(title) + item.setData(Qt.UserRole, id_) + d.list.addItem(item) + + d.resize(QSize(450, 400)) + ret = d.exec_() + d.list.doubleClicked.disconnect() + if ret != d.Accepted: + return + + items = list(d.list.selectedItems()) + if not items: + return + item = items[-1] + id_ = unicode(item.data(Qt.UserRole).toString()) + title = unicode(item.data(Qt.DisplayRole).toString()).rpartition(' [')[0] + profile = get_builtin_recipe_by_id(id_) + if profile is None: + raise Exception('Something weird happened') + + if self._model.has_title(title): + if question_dialog(self, _('Replace recipe?'), + _('A custom recipe named %s already exists. Do you want to ' + 'replace it?')%title): + self._model.replace_by_title(title, profile) else: - self.model.add(title, profile) + return + else: + self.model.add(title, profile) self.clear() diff --git a/src/calibre/gui2/layout.py b/src/calibre/gui2/layout.py index aaaf1b0267..2edf19d0c4 100644 --- a/src/calibre/gui2/layout.py +++ b/src/calibre/gui2/layout.py @@ -8,9 +8,9 @@ __docformat__ = 'restructuredtext en' from functools import partial from PyQt4.Qt import QIcon, Qt, QWidget, QToolBar, QSize, \ - pyqtSignal, QToolButton, QPushButton, \ - QObject, QVBoxLayout, QSizePolicy, QLabel, QHBoxLayout, QActionGroup, \ - QMenu + pyqtSignal, QToolButton, QMenu, QCheckBox, \ + QObject, QVBoxLayout, QSizePolicy, QLabel, QHBoxLayout, QActionGroup + from calibre.constants import __appname__ from calibre.gui2.search_box import SearchBox2, SavedSearchBox @@ -178,7 +178,9 @@ class SearchBar(QWidget): # {{{ x.setToolTip(_("

    Search the list of books by title, author, publisher, tags, comments, etc.

    Words separated by spaces are ANDed")) l.addWidget(x) - self.search_button = QPushButton(_('&Go!')) + self.search_button = QToolButton() + self.search_button.setToolButtonStyle(Qt.ToolButtonTextOnly) + self.search_button.setText(_('&Go!')) l.addWidget(self.search_button) self.search_button.setSizePolicy(QSizePolicy.Minimum, QSizePolicy.Minimum) @@ -192,6 +194,12 @@ class SearchBar(QWidget): # {{{ l.addWidget(x) x.setToolTip(_("Reset Quick Search")) + x = parent.search_highlight_only = QCheckBox() + x.setText(_('&Highlight')) + x.setToolTip(_('Highlight matched books in the book list, instead ' + 'of restricting the book list to the matches.')) + l.addWidget(x) + x = parent.saved_search = SavedSearchBox(self) x.setMaximumSize(QSize(150, 16777215)) x.setMinimumContentsLength(15) diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py index 49cb1ce182..eea452c238 100644 --- a/src/calibre/gui2/library/models.py +++ b/src/calibre/gui2/library/models.py @@ -10,7 +10,7 @@ from contextlib import closing from operator import attrgetter from PyQt4.Qt import QAbstractTableModel, Qt, pyqtSignal, QIcon, QImage, \ - QModelIndex, QVariant, QDate + QModelIndex, QVariant, QDate, QColor from calibre.gui2 import NONE, config, UNDEFINED_QDATE from calibre.utils.pyparsing import ParseException @@ -93,6 +93,9 @@ class BooksModel(QAbstractTableModel): # {{{ self.bool_no_icon = QIcon(I('list_remove.png')) self.bool_blank_icon = QIcon(I('blank.png')) self.device_connected = False + self.rows_matching = set() + self.lowest_row_matching = None + self.highlight_only = False self.read_config() def change_alignment(self, colname, alignment): @@ -229,9 +232,27 @@ class BooksModel(QAbstractTableModel): # {{{ self.endInsertRows() self.count_changed() + def set_highlight_only(self, toWhat): + self.highlight_only = toWhat + if self.last_search: + self.research() + def search(self, text, reset=True): try: - self.db.search(text) + if self.highlight_only: + self.db.search('') + if not text: + self.rows_matching = set() + self.lowest_row_matching = None + else: + self.rows_matching = self.db.search(text, return_matches=True) + if self.rows_matching: + self.lowest_row_matching = self.db.row(self.rows_matching[0]) + self.rows_matching = set(self.rows_matching) + else: + self.rows_matching = set() + self.lowest_row_matching = None + self.db.search(text) except ParseException as e: self.searched.emit(e.msg) return @@ -337,8 +358,9 @@ class BooksModel(QAbstractTableModel): # {{{ name, val = mi.format_field(key) if mi.metadata_for_field(key)['datatype'] == 'comments': name += ':html' - if val: + if val and name not in data: data[name] = val + return data @@ -651,6 +673,9 @@ class BooksModel(QAbstractTableModel): # {{{ return NONE if role in (Qt.DisplayRole, Qt.EditRole): return self.column_to_dc_map[col](index.row()) + elif role == Qt.BackgroundColorRole: + if self.id(index) in self.rows_matching: + return QColor('lightgreen') elif role == Qt.DecorationRole: if self.column_to_dc_decorator_map[col] is not None: return self.column_to_dc_decorator_map[index.column()](index.row()) diff --git a/src/calibre/gui2/library/views.py b/src/calibre/gui2/library/views.py index e1e9cf4456..ea2e03fdad 100644 --- a/src/calibre/gui2/library/views.py +++ b/src/calibre/gui2/library/views.py @@ -680,8 +680,14 @@ class BooksView(QTableView): # {{{ def set_editable(self, editable, supports_backloading): self._model.set_editable(editable) + def search_proxy(self, txt): + self._model.search(txt) + if self._model.lowest_row_matching is not None: + self.select_rows([self._model.lowest_row_matching], using_ids=False) + self.setFocus(Qt.OtherFocusReason) + def connect_to_search_box(self, sb, search_done): - sb.search.connect(self._model.search) + sb.search.connect(self.search_proxy) self._search_done = search_done self._model.searched.connect(self.search_done) diff --git a/src/calibre/gui2/preferences/toolbar.py b/src/calibre/gui2/preferences/toolbar.py index c13d956aea..26cdea19d3 100644 --- a/src/calibre/gui2/preferences/toolbar.py +++ b/src/calibre/gui2/preferences/toolbar.py @@ -37,7 +37,10 @@ class BaseModel(QAbstractListModel): dont_remove_from=set(['toolbar-device'])) if name is None: return FakeAction('--- '+_('Separator')+' ---', None) - return gui.iactions[name] + try: + return gui.iactions[name] + except: + return None def rowCount(self, parent): return len(self._data) @@ -124,7 +127,8 @@ class CurrentModel(BaseModel): BaseModel.__init__(self) self.gprefs_name = 'action-layout-'+key current = gprefs[self.gprefs_name] - self._data = [self.name_to_action(x, gui) for x in current] + self._data = [self.name_to_action(x, gui) for x in current] + self._data = [x for x in self._data if x is not None] self.key = key self.gui = gui diff --git a/src/calibre/gui2/search_box.py b/src/calibre/gui2/search_box.py index 9f74abfc86..e4073a01c9 100644 --- a/src/calibre/gui2/search_box.py +++ b/src/calibre/gui2/search_box.py @@ -16,6 +16,7 @@ from calibre.gui2 import config from calibre.gui2.dialogs.confirm_delete import confirm from calibre.gui2.dialogs.saved_search_editor import SavedSearchEditor from calibre.gui2.dialogs.search import SearchDialog +from calibre.utils.config import dynamic from calibre.utils.search_query_parser import saved_searches from calibre.utils.icu import sort_key @@ -375,6 +376,9 @@ class SearchBoxMixin(object): # {{{ unicode(self.search.toolTip()))) self.advanced_search_button.setStatusTip(self.advanced_search_button.toolTip()) self.clear_button.setStatusTip(self.clear_button.toolTip()) + self.search_highlight_only.stateChanged.connect(self.highlight_only_changed) + self.search_highlight_only.setChecked( + dynamic.get('search_highlight_only', False)) def focus_search_box(self, *args): self.search.setFocus(Qt.OtherFocusReason) @@ -401,6 +405,11 @@ class SearchBoxMixin(object): # {{{ def focus_to_library(self): self.current_view().setFocus(Qt.OtherFocusReason) + def highlight_only_changed(self, toWhat): + dynamic.set('search_highlight_only', toWhat) + self.current_view().model().set_highlight_only(toWhat) + self.focus_to_library() + # }}} class SavedSearchBoxMixin(object): # {{{ diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py index a6eeabd57f..01d3180778 100644 --- a/src/calibre/gui2/ui.py +++ b/src/calibre/gui2/ui.py @@ -468,12 +468,8 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{ try: if 'calibre.ebooks.DRMError' in job.details: if not minz: - d = error_dialog(self, _('Conversion Error'), - _('

    Could not convert: %s

    It is a ' - 'DRMed book. You must first remove the ' - 'DRM using third party tools.')%\ - (job.description.split(':')[-1], - 'http://bugs.calibre-ebook.com/wiki/DRM')) + from calibre.gui2.dialogs.drm_error import DRMErrorMessage + d = DRMErrorMessage(self, job.description.split(':')[-1]) d.setModal(False) d.show() self._modeless_dialogs.append(d) diff --git a/src/calibre/gui2/viewer/main.py b/src/calibre/gui2/viewer/main.py index 25f69b1558..c5001659a0 100644 --- a/src/calibre/gui2/viewer/main.py +++ b/src/calibre/gui2/viewer/main.py @@ -26,6 +26,7 @@ from calibre.gui2.search_box import SearchBox2 from calibre.ebooks.metadata import MetaInformation from calibre.customize.ui import available_input_formats from calibre.gui2.viewer.dictionary import Lookup +from calibre import as_unicode class TOCItem(QStandardItem): @@ -626,13 +627,12 @@ class EbookViewer(MainWindow, Ui_EbookViewer): QApplication.processEvents() if worker.exception is not None: if isinstance(worker.exception, DRMError): - error_dialog(self, _('DRM Error'), - _('

    This book is protected by DRM') - %'http://wiki.mobileread.com/wiki/DRM').exec_() + from calibre.gui2.dialogs.drm_error import DRMErrorMessage + DRMErrorMessage(self).exec_() else: r = getattr(worker.exception, 'reason', worker.exception) error_dialog(self, _('Could not open ebook'), - unicode(r), det_msg=worker.traceback, show=True) + as_unicode(r), det_msg=worker.traceback, show=True) self.close_progress_indicator() else: self.metadata.show_opf(self.iterator.opf, os.path.splitext(pathtoebook)[1][1:]) diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index 6aef45dbbd..4168360d3a 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -411,7 +411,8 @@ class ResultCache(SearchQueryParser): # {{{ if isinstance(location, list): if allow_recursion: for loc in location: - matches |= self.get_matches(loc, query, allow_recursion=False) + matches |= self.get_matches(loc, query, candidates, + allow_recursion=False) return matches raise ParseException(query, len(query), 'Recursive query group detected', self) @@ -419,11 +420,11 @@ class ResultCache(SearchQueryParser): # {{{ fm = self.field_metadata[location] # take care of dates special case if fm['datatype'] == 'datetime': - return self.get_dates_matches(location, query.lower()) + return self.get_dates_matches(location, query.lower(), candidates) # take care of numbers special case if fm['datatype'] in ('rating', 'int', 'float'): - return self.get_numeric_matches(location, query.lower()) + return self.get_numeric_matches(location, query.lower(), candidates) # take care of the 'count' operator for is_multiples if fm['is_multiple'] and \ @@ -431,7 +432,8 @@ class ResultCache(SearchQueryParser): # {{{ query[1:1] in '=<>!': vf = lambda item, loc=fm['rec_index'], ms=fm['is_multiple']:\ len(item[loc].split(ms)) if item[loc] is not None else 0 - return self.get_numeric_matches(location, query[1:], val_func=vf) + return self.get_numeric_matches(location, query[1:], + candidates, val_func=vf) # everything else, or 'all' matches matchkind = CONTAINS_MATCH diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index 349800c8ba..5cda9baa8c 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -1524,19 +1524,32 @@ class EPUB_MOBI(CatalogPlugin): this_title['formats'] = formats # Add user notes to be displayed in header - # Special case handling for datetime fields + # Special case handling for datetime fields and lists if self.opts.header_note_source_field: field_md = self.__db.metadata_for_field(self.opts.header_note_source_field) notes = self.__db.get_field(record['id'], self.opts.header_note_source_field, index_is_id=True) - if notes and field_md['datatype'] == 'datetime': - # Reformat date fields to match UI presentation: dd MMM YYYY - notes = format_date(notes,'dd MMM yyyy') - if notes: + if field_md['datatype'] == 'text': + if isinstance(notes,list): + notes = ' · '.join(notes) + elif field_md['datatype'] == 'datetime': + notes = format_date(notes,'dd MMM yyyy') + elif field_md['datatype'] == 'composite': + m = re.match(r'\[(.+)\]$', notes) + if m is not None: + # Sniff for special pseudo-list string "[]" + bracketed_content = m.group(1) + if ',' in bracketed_content: + # Recast the comma-separated items as a list + items = bracketed_content.split(',') + items = [i.strip() for i in items] + notes = ' · '.join(items) + else: + notes = bracketed_content this_title['notes'] = {'source':field_md['name'], - 'content':notes} + 'content':notes} titles.append(this_title) diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index aa491aff28..5f66297322 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -341,10 +341,6 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): self.has_id = self.data.has_id self.count = self.data.count - # Count times get_metadata is called, and how many times in the cache - self.gm_count = 0 - self.gm_missed = 0 - for prop in ('author_sort', 'authors', 'comment', 'comments', 'isbn', 'publisher', 'rating', 'series', 'series_index', 'tags', 'title', 'timestamp', 'uuid', 'pubdate', 'ondevice'): @@ -710,6 +706,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): formats = row[fm['formats']] if not formats: formats = None + else: + formats = formats.split(',') mi.formats = formats tags = row[fm['tags']] if tags: diff --git a/src/calibre/trac/bzr_commit_plugin.py b/src/calibre/trac/bzr_commit_plugin.py index df6bf699d1..6c36115cae 100644 --- a/src/calibre/trac/bzr_commit_plugin.py +++ b/src/calibre/trac/bzr_commit_plugin.py @@ -110,6 +110,7 @@ class cmd_commit(_cmd_commit): suffix = 'The fix will be in the next release.' action = action+'ed' msg = '%s in branch %s. %s'%(action, nick, suffix) + msg = msg.replace('Fixesed', 'Fixed') server = xmlrpclib.ServerProxy(url) server.ticket.update(int(bug), msg, {'status':'closed', 'resolution':'fixed'}, diff --git a/src/calibre/utils/cleantext.py b/src/calibre/utils/cleantext.py index a27f74529e..938960df93 100644 --- a/src/calibre/utils/cleantext.py +++ b/src/calibre/utils/cleantext.py @@ -4,7 +4,6 @@ __copyright__ = '2010, sengian ' __docformat__ = 'restructuredtext en' import re, htmlentitydefs -from functools import partial _ascii_pat = None @@ -50,4 +49,4 @@ def unescape(text, rm=False, rchar=u''): if rm: return rchar #replace by char return text # leave as is - return re.sub("&#?\w+;", fixup, text) \ No newline at end of file + return re.sub("&#?\w+;", fixup, text) diff --git a/src/calibre/utils/formatter.py b/src/calibre/utils/formatter.py index 40760bf91b..0b5f1d1f52 100644 --- a/src/calibre/utils/formatter.py +++ b/src/calibre/utils/formatter.py @@ -18,6 +18,24 @@ class _Parser(object): LEX_NUM = 4 LEX_EOF = 5 + def _python(self, func): + locals = {} + exec func in locals + if 'evaluate' not in locals: + self.error('no evaluate function in python') + try: + result = locals['evaluate'](self.parent.kwargs) + if isinstance(result, (float, int)): + result = unicode(result) + elif isinstance(result, list): + result = ','.join(result) + elif isinstance(result, str): + result = unicode(result) + return result + except Exception as e: + self.error('python function threw exception: ' + e.msg) + + def _strcmp(self, x, y, lt, eq, gt): v = strcmp(x, y) if v < 0: @@ -79,6 +97,7 @@ class _Parser(object): 'field' : (1, lambda s, x: s.parent.get_value(x, [], s.parent.kwargs)), 'multiply' : (2, partial(_math, op='*')), 'print' : (-1, _print), + 'python' : (1, _python), 'strcat' : (-1, _concat), 'strcmp' : (5, _strcmp), 'substr' : (3, lambda s, x, y, z: x[int(y): len(x) if int(z) == 0 else int(z)]), @@ -362,7 +381,7 @@ class TemplateFormatter(string.Formatter): (r'\'.*?((?wand, data, dlen); + + if (!res) + return magick_set_exception(self->wand); + + Py_RETURN_NONE; +} + +// }}} + // Image.open {{{ static PyObject * magick_Image_read(magick_Image *self, PyObject *args, PyObject *kwargs) { @@ -993,6 +1013,10 @@ static PyMethodDef magick_Image_methods[] = { {"destroy", (PyCFunction)magick_Image_destroy, METH_VARARGS, "Destroy the underlying ImageMagick Wand. WARNING: After using this method, all methods on this object will raise an exception."}, + {"identify", (PyCFunction)magick_Image_identify, METH_VARARGS, + "Identify an image from a byte buffer (string)" + }, + {"load", (PyCFunction)magick_Image_load, METH_VARARGS, "Load an image from a byte buffer (string)" }, diff --git a/src/calibre/utils/wmf/__init__.py b/src/calibre/utils/wmf/__init__.py index 68dfb8d2b5..cb7736e06a 100644 --- a/src/calibre/utils/wmf/__init__.py +++ b/src/calibre/utils/wmf/__init__.py @@ -5,5 +5,52 @@ __license__ = 'GPL v3' __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' +import glob + +from calibre.constants import plugins, iswindows, filesystem_encoding +from calibre.ptempfile import TemporaryDirectory +from calibre import CurrentDir +from calibre.utils.magick import Image, PixelWand + +class Unavailable(Exception): + pass + +class NoRaster(Exception): + pass + +def extract_raster_image(wmf_data): + try: + wmf, wmf_err = plugins['wmf'] + except KeyError: + raise Unavailable('libwmf not available on this platform') + if wmf_err: + raise Unavailable(wmf_err) + + if iswindows: + import sys, os + appdir = sys.app_dir + if isinstance(appdir, unicode): + appdir = appdir.encode(filesystem_encoding) + fdir = os.path.join(appdir, 'wmffonts') + wmf.set_font_dir(fdir) + + data = '' + + with TemporaryDirectory('wmf2png') as tdir: + with CurrentDir(tdir): + wmf.render(wmf_data) + + images = list(sorted(glob.glob('*.png'))) + if not images: + raise NoRaster('No raster images in WMF') + data = open(images[0], 'rb').read() + + im = Image() + im.load(data) + pw = PixelWand() + pw.color = '#ffffff' + im.rotate(pw, 180) + + return im.export('png') diff --git a/src/calibre/utils/wmf/wmf.c b/src/calibre/utils/wmf/wmf.c index 1f8e8a27f3..74d3ca813f 100644 --- a/src/calibre/utils/wmf/wmf.c +++ b/src/calibre/utils/wmf/wmf.c @@ -4,6 +4,7 @@ #include #include +//#include typedef struct { char *data; @@ -13,7 +14,7 @@ typedef struct { //This code is taken mostly from the Abiword wmf plugin - +// Buffer read {{{ // returns unsigned char cast to int, or EOF static int wmf_WMF_read(void * context) { char c; @@ -22,11 +23,11 @@ static int wmf_WMF_read(void * context) { if (info->pos == info->len) return EOF; - c = info->data[pos]; + c = info->data[info->pos]; info->pos++; - return (int)c; + return (int)((unsigned char)c); } // returns (-1) on error, else 0 @@ -44,8 +45,17 @@ static long wmf_WMF_tell(void * context) { return (long) info->pos; } +// }}} +char _png_name_buf[100]; +char *wmf_png_name(void *ctxt) { + int *num = (int*)ctxt; + *num = *num + 1; + snprintf(_png_name_buf, 90, "%04d.png", *num); + return _png_name_buf; +} + #define CLEANUP if(API) { if (stream) wmf_free(API, stream); wmf_api_destroy(API); }; static PyObject * @@ -66,9 +76,9 @@ wmf_render(PyObject *self, PyObject *args) { unsigned int max_width = 1600; unsigned int max_height = 1200; - unsigned long max_flags = 0; static const char* Default_Description = "wmf2svg"; + int fname_counter = 0; wmf_error_t err; @@ -125,6 +135,8 @@ wmf_render(PyObject *self, PyObject *args) { ddata->Description = (char *)Default_Description; ddata->bbox = bbox; + ddata->image.context = (void *)&fname_counter; + ddata->image.name = wmf_png_name; wmf_display_size(API, &disp_width, &disp_height, 96, 96); @@ -156,9 +168,9 @@ wmf_render(PyObject *self, PyObject *args) { ddata->height = (unsigned int) ceil ((double) wmf_height); } - ddata->flags |= WMF_SVG_INLINE_IMAGES; - - ddata->flags |= WMF_GD_OUTPUT_MEMORY | WMF_GD_OWN_BUFFER; + // Needs GD + //ddata->flags |= WMF_SVG_INLINE_IMAGES; + //ddata->flags |= WMF_GD_OUTPUT_MEMORY | WMF_GD_OWN_BUFFER; err = wmf_play(API, 0, &(bbox)); @@ -178,11 +190,32 @@ wmf_render(PyObject *self, PyObject *args) { return ans; } +#ifdef _WIN32 +void set_libwmf_fontdir(const char *); + +static PyObject * +wmf_setfontdir(PyObject *self, PyObject *args) { + char *path; + if (!PyArg_ParseTuple(args, "s", &path)) + return NULL; + set_libwmf_fontdir(path); + + Py_RETURN_NONE; +} +#endif + + + static PyMethodDef wmf_methods[] = { {"render", wmf_render, METH_VARARGS, - "render(path) -> Render wmf as svg." + "render(data) -> Render wmf as svg." }, +#ifdef _WIN32 + {"set_font_dir", wmf_setfontdir, METH_VARARGS, + "set_font_dir(path) -> Set the path to the fonts dir on windows, must be called at least once before using render()" + }, +#endif {NULL} /* Sentinel */ }; diff --git a/src/calibre/utils/zipfile.py b/src/calibre/utils/zipfile.py index ff290abd25..c230b9dfa7 100644 --- a/src/calibre/utils/zipfile.py +++ b/src/calibre/utils/zipfile.py @@ -982,9 +982,12 @@ class ZipFile: zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH]) if fname != zinfo.orig_filename: - raise BadZipfile, \ - 'File name in directory "%s" and header "%s" differ.' % ( - zinfo.orig_filename, fname) + print ('WARNING: Header (%r) and directory (%r) filenames do not' + ' match inside ZipFile')%(fname, zinfo.orig_filename) + print 'Using directory filename %r'%zinfo.orig_filename + #raise BadZipfile, \ + # 'File name in directory "%r" and header "%r" differ.' % ( + # zinfo.orig_filename, fname) # check for encrypted flag & handle password is_encrypted = zinfo.flag_bits & 0x1 diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 7bd5301dfb..ee5b11c5f6 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -700,10 +700,17 @@ class BasicNewsRecipe(Recipe): for attr in self.remove_attributes: for x in soup.findAll(attrs={attr:True}): del x[attr] - for base in list(soup.findAll(['base', 'iframe'])): + for base in list(soup.findAll(['base', 'iframe', 'canvas', 'embed', + 'command', 'datalist', 'video', 'audio'])): base.extract() ans = self.postprocess_html(soup, first_fetch) + + # Nuke HTML5 tags + for x in ans.findAll(['article', 'aside', 'header', 'footer', 'nav', + 'figcaption', 'figure', 'section']): + x.name = 'div' + if job_info: url, f, a, feed_len = job_info try: diff --git a/src/calibre/web/feeds/recipes/collection.py b/src/calibre/web/feeds/recipes/collection.py index a513cf3880..5dd360213b 100644 --- a/src/calibre/web/feeds/recipes/collection.py +++ b/src/calibre/web/feeds/recipes/collection.py @@ -108,7 +108,6 @@ def download_builtin_recipe(urn): br = browser() return br.open_novisit('http://status.calibre-ebook.com/recipe/'+urn).read() - def get_builtin_recipe_by_title(title, log=None, download_recipe=False): for x in get_builtin_recipe_collection(): if x.get('title') == title: @@ -127,6 +126,24 @@ def get_builtin_recipe_by_title(title, log=None, download_recipe=False): 'Failed to download recipe, using builtin version') return P('recipes/%s.recipe'%urn, data=True) +def get_builtin_recipe_by_id(id_, log=None, download_recipe=False): + for x in get_builtin_recipe_collection(): + if x.get('id') == id_: + urn = x.get('id')[8:] + if download_recipe: + try: + if log is not None: + log('Trying to get latest version of recipe:', urn) + return download_builtin_recipe(urn) + except: + if log is None: + import traceback + traceback.print_exc() + else: + log.exception( + 'Failed to download recipe, using builtin version') + return P('recipes/%s.recipe'%urn, data=True) + class SchedulerConfig(object): def __init__(self):