Merge + remove translation in rtf2xml + modify debug dir integration

This commit is contained in:
Sengian 2011-01-13 08:32:03 +01:00
commit 8da8eca1d3
59 changed files with 2768 additions and 1203 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.0 KiB

View File

@ -1,59 +1,79 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>' __author__ = 'Gerardo Diez'
__copyright__ = 'Gerardo Diez<gerardo.diez.garcia@gmail.com>'
description = 'Main daily newspaper from Spain - v1.00 (05, Enero 2011)'
__docformat__ = 'restructuredtext en'
''' '''
www.expansion.com expansion.es
''' '''
from calibre.web.feeds.recipes import BasicNewsRecipe
class Publico(BasicNewsRecipe):
title =u'Expansion.com'
__author__ ='Gerardo Diez'
publisher =u'Unidad Editorial Información Económica, S.L.'
category ='finances, catalunya'
oldest_article =1
max_articles_per_feed =100
simultaneous_downloads =10
cover_url =u'http://estaticos01.expansion.com/iconos/v2.x/v2.0/cabeceras/logo_expansion.png'
timefmt ='[%A, %d %B, %Y]'
encoding ='latin'
language ='es'
remove_javascript =True
no_stylesheets =True
keep_only_tags =dict(name='div', attrs={'class':['noticia primer_elemento']})
remove_tags =[
dict(name='div', attrs={'class':['compartir', 'metadata_desarrollo_noticia', 'relacionadas', 'mas_info','publicidad publicidad_textlink', 'ampliarfoto']}),
dict(name='ul', attrs={'class':['bolos_desarrollo_noticia']}),
dict(name='span', attrs={'class':['comentarios']}),
dict(name='p', attrs={'class':['cintillo_comentarios', 'cintillo_comentarios formulario']}),
dict(name='div', attrs={'id':['comentarios_lectores_listado']})
]
feeds =[
(u'Portada', u'http://estaticos.expansion.com/rss/portada.xml'),
(u'Portada: Bolsas', u'http://estaticos.expansion.com/rss/mercados.xml'),
(u'Divisas', u'http://estaticos.expansion.com/rss/mercadosdivisas.xml'),
(u'Euribor', u'http://estaticos.expansion.com/rss/mercadoseuribor.xml'),
(u'Materias Primas', u'http://estaticos.expansion.com/rss/mercadosmateriasprimas.xml'),
(u'Renta Fija', u'http://estaticos.expansion.com/rss/mercadosrentafija.xml'),
from calibre.web.feeds.news import BasicNewsRecipe (u'Portada: Mi Dinero', u'http://estaticos.expansion.com/rss/midinero.xml'),
from calibre.ebooks.BeautifulSoup import Tag (u'Hipotecas', u'http://estaticos.expansion.com/rss/midinerohipotecas.xml'),
(u'Créditos', u'http://estaticos.expansion.com/rss/midinerocreditos.xml'),
(u'Pensiones', u'http://estaticos.expansion.com/rss/midineropensiones.xml'),
(u'Fondos de Inversión', u'http://estaticos.expansion.com/rss/midinerofondos.xml'),
(u'Motor', u'http://estaticos.expansion.com/rss/midineromotor.xml'),
class Expansion(BasicNewsRecipe): (u'Portada: Empresas', u'http://estaticos.expansion.com/rss/empresas.xml'),
title = 'Diario Expansion' (u'Banca', u'http://estaticos.expansion.com/rss/empresasbanca.xml'),
__author__ = 'Darko Miletic' (u'TMT', u'http://estaticos.expansion.com/rss/empresastmt.xml'),
description = 'Lider de informacion de mercados, economica y politica' (u'Energía', u'http://estaticos.expansion.com/rss/empresasenergia.xml'),
publisher = 'expansion.com' (u'Inmobiliario y Construcción', u'http://estaticos.expansion.com/rss/empresasinmobiliario.xml'),
category = 'news, politics, Spain' (u'Transporte y Turismo', u'http://estaticos.expansion.com/rss/empresastransporte.xml'),
oldest_article = 2 (u'Automoción e Industria', u'http://estaticos.expansion.com/rss/empresasauto-industria.xml'),
max_articles_per_feed = 100 (u'Distribución', u'http://estaticos.expansion.com/rss/empresasdistribucion.xml'),
no_stylesheets = True (u'Deporte y Negocio', u' http://estaticos.expansion.com/rss/empresasdeporte.xml'),
use_embedded_content = False (u'Mi Negocio', u'http://estaticos.expansion.com/rss/empresasminegocio.xml'),
delay = 1 (u'Interiores', u'http://estaticos.expansion.com/rss/empresasinteriores.xml'),
encoding = 'iso-8859-15' (u'Digitech', u'http://estaticos.expansion.com/rss/empresasdigitech.xml'),
language = 'es'
direction = 'ltr' (u'Portada: Economía y Política', u'http://estaticos.expansion.com/rss/economiapolitica.xml'),
(u'Política', u'http://estaticos.expansion.com/rss/economia.xml'),
(u'Portada: Sociedad', u'http://estaticos.expansion.com/rss/entorno.xml'),
html2lrf_options = [ (u'Portada: Opinión', u'http://estaticos.expansion.com/rss/opinion.xml'),
'--comment' , description (u'Llaves y editoriales', u'http://estaticos.expansion.com/rss/opinioneditorialyllaves.xml'),
, '--category' , category (u'Tribunas', u'http://estaticos.expansion.com/rss/opiniontribunas.xml'),
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' (u'Portada: Jurídico', u'http://estaticos.expansion.com/rss/juridico.xml'),
(u'Entrevistas', u'http://estaticos.expansion.com/rss/juridicoentrevistas.xml'),
(u'Opinión', u'http://estaticos.expansion.com/rss/juridicoopinion.xml'),
(u'Sentencias', u'http://estaticos.expansion.com/rss/juridicosentencias.xml'),
feeds = [ (u'Mujer', u'http://estaticos.expansion.com/rss/mujer-empresa.xml'),
(u'Ultimas noticias', u'http://rss.expansion.com/rss/descarga.htm?data2=178') (u'Catalu&ntilde;a', u'http://estaticos.expansion.com/rss/catalunya.xml'),
,(u'Temas del dia' , u'http://rss.expansion.com/rss/descarga.htm?data2=178') (u'Función pública', u'http://estaticos.expansion.com/rss/funcion-publica.xml')
] ]
keep_only_tags = [dict(name='div', attrs={'id':'principal'})]
remove_tags = [
dict(name=['object','link','script'])
,dict(name='div', attrs={'class':['utilidades','tit_relacionadas']})
]
remove_tags_after = [dict(name='div', attrs={'class':'tit_relacionadas'})]
def preprocess_html(self, soup):
soup.html['dir' ] = self.direction
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
soup.head.insert(0,mcharset)
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -8,12 +8,13 @@ __docformat__ = 'restructuredtext en'
globeandmail.com globeandmail.com
''' '''
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1287083651(BasicNewsRecipe): class AdvancedUserRecipe1287083651(BasicNewsRecipe):
title = u'Globe & Mail' title = u'Globe & Mail'
__license__ = 'GPL v3' __author__ = 'Kovid Goyal'
__author__ = 'Szing'
oldest_article = 2 oldest_article = 2
no_stylesheets = True no_stylesheets = True
max_articles_per_feed = 100 max_articles_per_feed = 100
@ -38,24 +39,19 @@ class AdvancedUserRecipe1287083651(BasicNewsRecipe):
(u'Sports', u'http://www.theglobeandmail.com/auto/?service=rss') (u'Sports', u'http://www.theglobeandmail.com/auto/?service=rss')
] ]
keep_only_tags = [ preprocess_regexps = [
dict(name='h1'), (re.compile(r'<head.*?</head>', re.DOTALL), lambda m: '<head></head>'),
dict(name='h2', attrs={'id':'articletitle'}), (re.compile(r'<script.*?</script>', re.DOTALL), lambda m: ''),
dict(name='p', attrs={'class':['leadText', 'meta', 'leadImage', 'redtext byline', 'bodyText']}), ]
dict(name='div', attrs={'class':['news','articlemeta','articlecopy']}),
dict(name='id', attrs={'class':'article'}),
dict(name='table', attrs={'class':'todays-market'}),
dict(name='header', attrs={'id':'leadheader'})
]
remove_tags_before = dict(name='h1')
remove_tags = [ remove_tags = [
dict(name='div', attrs={'id':['tabInside', 'ShareArticles', 'topStories']}) dict(name='div', attrs={'id':['ShareArticles', 'topStories']}),
] dict(href=lambda x: x and 'tracking=' in x),
{'class':['articleTools', 'pagination', 'Ads', 'topad',
#this has to be here or the text in the article appears twice. 'breadcrumbs', 'footerNav', 'footerUtil', 'downloadlinks']}]
remove_tags_after = [dict(id='article')]
#Use the mobile version rather than the web version #Use the mobile version rather than the web version
def print_version(self, url): def print_version(self, url):
return url + '&service=mobile' return url.rpartition('?')[0] + '?service=mobile'

View File

@ -1,10 +1,9 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
msnbc.msn.com msnbc.msn.com
''' '''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class MsNBC(BasicNewsRecipe): class MsNBC(BasicNewsRecipe):
@ -19,7 +18,16 @@ class MsNBC(BasicNewsRecipe):
publisher = 'msnbc.com' publisher = 'msnbc.com'
category = 'news, USA, world' category = 'news, USA, world'
language = 'en' language = 'en'
extra_css = ' body{ font-family: sans-serif } .head{font-family: serif; font-size: xx-large; font-weight: bold; color: #CC0000} .abstract{font-weight: bold} .source{font-size: small} .updateTime{font-size: small} ' extra_css = """
body{ font-family: Georgia,Times,serif }
.hide{display: none}
.caption{font-family: Arial,sans-serif; font-size: x-small}
.entry-summary{font-family: Arial,sans-serif}
.copyright{font-size: 0.95em; font-style: italic}
.source-org{font-size: small; font-family: Arial,sans-serif}
img{display: block; margin-bottom: 0.5em}
span.byline{display: none}
"""
conversion_options = { conversion_options = {
'comments' : description 'comments' : description
@ -28,14 +36,20 @@ class MsNBC(BasicNewsRecipe):
,'publisher': publisher ,'publisher': publisher
} }
preprocess_regexps = [ remove_tags_before = dict(name='h1', attrs={'id':'headline'})
(re.compile(r'</style></head>', re.DOTALL|re.IGNORECASE),lambda match: '</style>') remove_tags_after = dict(name='span', attrs={'class':['copyright','Linear copyright']})
,(re.compile(r'<div class="head">', re.DOTALL|re.IGNORECASE),lambda match: '</head><body><div class="head">'), keep_only_tags=[
] dict(attrs={'id':['headline','deck','byline','source','intelliTXT']})
,dict(attrs={'class':['gl_headline','articleText','drawer-content Linear','v-center3','byline','textBodyBlack']})
]
remove_attributes=['property','lang','rel','xmlns:fb','xmlns:v','xmlns:dc','xmlns:dcmitype','xmlns:og','xmlns:media','xmlns:vcard','typeof','itemscope','itemtype','itemprop','about','type','size','width','height','onreadystatechange','data','border','hspace','vspace']
remove_tags = [
dict(name=['iframe','object','link','embed','meta','table'])
,dict(name='span', attrs={'class':['copyright','Linear copyright']})
,dict(name='div', attrs={'class':'social'})
]
remove_tags_before = dict(name='div', attrs={'class':'head'})
remove_tags_after = dict(name='div', attrs={'class':'copyright'})
remove_tags = [dict(name=['iframe','object','link','script','form'])]
feeds = [ feeds = [
(u'US News' , u'http://rss.msnbc.msn.com/id/3032524/device/rss/rss.xml' ) (u'US News' , u'http://rss.msnbc.msn.com/id/3032524/device/rss/rss.xml' )
@ -48,11 +62,26 @@ class MsNBC(BasicNewsRecipe):
,(u'Tech & Science', u'http://rss.msnbc.msn.com/id/3032117/device/rss/rss.xml' ) ,(u'Tech & Science', u'http://rss.msnbc.msn.com/id/3032117/device/rss/rss.xml' )
] ]
def print_version(self, url):
return url + 'print/1/displaymode/1098/'
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.head.findAll('div'): for item in soup.body.findAll('html'):
item.extract() item.name='div'
for item in soup.body.findAll('div'):
if item.has_key('id') and item['id'].startswith('vine-'):
item.extract()
if item.has_key('class') and ( item['class'].startswith('ad') or item['class'].startswith('vine')):
item.extract()
for item in soup.body.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
for item in soup.body.findAll('ol'):
if item.has_key('class') and item['class'].startswith('grid'):
item.extract()
for item in soup.body.findAll('span'):
if ( item.has_key('id') and item['id'].startswith('byLine') and item.string is None) or ( item.has_key('class') and item['class'].startswith('inline') ):
item.extract()
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup return soup

View File

@ -35,7 +35,6 @@ class TechnologyReview(BasicNewsRecipe):
def get_article_url(self, article): def get_article_url(self, article):
return article.get('guid', article.get('id', None)) return article.get('guid', article.get('id', None))
def print_version(self, url): def print_version(self, url):
baseurl='http://www.technologyreview.com/printer_friendly_article.aspx?id=' baseurl='http://www.technologyreview.com/printer_friendly_article.aspx?id='
split1 = string.split(url,"/") split1 = string.split(url,"/")
@ -43,3 +42,25 @@ class TechnologyReview(BasicNewsRecipe):
split2= string.split(xxx,"/") split2= string.split(xxx,"/")
s = baseurl + split2[0] s = baseurl + split2[0]
return s return s
def postprocess_html(self, soup, first_fetch):
    """Strip page decoration from a printer-friendly article page.

    Removes the header picture, the window close button and the banner
    advertisement, then unwraps every text-only hyperlink so the article
    body contains no links.  Returns the modified soup.
    """
    # BUG FIX: the original signature was ``postprocess_html(self, soup, True)``
    # -- ``True`` is not a legal parameter name; calibre passes a boolean
    # "first fetch" flag in this position, so give it its conventional name.
    #
    # Decoration elements to drop.  soup.find() returns None when a page
    # variant lacks one of them, so guard before replaceWith (the original
    # raised AttributeError in that case).
    for css_class in ('header', 'close', 'bannerad'):
        tag = soup.find(True, {'class': css_class})
        if tag is not None:
            tag.replaceWith("")
    # thanks kiklop74! This code removes all links from the text
    for alink in soup.findAll('a'):
        if alink.string is not None:
            tstr = alink.string
            alink.replaceWith(tstr)
    return soup

View File

@ -28,7 +28,7 @@ class TyzdenRecipe(BasicNewsRecipe):
if (weeknum > 1): if (weeknum > 1):
weeknum -= 1 weeknum -= 1
title = u'.tyzden ' + str(weeknum) + '/' + str(year) title = u'tyzden'
base_url_path = 'http://www.tyzden.sk/casopis/' + str(year) + '/' + str(weeknum) base_url_path = 'http://www.tyzden.sk/casopis/' + str(year) + '/' + str(weeknum)
base_url = base_url_path + '.html' base_url = base_url_path + '.html'

View File

@ -2,8 +2,10 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.chardet import xml_to_unicode
class Wired_Daily(BasicNewsRecipe): class Wired_Daily(BasicNewsRecipe):
@ -15,30 +17,43 @@ class Wired_Daily(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
preprocess_regexps = [(re.compile(r'<head.*</head>', re.DOTALL), lambda m:
'<head></head>')]
remove_tags_before = dict(name='div', id='content') remove_tags_before = dict(name='div', id='content')
remove_tags = [dict(id=['social_tools', 'outerWrapper', 'sidebar', remove_tags = [dict(id=['header', 'commenting_module', 'post_nav',
'footer', 'advertisement', 'blog_subscription_unit', 'social_tools', 'sidebar', 'footer', 'social_wishlist', 'pgwidget',
'brightcove_component']), 'outerWrapper', 'inf_widget']),
{'class':'entryActions'}, {'class':['entryActions', 'advertisement', 'entryTags']},
dict(name=['noscript', 'script'])] dict(name=['noscript', 'script']),
dict(name='h4', attrs={'class':re.compile(r'rat\d+')}),
{'class':lambda x: x and x.startswith('contentjump')},
dict(name='li', attrs={'class':['entryCategories', 'entryEdit']})]
feeds = [ feeds = [
('Top News', 'http://feeds.wired.com/wired/index'), ('Top News', 'http://feeds.wired.com/wired/index'),
('Culture', 'http://feeds.wired.com/wired/culture'), ('Product Reviews',
('Software', 'http://feeds.wired.com/wired/software'), 'http://www.wired.com/reviews/feeds/latestProductsRss'),
('Mac', 'http://feeds.feedburner.com/cultofmac/bFow'), ('Autopia', 'http://www.wired.com/autopia/feed/'),
('Gadgets', 'http://feeds.wired.com/wired/gadgets'), ('Danger Room', 'http://www.wired.com/dangerroom/feed/'),
('Cars', 'http://feeds.wired.com/wired/cars'), ('Epicenter', 'http://www.wired.com/epicenter/feed/'),
('Entertainment', 'http://feeds.wired.com/wired/entertainment'), ('Gadget Lab', 'http://www.wired.com/gadgetlab/feed/'),
('Gaming', 'http://feeds.wired.com/wired/gaming'), ('Geek Dad', 'http://www.wired.com/geekdad/feed/'),
('Science', 'http://feeds.wired.com/wired/science'), ('Playbook', 'http://www.wired.com/playbook/feed/'),
('Med Tech', 'http://feeds.wired.com/wired/medtech'), ('Rawfile', 'http://www.wired.com/rawfile/feed/'),
('Politics', 'http://feeds.wired.com/wired/politics'), ('This Day in Tech', 'http://www.wired.com/thisdayintech/feed/'),
('Tech Biz', 'http://feeds.wired.com/wired/techbiz'), ('Threat Level', 'http://www.wired.com/threatlevel/feed/'),
('Commentary', 'http://feeds.wired.com/wired/commentary'), ('Underwire', 'http://www.wired.com/underwire/feed/'),
('Web Monkey', 'http://www.webmonkey.com/feed/'),
('Science', 'http://www.wired.com/wiredscience/feed/'),
] ]
def populate_article_metadata(self, article, soup, first):
if article.text_summary:
article.text_summary = xml_to_unicode(article.text_summary,
resolve_entities=True)[0]
def print_version(self, url): def print_version(self, url):
return url.replace('http://www.wired.com/', 'http://www.wired.com/print/') return url + '/all/1'

View File

@ -0,0 +1,33 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.zerohedge.com
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class ZeroHedge(BasicNewsRecipe):
    """Recipe for the Zero Hedge economics/politics blog.

    Articles arrive fully embedded in the feedburner feed, so no page
    scraping is required (``use_embedded_content`` is on).
    """
    title = 'Zero Hedge'
    __author__ = 'Darko Miletic'
    description = 'On a long enough timeline the survival rate for everyone drops to zero'
    publisher = 'zero hedge'
    category = 'news, USA, world, economy, politics'
    language = 'en'
    publication_type = 'blog'

    # Fetch behaviour: the feed carries the complete article body.
    oldest_article = 10
    max_articles_per_feed = 100
    use_embedded_content = True
    no_stylesheets = True
    encoding = 'utf8'

    masthead_url = 'http://www.zerohedge.com/themes/newsflash/logo.png'
    extra_css = 'body{ font-family: sans-serif }'

    # Metadata forwarded to the conversion pipeline.
    conversion_options = dict(comments=description, tags=category,
                              language=language, publisher=publisher)

    feeds = [(u'Articles', u'http://feeds.feedburner.com/zerohedge/feed')]

View File

@ -459,6 +459,18 @@ def force_unicode(obj, enc=preferred_encoding):
obj = obj.decode('utf-8') obj = obj.decode('utf-8')
return obj return obj
def as_unicode(obj, enc=preferred_encoding):
    """Best-effort conversion of any object to a unicode string.

    A non-bytestring object is coerced with unicode(), falling back to
    str() if that raises; if both fail, repr() is used.  The result is
    then normalised through force_unicode() with encoding *enc*.
    """
    if not isbytestring(obj):
        for coerce in (unicode, str):
            try:
                obj = coerce(obj)
                break
            except:
                pass
        else:
            # Neither unicode() nor str() worked; repr() is the last resort.
            obj = repr(obj)
    return force_unicode(obj, enc=enc)
def human_readable(size): def human_readable(size):
""" Convert a size in bytes into a human readable form """ """ Convert a size in bytes into a human readable form """

View File

@ -88,6 +88,7 @@ class Plumber(object):
self.ui_reporter = report_progress self.ui_reporter = report_progress
self.abort_after_input_dump = abort_after_input_dump self.abort_after_input_dump = abort_after_input_dump
# Pipeline options {{{
# Initialize the conversion options that are independent of input and # Initialize the conversion options that are independent of input and
# output formats. The input and output plugins can still disable these # output formats. The input and output plugins can still disable these
# options via recommendations. # options via recommendations.
@ -527,6 +528,7 @@ OptionRecommendation(name='timestamp',
help=_('Set the book timestamp (used by the date column in calibre).')), help=_('Set the book timestamp (used by the date column in calibre).')),
] ]
# }}}
input_fmt = os.path.splitext(self.input)[1] input_fmt = os.path.splitext(self.input)[1]
if not input_fmt: if not input_fmt:

View File

@ -16,7 +16,6 @@ import uuid
from lxml import etree from lxml import etree
from calibre import guess_type
from calibre import prepare_string_for_xml from calibre import prepare_string_for_xml
from calibre.constants import __appname__, __version__ from calibre.constants import __appname__, __version__
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
@ -41,7 +40,7 @@ class FB2MLizer(object):
# in different directories. FB2 images are all in a flat layout so we rename all images # in different directories. FB2 images are all in a flat layout so we rename all images
# into a sequential numbering system to ensure there are no collisions between image names. # into a sequential numbering system to ensure there are no collisions between image names.
self.image_hrefs = {} self.image_hrefs = {}
# Mapping of toc items and their # Mapping of toc items and their
self.toc = {} self.toc = {}
# Used to see whether a new <section> needs to be opened # Used to see whether a new <section> needs to be opened
self.section_level = 0 self.section_level = 0
@ -51,7 +50,7 @@ class FB2MLizer(object):
self.oeb_book = oeb_book self.oeb_book = oeb_book
self.opts = opts self.opts = opts
self.reset_state() self.reset_state()
# Used for adding <section>s and <title>s to allow readers # Used for adding <section>s and <title>s to allow readers
# to generate toc from the document. # to generate toc from the document.
if self.opts.sectionize == 'toc': if self.opts.sectionize == 'toc':
@ -75,20 +74,20 @@ class FB2MLizer(object):
text = re.sub(r'(?miu)<p>\s*</p>', '', text) text = re.sub(r'(?miu)<p>\s*</p>', '', text)
text = re.sub(r'(?miu)\s*</p>', '</p>', text) text = re.sub(r'(?miu)\s*</p>', '</p>', text)
text = re.sub(r'(?miu)</p>\s*<p>', '</p>\n\n<p>', text) text = re.sub(r'(?miu)</p>\s*<p>', '</p>\n\n<p>', text)
text = re.sub(r'(?miu)<title>\s*</title>', '', text) text = re.sub(r'(?miu)<title>\s*</title>', '', text)
text = re.sub(r'(?miu)\s+</title>', '</title>', text) text = re.sub(r'(?miu)\s+</title>', '</title>', text)
text = re.sub(r'(?miu)<section>\s*</section>', '', text) text = re.sub(r'(?miu)<section>\s*</section>', '', text)
text = re.sub(r'(?miu)\s*</section>', '\n</section>', text) text = re.sub(r'(?miu)\s*</section>', '\n</section>', text)
text = re.sub(r'(?miu)</section>\s*', '</section>\n\n', text) text = re.sub(r'(?miu)</section>\s*', '</section>\n\n', text)
text = re.sub(r'(?miu)\s*<section>', '\n<section>', text) text = re.sub(r'(?miu)\s*<section>', '\n<section>', text)
text = re.sub(r'(?miu)<section>\s*', '<section>\n', text) text = re.sub(r'(?miu)<section>\s*', '<section>\n', text)
text = re.sub(r'(?miu)</section><section>', '</section>\n\n<section>', text) text = re.sub(r'(?miu)</section><section>', '</section>\n\n<section>', text)
if self.opts.insert_blank_line: if self.opts.insert_blank_line:
text = re.sub(r'(?miu)</p>', '</p><empty-line />', text) text = re.sub(r'(?miu)</p>', '</p><empty-line />', text)
return text return text
def fb2_header(self): def fb2_header(self):
@ -102,6 +101,7 @@ class FB2MLizer(object):
metadata['date'] = '%i.%i.%i' % (datetime.now().day, datetime.now().month, datetime.now().year) metadata['date'] = '%i.%i.%i' % (datetime.now().day, datetime.now().month, datetime.now().year)
metadata['lang'] = u''.join(self.oeb_book.metadata.lang) if self.oeb_book.metadata.lang else 'en' metadata['lang'] = u''.join(self.oeb_book.metadata.lang) if self.oeb_book.metadata.lang else 'en'
metadata['id'] = None metadata['id'] = None
metadata['cover'] = self.get_cover()
author_parts = self.oeb_book.metadata.creator[0].value.split(' ') author_parts = self.oeb_book.metadata.creator[0].value.split(' ')
if len(author_parts) == 1: if len(author_parts) == 1:
@ -121,10 +121,11 @@ class FB2MLizer(object):
break break
if metadata['id'] is None: if metadata['id'] is None:
self.log.warn('No UUID identifier found') self.log.warn('No UUID identifier found')
metadata['id'] = str(uuid.uuid4()) metadata['id'] = str(uuid.uuid4())
for key, value in metadata.items(): for key, value in metadata.items():
metadata[key] = prepare_string_for_xml(value) if not key == 'cover':
metadata[key] = prepare_string_for_xml(value)
return u'<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:xlink="http://www.w3.org/1999/xlink">' \ return u'<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:xlink="http://www.w3.org/1999/xlink">' \
'<description>' \ '<description>' \
@ -136,6 +137,7 @@ class FB2MLizer(object):
'<last-name>%(author_last)s</last-name>' \ '<last-name>%(author_last)s</last-name>' \
'</author>' \ '</author>' \
'<book-title>%(title)s</book-title>' \ '<book-title>%(title)s</book-title>' \
'%(cover)s' \
'<lang>%(lang)s</lang>' \ '<lang>%(lang)s</lang>' \
'</title-info>' \ '</title-info>' \
'<document-info>' \ '<document-info>' \
@ -154,48 +156,64 @@ class FB2MLizer(object):
def fb2_footer(self): def fb2_footer(self):
return u'</FictionBook>' return u'</FictionBook>'
def get_cover(self):
    """Return an FB2 ``<coverpage>`` fragment for the book cover, or u''.

    Prefers the raster image named by the OPF cover metadata; otherwise
    falls back to the first <img> on the guide's titlepage (or cover)
    page.  The chosen image is registered in self.image_hrefs under the
    flat sequential naming scheme used for all FB2 images.
    """
    cover_href = None
    meta_cover = self.oeb_book.metadata.cover
    if meta_cover and unicode(meta_cover[0]) in self.oeb_book.manifest.ids:
        # A raster cover is declared in the metadata.
        cover_id = unicode(meta_cover[0])
        cover_item = self.oeb_book.manifest.ids[cover_id]
        if cover_item.media_type in OEB_RASTER_IMAGES:
            cover_href = cover_item.href
    else:
        # No metadata cover: look for a title page or cover page in the
        # guide and take the first image referenced on it.
        page_name = ''
        if 'titlepage' in self.oeb_book.guide:
            page_name = 'titlepage'
        elif 'cover' in self.oeb_book.guide:
            page_name = 'cover'
        if page_name:
            page_item = self.oeb_book.manifest.hrefs[self.oeb_book.guide[page_name].href]
            for img in page_item.xpath('//img'):
                cover_href = page_item.abshref(img.get('src'))
                break
    # Only emit the image tag if the href is actually in the manifest.
    if cover_href and cover_href in self.oeb_book.manifest.hrefs.keys():
        if cover_href not in self.image_hrefs.keys():
            self.image_hrefs[cover_href] = '_%s.jpg' % len(self.image_hrefs.keys())
        return u'<coverpage><image xlink:href="#%s" /></coverpage>' % self.image_hrefs[cover_href]
    return u''
def get_text(self): def get_text(self):
text = ['<body>'] text = ['<body>']
# Create main section if there are no others to create # Create main section if there are no others to create
if self.opts.sectionize == 'nothing': if self.opts.sectionize == 'nothing':
text.append('<section>') text.append('<section>')
self.section_level += 1 self.section_level += 1
# Insert the title page / cover into the spine if it is not already referenced.
title_name = u''
if 'titlepage' in self.oeb_book.guide:
title_name = 'titlepage'
elif 'cover' in self.oeb_book.guide:
title_name = 'cover'
if title_name:
title_item = self.oeb_book.manifest.hrefs[self.oeb_book.guide[title_name].href]
if title_item.spine_position is None and title_item.media_type == 'application/xhtml+xml':
self.oeb_book.spine.insert(0, title_item, True)
# Create xhtml page to reference cover image so it can be used.
if not title_name and self.oeb_book.metadata.cover and unicode(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids:
id = unicode(self.oeb_book.metadata.cover[0])
cover_item = self.oeb_book.manifest.ids[id]
if cover_item.media_type in OEB_RASTER_IMAGES:
self.insert_image_cover(cover_item.href)
for item in self.oeb_book.spine: for item in self.oeb_book.spine:
self.log.debug('Converting %s to FictionBook2 XML' % item.href) self.log.debug('Converting %s to FictionBook2 XML' % item.href)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
# Start a <section> if we must sectionize each file or if the TOC references this page # Start a <section> if we must sectionize each file or if the TOC references this page
page_section_open = False page_section_open = False
if self.opts.sectionize == 'files' or self.toc.get(item.href) == 'page': if self.opts.sectionize == 'files' or self.toc.get(item.href) == 'page':
text.append('<section>') text.append('<section>')
page_section_open = True page_section_open = True
self.section_level += 1 self.section_level += 1
text += self.dump_text(item.data.find(XHTML('body')), stylizer, item) text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
if page_section_open: if page_section_open:
text.append('</section>') text.append('</section>')
self.section_level -= 1 self.section_level -= 1
# Close any open sections # Close any open sections
while self.section_level > 0: while self.section_level > 0:
text.append('</section>') text.append('</section>')
@ -203,17 +221,6 @@ class FB2MLizer(object):
return ''.join(text) + '</body>' return ''.join(text) + '</body>'
def insert_image_cover(self, image_href):
    """Prepend a generated XHTML page displaying *image_href*.

    Used when the book has a raster cover image but no title/cover page;
    the synthetic page is added to the manifest and inserted at the front
    of the spine so the cover renders first.
    """
    from calibre.ebooks.oeb.base import RECOVER_PARSER
    markup = u'<html xmlns="%s"><body><img src="%s" /></body></html>' % (XHTML_NS, image_href)
    try:
        root = etree.fromstring(markup, parser=RECOVER_PARSER)
    except:
        # Fall back to an empty document if the href breaks parsing.
        root = etree.fromstring(u'', parser=RECOVER_PARSER)
    item_id, item_href = self.oeb_book.manifest.generate('fb2_cover', 'fb2_cover.xhtml')
    page = self.oeb_book.manifest.add(item_id, item_href, guess_type(item_href)[0], data=root)
    self.oeb_book.spine.insert(0, page, True)
def fb2mlize_images(self): def fb2mlize_images(self):
''' '''
This function uses the self.image_hrefs dictionary mapping. It is populated by the dump_text function. This function uses the self.image_hrefs dictionary mapping. It is populated by the dump_text function.
@ -345,7 +352,7 @@ class FB2MLizer(object):
self.toc[page.href] = None self.toc[page.href] = None
elif toc_entry and elem_tree.attrib.get('id', None): elif toc_entry and elem_tree.attrib.get('id', None):
newlevel = toc_entry.get(elem_tree.attrib.get('id', None), None) newlevel = toc_entry.get(elem_tree.attrib.get('id', None), None)
# Start a new section if necessary # Start a new section if necessary
if newlevel: if newlevel:
if not (newlevel > self.section_level): if not (newlevel > self.section_level):

View File

@ -85,42 +85,42 @@ def get_metadata(stream):
""" Return metadata as a L{MetaInfo} object """ """ Return metadata as a L{MetaInfo} object """
stream.seek(0) stream.seek(0)
if stream.read(5) != r'{\rtf': if stream.read(5) != r'{\rtf':
return MetaInformation(_('Unknown'), None) return MetaInformation(_('Unknown'))
block = get_document_info(stream)[0] block = get_document_info(stream)[0]
if not block: if not block:
return MetaInformation(_('Unknown'), None) return MetaInformation(_('Unknown'))
stream.seek(0) stream.seek(0)
cpg = detect_codepage(stream) cpg = detect_codepage(stream)
stream.seek(0) stream.seek(0)
title_match = title_pat.search(block) title_match = title_pat.search(block)
if title_match: if title_match is not None:
title = decode(title_match.group(1).strip(), cpg) title = decode(title_match.group(1).strip(), cpg)
else: else:
title = _('Unknown') title = _('Unknown')
author_match = author_pat.search(block) author_match = author_pat.search(block)
if author_match: if author_match is not None:
author = decode(author_match.group(1).strip(), cpg) author = decode(author_match.group(1).strip(), cpg)
else: else:
author = None author = None
mi = MetaInformation(title, author) mi = MetaInformation(title)
if author: if author:
mi.authors = string_to_authors(author) mi.authors = string_to_authors(author)
comment_match = comment_pat.search(block) comment_match = comment_pat.search(block)
if comment_match: if comment_match is not None:
comment = decode(comment_match.group(1).strip(), cpg) comment = decode(comment_match.group(1).strip(), cpg)
mi.comments = comment mi.comments = comment
tags_match = tags_pat.search(block) tags_match = tags_pat.search(block)
if tags_match: if tags_match is not None:
tags = decode(tags_match.group(1).strip(), cpg) tags = decode(tags_match.group(1).strip(), cpg)
mi.tags = tags mi.tags = tags
publisher_match = publisher_pat.search(block) publisher_match = publisher_pat.search(block)
if publisher_match: if publisher_match is not None:
publisher = decode(publisher_match.group(1).strip(), cpg) publisher = decode(publisher_match.group(1).strip(), cpg)
mi.publisher = publisher mi.publisher = publisher
return mi return mi
def create_metadata(stream, options): def create_metadata(stream, options):
@ -149,7 +149,7 @@ def create_metadata(stream, options):
md.append('}') md.append('}')
stream.seek(0) stream.seek(0)
src = stream.read() src = stream.read()
ans = src[:6] + ''.join(md) + src[6:] ans = src[:6] + u''.join(md) + src[6:]
stream.seek(0) stream.seek(0)
stream.write(ans) stream.write(ans)
@ -197,7 +197,7 @@ def set_metadata(stream, options):
tags = options.tags tags = options.tags
if tags is not None: if tags is not None:
tags = ', '.join(tags) tags = ', '.join(tags)
tags = tags.encode('ascii', 'ignore') tags = tags.encode('ascii', 'replace')
pat = re.compile(base_pat.replace('name', 'category'), re.DOTALL) pat = re.compile(base_pat.replace('name', 'category'), re.DOTALL)
if pat.search(src): if pat.search(src):
src = pat.sub(r'{\\category ' + tags + r'}', src) src = pat.sub(r'{\\category ' + tags + r'}', src)

View File

@ -77,19 +77,16 @@ class RTFInput(InputFormatPlugin):
def generate_xml(self, stream): def generate_xml(self, stream):
from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
ofile = 'dataxml.xml' ofile = 'dataxml.xml'
run_lev = 1 run_lev, debug_dir = 1, None
if hasattr(self.opts, 'debug_pipeline'): #just to check if the debug process is lauched, no need of this directory in fact
if getattr(self.opts, 'debug_pipeline', None) is not None:
try: try:
os.mkdir('rtfdebug')
debug_dir = 'rtfdebug' debug_dir = 'rtfdebug'
os.mkdir(debug_dir)
run_lev = 4 run_lev = 4
except OSError, ( errno, strerror ): except:
print strerror pass
print errno
debug_dir = None
else:
debug_dir = None
parser = ParseRtf( parser = ParseRtf(
in_file = stream, in_file = stream,
out_file = ofile, out_file = ofile,
@ -127,32 +124,38 @@ class RTFInput(InputFormatPlugin):
# Write or do not write paragraphs. Default is 0. # Write or do not write paragraphs. Default is 0.
empty_paragraphs = 1, empty_paragraphs = 1,
#debug #debug
deb_dir = debug_dir, deb_dir = debug_dir,
run_level = run_lev, run_level = run_lev,
) )
parser.parse_rtf() parser.parse_rtf()
ans = open('dataxml.xml').read() with open(ofile, 'rb') as f:
return ans return f.read()
def extract_images(self, picts): def extract_images(self, picts):
import imghdr
self.log('Extracting images...') self.log('Extracting images...')
raw = open(picts, 'rb').read() with open(picts, 'rb') as f:
raw = f.read()
picts = filter(len, re.findall(r'\{\\pict([^}]+)\}', raw)) picts = filter(len, re.findall(r'\{\\pict([^}]+)\}', raw))
hex = re.compile(r'[^a-zA-Z0-9]') hex = re.compile(r'[^a-fA-F0-9]')
encs = [hex.sub('', pict) for pict in picts] encs = [hex.sub('', pict) for pict in picts]
count = 0 count = 0
imap = {} imap = {}
for enc in encs: for enc in encs:
if len(enc) % 2 == 1: if len(enc) % 2 == 1:
enc = enc[:-1] enc = enc[:-1]
data = enc.decode('hex') data = enc.decode('hex')
fmt = imghdr.what(None, data)
if fmt is None:
fmt = 'wmf'
count += 1 count += 1
name = '%04d.wmf' % count name = '%04d.%s' % (count, fmt)
open(name, 'wb').write(data) with open(name, 'wb') as f:
f.write(data)
imap[count] = name imap[count] = name
#open(name+'.hex', 'wb').write(enc) #open(name+'.hex', 'wb').write(enc)
return self.convert_images(imap) return self.convert_images(imap)
@ -183,6 +186,7 @@ class RTFInput(InputFormatPlugin):
# return self.convert_images(imap) # return self.convert_images(imap)
def convert_images(self, imap): def convert_images(self, imap):
self.default_img = None
for count, val in imap.iteritems(): for count, val in imap.iteritems():
try: try:
imap[count] = self.convert_image(val) imap[count] = self.convert_image(val)
@ -191,11 +195,35 @@ class RTFInput(InputFormatPlugin):
return imap return imap
def convert_image(self, name): def convert_image(self, name):
from calibre.utils.magick import Image if not name.endswith('.wmf'):
img = Image() return name
img.open(name) try:
return self.rasterize_wmf(name)
except:
self.log.exception('Failed to convert WMF image %r'%name)
return self.replace_wmf(name)
def replace_wmf(self, name):
from calibre.ebooks import calibre_cover
if self.default_img is None:
self.default_img = calibre_cover('Conversion of WMF images is not supported',
'Use Microsoft Word or OpenOffice to save this RTF file'
' as HTML and convert that in calibre.', title_size=36,
author_size=20)
name = name.replace('.wmf', '.jpg') name = name.replace('.wmf', '.jpg')
img.save(name) with open(name, 'wb') as f:
f.write(self.default_img)
return name
def rasterize_wmf(self, name):
raise ValueError('Conversion of WMF images not supported')
from calibre.utils.wmf import extract_raster_image
with open(name, 'rb') as f:
data = f.read()
data = extract_raster_image(data)
name = name.replace('.wmf', '.jpg')
with open(name, 'wb') as f:
f.write(data)
return name return name
@ -285,6 +313,7 @@ class RTFInput(InputFormatPlugin):
try: try:
xml = self.generate_xml(stream.name) xml = self.generate_xml(stream.name)
except RtfInvalidCodeException, e: except RtfInvalidCodeException, e:
raise
raise ValueError(_('This RTF file has a feature calibre does not ' raise ValueError(_('This RTF file has a feature calibre does not '
'support. Convert it to HTML first and then try it.\n%s')%e) 'support. Convert it to HTML first and then try it.\n%s')%e)

View File

@ -107,7 +107,7 @@ class ParseRtf:
no_dtd = 0, no_dtd = 0,
char_data = '', char_data = '',
): ):
""" """
Requires: Requires:
'file' --file to parse 'file' --file to parse
@ -124,7 +124,7 @@ class ParseRtf:
through a file. Only for debugging. through a file. Only for debugging.
Returns: Nothing Returns: Nothing
""" """
self.__file = in_file self.__file = in_file
self.__out_file = out_file self.__out_file = out_file
self.__out_dir = out_dir self.__out_dir = out_dir
@ -155,12 +155,12 @@ class ParseRtf:
if hasattr(the_file, 'read'): return if hasattr(the_file, 'read'): return
if the_file == None: if the_file == None:
if type == "file_to_parse": if type == "file_to_parse":
msg = _("\nYou must provide a file for the script to work") msg = "\nYou must provide a file for the script to work"
raise RtfInvalidCodeException, msg raise RtfInvalidCodeException, msg
elif os.path.exists(the_file): elif os.path.exists(the_file):
pass # do nothing pass # do nothing
else: else:
msg = _("\nThe file '%s' cannot be found") % the_file msg = "\nThe file '%s' cannot be found" % the_file
raise RtfInvalidCodeException, msg raise RtfInvalidCodeException, msg
def __check_dir(self, the_dir): def __check_dir(self, the_dir):
@ -169,7 +169,7 @@ class ParseRtf:
return return
dir_exists = os.path.isdir(the_dir) dir_exists = os.path.isdir(the_dir)
if not dir_exists: if not dir_exists:
msg = _("\n%s is not a directory") % the_dir msg = "\n%s is not a directory" % the_dir
raise RtfInvalidCodeException, msg raise RtfInvalidCodeException, msg
return 1 return 1
@ -247,7 +247,7 @@ class ParseRtf:
if check_encoding_obj.check_encoding(self.__file, enc): if check_encoding_obj.check_encoding(self.__file, enc):
file_name = self.__file if isinstance(self.__file, str) \ file_name = self.__file if isinstance(self.__file, str) \
else self.__file.encode('utf-8') else self.__file.encode('utf-8')
msg = _('File %s does not appear to be correctly encoded.\n') % file_name msg = 'File %s does not appear to be correctly encoded.\n' % file_name
raise InvalidRtfException, msg raise InvalidRtfException, msg
delete_info_obj = delete_info.DeleteInfo( delete_info_obj = delete_info.DeleteInfo(
in_file = self.__temp_file, in_file = self.__temp_file,
@ -542,7 +542,7 @@ class ParseRtf:
pass pass
#sys.stderr.write( msg + ' in ' + file_name + "\n") #sys.stderr.write( msg + ' in ' + file_name + "\n")
else: else:
msg = _('%s in file %s\n') % (msg, file_name) msg = '%s in file %s\n' % (msg, file_name)
raise RtfInvalidCodeException, msg raise RtfInvalidCodeException, msg
def __return_code(self, num): def __return_code(self, num):
@ -558,4 +558,4 @@ class ParseRtf:
with open(write_file, 'wb') as write_obj: with open(write_file, 'wb') as write_obj:
for line in read_obj: for line in read_obj:
write_obj.write(line) write_obj.write(line)
return write_file return write_file

View File

@ -54,7 +54,7 @@ class CheckBrackets:
return (False, "closed bracket doesn't match, line %s" % line_count) return (False, "closed bracket doesn't match, line %s" % line_count)
if self.__bracket_count != 0: if self.__bracket_count != 0:
msg = _('At end of file open and closed brackets don\'t match\n' \ msg = ('At end of file open and closed brackets don\'t match\n' \
'total number of brackets is %s') % self.__bracket_count 'total number of brackets is %s') % self.__bracket_count
return (False, msg) return (False, msg)
return (True, _("Brackets match!")) return (True, "Brackets match!")

View File

@ -13,10 +13,10 @@ class CheckEncoding:
try: try:
char.decode(encoding) char.decode(encoding)
except UnicodeError, msg: except UnicodeError, msg:
sys.stderr.write(_('line: %s char: %s\n') % (line_num, char_position)) sys.stderr.write('line: %s char: %s\n' % (line_num, char_position))
sys.stderr.write(str(msg) + '\n') sys.stderr.write(str(msg) + '\n')
def check_encoding(self, path, encoding='us-ascii', verbose = True): def check_encoding(self, path, encoding='us-ascii', verbose=True):
line_num = 0 line_num = 0
with open(path, 'r') as read_obj: with open(path, 'r') as read_obj:
for line in read_obj: for line in read_obj:
@ -28,7 +28,7 @@ class CheckEncoding:
if len(line) < 1000: if len(line) < 1000:
self.__get_position_error(line, encoding, line_num) self.__get_position_error(line, encoding, line_num)
else: else:
sys.stderr.write(_('line: %d has bad encoding\n') % line_num) sys.stderr.write('line: %d has bad encoding\n' % line_num)
return True return True
return False return False

View File

@ -78,14 +78,14 @@ class CombineBorders:
self.add_to_border_desc(line) self.add_to_border_desc(line)
def combine_borders(self): def combine_borders(self):
with open(self.__file, 'r') as read_obj, \ with open(self.__file, 'r') as read_obj:
open(self.__write_to, 'w') as write_obj: with open(self.__write_to, 'w') as write_obj:
for line in read_obj: for line in read_obj:
self.__first_five = line[0:5] self.__first_five = line[0:5]
if self.__state == 'border': if self.__state == 'border':
self.__border_func(line, write_obj) self.__border_func(line, write_obj)
else: else:
write_obj.write(self.__default_func(line)) write_obj.write(self.__default_func(line))
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "combine_borders.data") copy_obj.copy_file(self.__write_to, "combine_borders.data")

View File

@ -1,4 +1,4 @@
import os, tempfile import os, tempfile, sys
from calibre.ebooks.rtf2xml import copy, check_encoding from calibre.ebooks.rtf2xml import copy, check_encoding
@ -208,15 +208,16 @@ class ConvertToTags:
""" """
#keep maximum compatibility with previous version #keep maximum compatibility with previous version
check_encoding_obj = check_encoding.CheckEncoding( check_encoding_obj = check_encoding.CheckEncoding(
bug_handler = self.__bug_handler, bug_handler=self.__bug_handler)
)
if not check_encoding_obj.check_encoding(self.__file, verbose = False): if not check_encoding_obj.check_encoding(self.__file, verbose=False):
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>') self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
elif not check_encoding_obj.check_encoding(self.__file, self.__encoding): elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
self.__write_obj.write('<?xml version="1.0" encoding="%s" ?>' % self.__encoding) self.__write_obj.write('<?xml version="1.0" encoding="%s" ?>' % self.__encoding)
else: else:
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>') self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
sys.stderr.write(_('Bad RTF encoding, revert to US-ASCII chars and hope for the best')) sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
' hope for the best')
self.__new_line = 0 self.__new_line = 0
self.__write_new_line() self.__write_new_line()
if self.__no_dtd: if self.__no_dtd:

View File

@ -3,7 +3,6 @@
# copyright 2002 Paul Henry Tremblay # # copyright 2002 Paul Henry Tremblay #
# # # #
######################################################################### #########################################################################
''' '''
Codepages as to RTF 1.9.1: Codepages as to RTF 1.9.1:
437 United States IBM 437 United States IBM
@ -79,7 +78,7 @@ class DefaultEncoding:
else: else:
code_page = 'ansicpg' + self.__code_page code_page = 'ansicpg' + self.__code_page
return self.__platform, code_page, self.__default_num return self.__platform, code_page, self.__default_num
def get_codepage(self): def get_codepage(self):
if not self.__datafetched: if not self.__datafetched:
self._encoding() self._encoding()
@ -91,7 +90,7 @@ class DefaultEncoding:
self._encoding() self._encoding()
self.__datafetched = True self.__datafetched = True
return self.__platform return self.__platform
def _encoding(self): def _encoding(self):
with open(self.__file, 'r') as read_obj: with open(self.__file, 'r') as read_obj:
if not self.__fetchraw: if not self.__fetchraw:

View File

@ -128,7 +128,7 @@ class DeleteInfo:
# not sure what happens here! # not sure what happens here!
# believe I have a '{\*} # believe I have a '{\*}
if self.__run_level > 3: if self.__run_level > 3:
msg = _('flag problem\n') msg = 'flag problem\n'
raise self.__bug_handler, msg raise self.__bug_handler, msg
return True return True
elif self.__token_info in self.__allowable : elif self.__token_info in self.__allowable :
@ -144,14 +144,14 @@ class DeleteInfo:
self.__found_list_func(line) self.__found_list_func(line)
elif self.__token_info in self.__not_allowable: elif self.__token_info in self.__not_allowable:
if not self.__ob: if not self.__ob:
self.__write_cb = False self.__write_cb = True
self.__ob = 0 self.__ob = 0
self.__state = 'delete' self.__state = 'delete'
self.__cb_count = 0 self.__cb_count = 0
return False return False
else: else:
if self.__run_level > 5: if self.__run_level > 5:
msg = _('After an asterisk, and found neither an allowable or non-allowable token\n\ msg = ('After an asterisk, and found neither an allowable or non-allowable token\n\
token is "%s"\n') % self.__token_info token is "%s"\n') % self.__token_info
raise self.__bug_handler, msg raise self.__bug_handler, msg
if not self.__ob: if not self.__ob:
@ -187,32 +187,31 @@ class DeleteInfo:
def delete_info(self): def delete_info(self):
"""Main method for handling other methods. Read one line in at """Main method for handling other methods. Read one line in at
a time, and determine wheter to print the line based on the state.""" a time, and determine whether to print the line based on the state."""
with open(self.__file, 'r') as read_obj, \ with open(self.__file, 'r') as read_obj:
open(self.__write_to, 'w') as self.__write_obj: with open(self.__write_to, 'w') as self.__write_obj:
for line in read_obj: for line in read_obj:
#ob<nu<open-brack<0001 #ob<nu<open-brack<0001
to_print = True to_print = True
self.__token_info = line[:16] self.__token_info = line[:16]
if self.__token_info == 'ob<nu<open-brack': if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1] self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack': if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1] self.__cb_count = line[-5:-1]
action = self.__state_dict.get(self.__state) action = self.__state_dict.get(self.__state)
if not action: if not action:
sys.stderr.write(_('No action in dictionary state is "%s" \n') sys.stderr.write('No action in dictionary state is "%s" \n' % self.__state)
% self.__state) to_print = action(line)
to_print = action(line) # if self.__after_asterisk:
# if self.__after_asterisk: # to_print = self.__asterisk_func(line)
# to_print = self.__asterisk_func(line) # elif self.__list:
# elif self.__list: # self.__in_list_func(line)
# self.__in_list_func(line) # elif self.__delete:
# elif self.__delete: # to_print = self.__delete_func(line)
# to_print = self.__delete_func(line) # else:
# else: # to_print = self.__default_func(line)
# to_print = self.__default_func(line) if to_print:
if to_print: self.__write_obj.write(line)
self.__write_obj.write(line)
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "delete_info.data") copy_obj.copy_file(self.__write_to, "delete_info.data")

View File

@ -120,35 +120,35 @@ class Footnote:
""" """
self.__initiate_sep_values() self.__initiate_sep_values()
self.__footnote_holder = tempfile.mktemp() self.__footnote_holder = tempfile.mktemp()
with open(self.__file) as read_obj, \ with open(self.__file) as read_obj:
open(self.__write_to, 'w') as self.__write_obj, \ with open(self.__write_to, 'w') as self.__write_obj:
open(self.__footnote_holder, 'w') as self.__write_to_foot_obj: with open(self.__footnote_holder, 'w') as self.__write_to_foot_obj:
for line in read_obj: for line in read_obj:
self.__token_info = line[:16] self.__token_info = line[:16]
# keep track of opening and closing brackets # keep track of opening and closing brackets
if self.__token_info == 'ob<nu<open-brack': if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1] self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack': if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1] self.__cb_count = line[-5:-1]
# In the middle of footnote text # In the middle of footnote text
if self.__in_footnote: if self.__in_footnote:
self.__in_footnote_func(line) self.__in_footnote_func(line)
# not in the middle of footnote text # not in the middle of footnote text
else: else:
self.__default_sep(line) self.__default_sep(line)
with open(self.__footnote_holder, 'r') as read_obj, \ with open(self.__footnote_holder, 'r') as read_obj:
open(self.__write_to, 'a') as write_obj: with open(self.__write_to, 'a') as write_obj:
write_obj.write( write_obj.write(
'mi<mk<sect-close\n' 'mi<mk<sect-close\n'
'mi<mk<body-close\n' 'mi<mk<body-close\n'
'mi<tg<close_____<section\n' 'mi<tg<close_____<section\n'
'mi<tg<close_____<body\n' 'mi<tg<close_____<body\n'
'mi<tg<close_____<doc\n' 'mi<tg<close_____<doc\n'
'mi<mk<footnt-beg\n') 'mi<mk<footnt-beg\n')
for line in read_obj: for line in read_obj:
write_obj.write(line) write_obj.write(line)
write_obj.write( write_obj.write(
'mi<mk<footnt-end\n') 'mi<mk<footnt-end\n')
os.remove(self.__footnote_holder) os.remove(self.__footnote_holder)
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
@ -190,15 +190,15 @@ class Footnote:
These two functions do the work of separating the footnotes form the These two functions do the work of separating the footnotes form the
body. body.
""" """
with open(self.__file) as read_obj, \ with open(self.__file) as read_obj:
open(self.__write_to, 'w') as self.__write_obj, \ with open(self.__write_to, 'w') as self.__write_obj:
open(self.__footnote_holder, 'w') as self.__write_to_foot_obj: with open(self.__footnote_holder, 'w') as self.__write_to_foot_obj:
for line in read_obj: for line in read_obj:
self.__token_info = line[:16] self.__token_info = line[:16]
if self.__state == 'body': if self.__state == 'body':
self.__get_foot_body_func(line) self.__get_foot_body_func(line)
elif self.__state == 'foot': elif self.__state == 'foot':
self.__get_foot_foot_func(line) self.__get_foot_foot_func(line)
def __get_foot_from_temp(self, num): def __get_foot_from_temp(self, num):
""" """
@ -228,13 +228,13 @@ class Footnote:
print out to the third file. print out to the third file.
If no footnote marker is found, simply print out the token (line). If no footnote marker is found, simply print out the token (line).
""" """
with open(self.__footnote_holder, 'r') as self.__read_from_foot_obj, \ with open(self.__footnote_holder, 'r') as self.__read_from_foot_obj:
open(self.__write_to, 'r') as read_obj, \ with open(self.__write_to, 'r') as read_obj:
open(self.__write_to2, 'w') as self.__write_obj: with open(self.__write_to2, 'w') as self.__write_obj:
for line in read_obj: for line in read_obj:
if line[:16] == 'mi<mk<footnt-ind': if line[:16] == 'mi<mk<footnt-ind':
line = self.__get_foot_from_temp(line[17:-1]) line = self.__get_foot_from_temp(line[17:-1])
self.__write_obj.write(line) self.__write_obj.write(line)
def join_footnotes(self): def join_footnotes(self):
""" """

View File

@ -43,6 +43,8 @@ class GetCharMap:
def get_char_map(self, map): def get_char_map(self, map):
if map == 'ansicpg0': if map == 'ansicpg0':
map = 'ansicpg1250' map = 'ansicpg1250'
if map in ('ansicpg10000', '10000'):
map = 'mac_roman'
found_map = False found_map = False
map_dict = {} map_dict = {}
self.__char_file.seek(0) self.__char_file.seek(0)
@ -59,10 +61,10 @@ class GetCharMap:
fields = line.split(':') fields = line.split(':')
fields[1].replace('\\colon', ':') fields[1].replace('\\colon', ':')
map_dict[fields[1]] = fields[3] map_dict[fields[1]] = fields[3]
if not found_map: if not found_map:
msg = _('no map found\nmap is "%s"\n') %(map,) msg = 'no map found\nmap is "%s"\n'%(map,)
raise self.__bug_handler, msg raise self.__bug_handler, msg
return map_dict return map_dict

View File

@ -16,8 +16,10 @@
# # # #
######################################################################### #########################################################################
import sys, os, tempfile, cStringIO import sys, os, tempfile, cStringIO
from calibre.ebooks.rtf2xml import get_char_map, copy from calibre.ebooks.rtf2xml import get_char_map, copy
from calibre.ebooks.rtf2xml.char_set import char_set from calibre.ebooks.rtf2xml.char_set import char_set
class Hex2Utf8: class Hex2Utf8:
""" """
Convert Microsoft hexidecimal numbers to utf-8 Convert Microsoft hexidecimal numbers to utf-8
@ -265,7 +267,7 @@ class Hex2Utf8:
# msg = 'no dictionary entry for %s\n' # msg = 'no dictionary entry for %s\n'
# msg += 'the hexidecimal num is "%s"\n' % (hex_num) # msg += 'the hexidecimal num is "%s"\n' % (hex_num)
# msg += 'dictionary is %s\n' % self.__current_dict_name # msg += 'dictionary is %s\n' % self.__current_dict_name
msg = _('Character "&#x%s;" does not appear to be valid (or is a control character)\n') % token msg = 'Character "&#x%s;" does not appear to be valid (or is a control character)\n' % token
raise self.__bug_handler, msg raise self.__bug_handler, msg
def __found_body_func(self, line): def __found_body_func(self, line):
@ -293,7 +295,7 @@ class Hex2Utf8:
self.__token_info = line[:16] self.__token_info = line[:16]
action = self.__preamble_state_dict.get(self.__state) action = self.__preamble_state_dict.get(self.__state)
if action is None: if action is None:
sys.stderr.write(_('error no state found in hex_2_utf8'), sys.stderr.write('error no state found in hex_2_utf8',
self.__state self.__state
) )
action(line) action(line)
@ -553,7 +555,7 @@ class Hex2Utf8:
self.__token_info = line[:16] self.__token_info = line[:16]
action = self.__body_state_dict.get(self.__state) action = self.__body_state_dict.get(self.__state)
if action is None: if action is None:
sys.stderr.write(_('error no state found in hex_2_utf8'), sys.stderr.write('error no state found in hex_2_utf8',
self.__state self.__state
) )
action(line) action(line)

View File

@ -297,7 +297,7 @@ class Inline:
inline_list = self.__inline_list[last_index:] inline_list = self.__inline_list[last_index:]
if len(inline_list) <= 0: if len(inline_list) <= 0:
if self.__run_level > 3: if self.__run_level > 3:
msg = _('self.__inline_list is %s\n') % self.__inline_list msg = 'self.__inline_list is %s\n' % self.__inline_list
raise self.__bug_handler, msg raise self.__bug_handler, msg
self.__write_obj.write('error\n') self.__write_obj.write('error\n')
self.__groups_in_waiting[0] = 0 self.__groups_in_waiting[0] = 0
@ -393,27 +393,27 @@ class Inline:
the state. the state.
""" """
self.__initiate_values() self.__initiate_values()
with open(self.__file, 'r') as read_obj, \ with open(self.__file, 'r') as read_obj:
open(self.__write_to, 'w') as self.__write_obj: with open(self.__write_to, 'w') as self.__write_obj:
for line in read_obj: for line in read_obj:
token = line[0:-1] token = line[0:-1]
self.__token_info = '' self.__token_info = ''
if token == 'tx<mc<__________<rdblquote'\ if token == 'tx<mc<__________<rdblquote'\
or token == 'tx<mc<__________<ldblquote'\ or token == 'tx<mc<__________<ldblquote'\
or token == 'tx<mc<__________<lquote'\ or token == 'tx<mc<__________<lquote'\
or token == 'tx<mc<__________<rquote'\ or token == 'tx<mc<__________<rquote'\
or token == 'tx<mc<__________<emdash'\ or token == 'tx<mc<__________<emdash'\
or token == 'tx<mc<__________<endash'\ or token == 'tx<mc<__________<endash'\
or token == 'tx<mc<__________<bullet': or token == 'tx<mc<__________<bullet':
self.__token_info = 'text' self.__token_info = 'text'
else: else:
self.__token_info = line[:16] self.__token_info = line[:16]
self.__set_list_func(line) self.__set_list_func(line)
action = self.__state_dict.get(self.__state) action = self.__state_dict.get(self.__state)
if action is None: if action is None:
sys.stderr.write(_('No matching state in module inline_for_lists.py\n')) sys.stderr.write('No matching state in module inline_for_lists.py\n')
sys.stderr.write(self.__state + '\n') sys.stderr.write(self.__state + '\n')
action(line) action(line)
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "inline.data") copy_obj.copy_file(self.__write_to, "inline.data")

View File

@ -15,7 +15,7 @@
# # # #
# # # #
######################################################################### #########################################################################
import os, tempfile, re import os, tempfile
from calibre.ebooks.rtf2xml import copy from calibre.ebooks.rtf2xml import copy
from calibre.utils.cleantext import clean_ascii_chars from calibre.utils.cleantext import clean_ascii_chars

View File

@ -77,11 +77,11 @@ class Pict:
try: try:
os.mkdir(self.__dir_name) os.mkdir(self.__dir_name)
except OSError, msg: except OSError, msg:
msg = _("%sCouldn't make directory '%s':\n") % (str(msg), self.__dir_name) msg = "%sCouldn't make directory '%s':\n" % (str(msg), self.__dir_name)
raise self.__bug_handler raise self.__bug_handler
else: else:
if self.__run_level > 1: if self.__run_level > 1:
sys.stderr.write(_('Removing files from old pict directory...\n')) sys.stderr.write('Removing files from old pict directory...\n')
all_files = os.listdir(self.__dir_name) all_files = os.listdir(self.__dir_name)
for the_file in all_files: for the_file in all_files:
the_file = os.path.join(self.__dir_name, the_file) the_file = os.path.join(self.__dir_name, the_file)
@ -90,7 +90,7 @@ class Pict:
except OSError: except OSError:
pass pass
if self.__run_level > 1: if self.__run_level > 1:
sys.stderr.write(_('Files removed.\n')) sys.stderr.write('Files removed.\n')
def __create_pict_file(self): def __create_pict_file(self):
"""Create a file for all the pict data to be written to. """Create a file for all the pict data to be written to.
@ -146,25 +146,25 @@ class Pict:
def process_pict(self): def process_pict(self):
self.__make_dir() self.__make_dir()
with open(self.__file) as read_obj, \ with open(self.__file) as read_obj:
open(self.__write_to, 'w') as write_obj: with open(self.__write_to, 'w') as write_obj:
for line in read_obj: for line in read_obj:
self.__token_info = line[:16] self.__token_info = line[:16]
if self.__token_info == 'ob<nu<open-brack': if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1] self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack': if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1] self.__cb_count = line[-5:-1]
if not self.__in_pict: if not self.__in_pict:
to_print = self.__default(line, write_obj) to_print = self.__default(line, write_obj)
if to_print : if to_print :
write_obj.write(line) write_obj.write(line)
else: else:
to_print = self.__in_pict_func(line) to_print = self.__in_pict_func(line)
if to_print : if to_print :
write_obj.write(line) write_obj.write(line)
if self.__already_found_pict: if self.__already_found_pict:
self.__write_pic_obj.write("}\n") self.__write_pic_obj.write("}\n")
self.__write_pic_obj.close() self.__write_pic_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "pict.data") copy_obj.copy_file(self.__write_to, "pict.data")

View File

@ -622,12 +622,12 @@ class ProcessTokens:
num = int(num) num = int(num)
except ValueError: except ValueError:
if self.__run_level > 3: if self.__run_level > 3:
msg = _('Number "%s" cannot be converted to integer\n') % num msg = 'Number "%s" cannot be converted to integer\n' % num
raise self.__bug_handler, msg raise self.__bug_handler, msg
type = self.__number_type_dict.get(num) type = self.__number_type_dict.get(num)
if type is None: if type is None:
if self.__run_level > 3: if self.__run_level > 3:
msg = _('No type for "%s" in self.__number_type_dict\n') msg = 'No type for "%s" in self.__number_type_dict\n'
raise self.__bug_handler raise self.__bug_handler
type = 'Arabic' type = 'Arabic'
return 'cw<%s<%s<nu<%s\n' % (pre, token, type) return 'cw<%s<%s<nu<%s\n' % (pre, token, type)
@ -637,7 +637,7 @@ class ProcessTokens:
if not lang_name: if not lang_name:
lang_name = "not defined" lang_name = "not defined"
if self.__run_level > 3: if self.__run_level > 3:
msg = _('No entry for number "%s"') % num msg = 'No entry for number "%s"' % num
raise self.__bug_handler, msg raise self.__bug_handler, msg
return 'cw<%s<%s<nu<%s\n' % (pre, token, lang_name) return 'cw<%s<%s<nu<%s\n' % (pre, token, lang_name)
@ -689,7 +689,7 @@ class ProcessTokens:
return 'cw<%s<%s<nu<false\n' % (pre, token) return 'cw<%s<%s<nu<false\n' % (pre, token)
##return 'cw<nu<nu<nu<%s>false<%s\n' % (token, token) ##return 'cw<nu<nu<nu<%s>false<%s\n' % (token, token)
else: else:
msg = _("boolean should have some value module process tokens\ntoken is %s\n'%s'\n") % (token, num) msg = "boolean should have some value module process tokens\ntoken is %s\n'%s'\n" % (token, num)
raise self.__bug_handler, msg raise self.__bug_handler, msg
def __no_sup_sub_func(self, pre, token, num): def __no_sup_sub_func(self, pre, token, num):
@ -703,7 +703,7 @@ class ProcessTokens:
numerator = float(re.search('[0-9.\-]+', numerator).group()) numerator = float(re.search('[0-9.\-]+', numerator).group())
except TypeError, msg: except TypeError, msg:
if self.__run_level > 3: if self.__run_level > 3:
msg = _('No number to process?\nthis indicates that the token \(\\li\) \ msg = ('No number to process?\nthis indicates that the token \(\\li\) \
should have a number and does not\nnumerator is \ should have a number and does not\nnumerator is \
"%s"\ndenominator is "%s"\n') % (numerator, denominator) "%s"\ndenominator is "%s"\n') % (numerator, denominator)
raise self.__bug_handler, msg raise self.__bug_handler, msg
@ -724,12 +724,12 @@ class ProcessTokens:
second = match_obj.group(2) second = match_obj.group(2)
if not second: if not second:
if self.__run_level > 3: if self.__run_level > 3:
msg = _("token is '%s' \n") % token msg = "token is '%s' \n" % token
raise self.__bug_handler, msg raise self.__bug_handler, msg
return first, 0 return first, 0
else: else:
if self.__run_level > 3: if self.__run_level > 3:
msg = _("token is '%s' \n") % token msg = "token is '%s' \n" % token
raise self.__bug_handler raise self.__bug_handler
return token, 0 return token, 0
return first, second return first, second
@ -758,7 +758,7 @@ class ProcessTokens:
pre, token, action = self.dict_token.get(token, (None, None, None)) pre, token, action = self.dict_token.get(token, (None, None, None))
if action: if action:
return action(pre, token, num) return action(pre, token, num)
def __check_brackets(self, in_file): def __check_brackets(self, in_file):
self.__check_brack_obj = check_brackets.CheckBrackets\ self.__check_brack_obj = check_brackets.CheckBrackets\
(file = in_file) (file = in_file)
@ -769,53 +769,54 @@ class ProcessTokens:
def process_tokens(self): def process_tokens(self):
"""Main method for handling other methods. """ """Main method for handling other methods. """
line_count = 0 line_count = 0
with open(self.__file, 'r') as read_obj, open(self.__write_to, 'wb') as write_obj: with open(self.__file, 'r') as read_obj:
for line in read_obj: with open(self.__write_to, 'wb') as write_obj:
token = line.replace("\n","") for line in read_obj:
line_count += 1 token = line.replace("\n","")
if line_count == 1 and token != '\\{': line_count += 1
msg = _('Invalid RTF: document doesn\'t start with {\n') if line_count == 1 and token != '\\{':
msg = 'Invalid RTF: document doesn\'t start with {\n'
raise self.__exception_handler, msg
elif line_count == 2 and token[0:4] != '\\rtf':
msg = 'Invalid RTF: document doesn\'t start with \\rtf \n'
raise self.__exception_handler, msg
the_index = token.find('\\ ')
if token is not None and the_index > -1:
msg = 'Invalid RTF: token "\\ " not valid.\n'
raise self.__exception_handler, msg raise self.__exception_handler, msg
elif line_count == 2 and token[0:4] != '\\rtf': elif token[:1] == "\\":
msg =_('Invalid RTF: document doesn\'t start with \\rtf \n') try:
raise self.__exception_handler, msg token.decode('us-ascii')
except UnicodeError, msg:
the_index = token.find('\\ ') msg = 'Invalid RTF: Tokens not ascii encoded.\n%s' % str(msg)
if token is not None and the_index > -1: raise self.__exception_handler, msg
msg =_('Invalid RTF: token "\\ " not valid.\n') line = self.process_cw(token)
raise self.__exception_handler, msg if line is not None:
elif token[:1] == "\\": write_obj.write(line)
try: else:
token.decode('us-ascii') fields = re.split(self.__utf_exp, token)
except UnicodeError, msg: for field in fields:
msg = _('Invalid RTF: Tokens not ascii encoded.\n%s') % str(msg) if not field:
raise self.__exception_handler, msg continue
line = self.process_cw(token) if field[0:1] == '&':
if line is not None: write_obj.write('tx<ut<__________<%s\n' % field)
write_obj.write(line) else:
else: write_obj.write('tx<nu<__________<%s\n' % field)
fields = re.split(self.__utf_exp, token)
for field in fields:
if not field:
continue
if field[0:1] == '&':
write_obj.write('tx<ut<__________<%s\n' % field)
else:
write_obj.write('tx<nu<__________<%s\n' % field)
if not line_count: if not line_count:
msg =_('Invalid RTF: file appears to be empty.\n') msg = 'Invalid RTF: file appears to be empty.\n'
raise self.__exception_handler, msg raise self.__exception_handler, msg
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "processed_tokens.data") copy_obj.copy_file(self.__write_to, "processed_tokens.data")
copy_obj.rename(self.__write_to, self.__file) copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to) os.remove(self.__write_to)
bad_brackets = self.__check_brackets(self.__file) bad_brackets = self.__check_brackets(self.__file)
if bad_brackets: if bad_brackets:
msg = _('Invalid RTF: document does not have matching brackets.\n') msg = 'Invalid RTF: document does not have matching brackets.\n'
raise self.__exception_handler, msg raise self.__exception_handler, msg
else: else:
return self.__return_code return self.__return_code

View File

@ -37,10 +37,10 @@ class ReplaceIllegals:
def replace_illegals(self): def replace_illegals(self):
""" """
""" """
with open(self.__file, 'r') as read_obj, \ with open(self.__file, 'r') as read_obj:
open(self.__write_to, 'w') as write_obj: with open(self.__write_to, 'w') as write_obj:
for line in read_obj: for line in read_obj:
write_obj.write(clean_ascii_chars(line)) write_obj.write(clean_ascii_chars(line))
copy_obj = copy.Copy() copy_obj = copy.Copy()
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "replace_illegals.data") copy_obj.copy_file(self.__write_to, "replace_illegals.data")

View File

@ -0,0 +1,6 @@
# Package initializer: re-export the public textile API from the
# implementation module.
from functions import textile, textile_restricted, Textile
# The `if False` guard never runs; it only references the imported names
# so static checkers do not flag them as unused.
if False:
    textile, textile_restricted, Textile
# Public API of this package.
__all__ = ['textile', 'textile_restricted']

View File

@ -0,0 +1,981 @@
#!/usr/bin/env python
"""
PyTextile
A Humane Web Text Generator
"""
__version__ = '2.1.4'
__date__ = '2009/12/04'
__copyright__ = """
Copyright (c) 2009, Jason Samsa, http://jsamsa.com/
Copyright (c) 2004, Roberto A. F. De Almeida, http://dealmeida.net/
Copyright (c) 2003, Mark Pilgrim, http://diveintomark.org/
Original PHP Version:
Copyright (c) 2003-2004, Dean Allen <dean@textism.com>
All rights reserved.
Thanks to Carlo Zottmann <carlo@g-blog.net> for refactoring
Textile's procedural code into a class framework
Additions and fixes Copyright (c) 2006 Alex Shiels http://thresholdstate.com/
"""
__license__ = """
L I C E N S E
=============
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name Textile nor the names of its contributors may be used to
endorse or promote products derived from this software without specific
prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
"""
import re
import uuid
from urlparse import urlparse
def _normalize_newlines(string):
out = re.sub(r'\r\n', '\n', string)
out = re.sub(r'\n{3,}', '\n\n', out)
out = re.sub(r'\n\s*\n', '\n\n', out)
out = re.sub(r'"$', '" ', out)
return out
def getimagesize(url):
    """
    Attempts to determine an image's width and height, and returns a string
    suitable for use in an <img> tag, or None in case of failure.
    Requires that PIL is installed.

    >>> getimagesize("http://www.google.com/intl/en_ALL/images/logo.gif")
    ... #doctest: +ELLIPSIS, +SKIP
    'width="..." height="..."'
    """
    # PIL and urllib2 are optional here: if either is missing, silently
    # report "unknown size" rather than failing the whole conversion.
    try:
        import ImageFile
        import urllib2
    except ImportError:
        return None

    try:
        p = ImageFile.Parser()
        f = urllib2.urlopen(url)
        # Feed the response incrementally; PIL usually learns the size from
        # the header bytes, so normally only the start of the file is read.
        while True:
            s = f.read(1024)
            if not s:
                break
            p.feed(s)
            if p.image:
                # NOTE(review): `f` is never closed on this early return —
                # the handle leaks until garbage collection.
                return 'width="%i" height="%i"' % p.image.size
    except (IOError, ValueError):
        # Network failure or unparseable image data: give up quietly.
        return None
class Textile(object):
hlgn = r'(?:\<(?!>)|(?<!<)\>|\<\>|\=|[()]+(?! ))'
vlgn = r'[\-^~]'
clas = r'(?:\([^)]+\))'
lnge = r'(?:\[[^\]]+\])'
styl = r'(?:\{[^}]+\})'
cspn = r'(?:\\\d+)'
rspn = r'(?:\/\d+)'
a = r'(?:%s|%s)*' % (hlgn, vlgn)
s = r'(?:%s|%s)*' % (cspn, rspn)
c = r'(?:%s)*' % '|'.join([clas, styl, lnge, hlgn])
pnct = r'[-!"#$%&()*+,/:;<=>?@\'\[\\\]\.^_`{|}~]'
# urlch = r'[\w"$\-_.+!*\'(),";/?:@=&%#{}|\\^~\[\]`]'
urlch = '[\w"$\-_.+*\'(),";\/?:@=&%#{}|\\^~\[\]`]'
url_schemes = ('http', 'https', 'ftp', 'mailto')
btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', 'fn\d+', 'p')
btag_lite = ('bq', 'bc', 'p')
glyph_defaults = (
('txt_quote_single_open', '&#8216;'),
('txt_quote_single_close', '&#8217;'),
('txt_quote_double_open', '&#8220;'),
('txt_quote_double_close', '&#8221;'),
('txt_apostrophe', '&#8217;'),
('txt_prime', '&#8242;'),
('txt_prime_double', '&#8243;'),
('txt_ellipsis', '&#8230;'),
('txt_emdash', '&#8212;'),
('txt_endash', '&#8211;'),
('txt_dimension', '&#215;'),
('txt_trademark', '&#8482;'),
('txt_registered', '&#174;'),
('txt_copyright', '&#169;'),
)
    def __init__(self, restricted=False, lite=False, noimage=False):
        """Create a Textile converter.

        restricted -- escape raw HTML and drop style attributes
                      (for untrusted input)
        lite       -- limit block tags and disable lists/tables
        noimage    -- disable image markup
        """
        self.restricted = restricted
        self.lite = lite
        self.noimage = noimage
        self.get_sizes = False  # when True, probe remote images for width/height
        self.fn = {}            # footnote number -> generated anchor id
        self.urlrefs = {}       # link alias -> url (collected by getRefs)
        self.shelf = {}         # opaque key -> protected text (see shelve)
        self.rel = ''           # extra rel="..." attribute for generated links
        self.html_type = 'xhtml'  # 'xhtml' or 'html' output flavour
    def textile(self, text, rel=None, head_offset=0, html_type='xhtml'):
        """
        Convert *text* from Textile markup to HTML and return it.

        rel         -- value for a rel="..." attribute added to generated links
        head_offset -- amount added to heading levels (h1. -> h(1+offset))
        html_type   -- 'xhtml' or 'html' style tags

        >>> import textile
        >>> textile.textile('some textile')
        u'\\t<p>some textile</p>'
        """
        self.html_type = html_type

        # text = unicode(text)
        text = _normalize_newlines(text)

        # In restricted mode raw HTML is escaped up front (quotes kept,
        # since they are needed for link syntax).
        if self.restricted:
            text = self.encode_html(text, quotes=False)

        if rel:
            self.rel = ' rel="%s"' % rel

        # Pipeline: collect link aliases, render blocks, then unshelve
        # protected regions back into the output.
        text = self.getRefs(text)

        text = self.block(text, int(head_offset))

        text = self.retrieve(text)
        return text
def pba(self, input, element=None):
"""
Parse block attributes.
>>> t = Textile()
>>> t.pba(r'\3')
''
>>> t.pba(r'\\3', element='td')
' colspan="3"'
>>> t.pba(r'/4', element='td')
' rowspan="4"'
>>> t.pba(r'\\3/4', element='td')
' colspan="3" rowspan="4"'
>>> t.vAlign('^')
'top'
>>> t.pba('^', element='td')
' style="vertical-align:top;"'
>>> t.pba('{line-height:18px}')
' style="line-height:18px;"'
>>> t.pba('(foo-bar)')
' class="foo-bar"'
>>> t.pba('(#myid)')
' id="myid"'
>>> t.pba('(foo-bar#myid)')
' class="foo-bar" id="myid"'
>>> t.pba('((((')
' style="padding-left:4em;"'
>>> t.pba(')))')
' style="padding-right:3em;"'
>>> t.pba('[fr]')
' lang="fr"'
"""
style = []
aclass = ''
lang = ''
colspan = ''
rowspan = ''
id = ''
if not input:
return ''
matched = input
if element == 'td':
m = re.search(r'\\(\d+)', matched)
if m:
colspan = m.group(1)
m = re.search(r'/(\d+)', matched)
if m:
rowspan = m.group(1)
if element == 'td' or element == 'tr':
m = re.search(r'(%s)' % self.vlgn, matched)
if m:
style.append("vertical-align:%s;" % self.vAlign(m.group(1)))
m = re.search(r'\{([^}]*)\}', matched)
if m:
style.append(m.group(1).rstrip(';') + ';')
matched = matched.replace(m.group(0), '')
m = re.search(r'\[([^\]]+)\]', matched, re.U)
if m:
lang = m.group(1)
matched = matched.replace(m.group(0), '')
m = re.search(r'\(([^()]+)\)', matched, re.U)
if m:
aclass = m.group(1)
matched = matched.replace(m.group(0), '')
m = re.search(r'([(]+)', matched)
if m:
style.append("padding-left:%sem;" % len(m.group(1)))
matched = matched.replace(m.group(0), '')
m = re.search(r'([)]+)', matched)
if m:
style.append("padding-right:%sem;" % len(m.group(1)))
matched = matched.replace(m.group(0), '')
m = re.search(r'(%s)' % self.hlgn, matched)
if m:
style.append("text-align:%s;" % self.hAlign(m.group(1)))
m = re.search(r'^(.*)#(.*)$', aclass)
if m:
id = m.group(2)
aclass = m.group(1)
if self.restricted:
if lang:
return ' lang="%s"'
else:
return ''
result = []
if style:
result.append(' style="%s"' % "".join(style))
if aclass:
result.append(' class="%s"' % aclass)
if lang:
result.append(' lang="%s"' % lang)
if id:
result.append(' id="%s"' % id)
if colspan:
result.append(' colspan="%s"' % colspan)
if rowspan:
result.append(' rowspan="%s"' % rowspan)
return ''.join(result)
def hasRawText(self, text):
"""
checks whether the text has text not already enclosed by a block tag
>>> t = Textile()
>>> t.hasRawText('<p>foo bar biz baz</p>')
False
>>> t.hasRawText(' why yes, yes it does')
True
"""
r = re.compile(r'<(p|blockquote|div|form|table|ul|ol|pre|h\d)[^>]*?>.*</\1>', re.S).sub('', text.strip()).strip()
r = re.compile(r'<(hr|br)[^>]*?/>').sub('', r)
return '' != r
def table(self, text):
r"""
>>> t = Textile()
>>> t.table('|one|two|three|\n|a|b|c|')
'\t<table>\n\t\t<tr>\n\t\t\t<td>one</td>\n\t\t\t<td>two</td>\n\t\t\t<td>three</td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td>a</td>\n\t\t\t<td>b</td>\n\t\t\t<td>c</td>\n\t\t</tr>\n\t</table>\n\n'
"""
text = text + "\n\n"
pattern = re.compile(r'^(?:table(_?%(s)s%(a)s%(c)s)\. ?\n)?^(%(a)s%(c)s\.? ?\|.*\|)\n\n' % {'s':self.s, 'a':self.a, 'c':self.c}, re.S|re.M|re.U)
return pattern.sub(self.fTable, text)
def fTable(self, match):
tatts = self.pba(match.group(1), 'table')
rows = []
for row in [ x for x in match.group(2).split('\n') if x]:
rmtch = re.search(r'^(%s%s\. )(.*)' % (self.a, self.c), row.lstrip())
if rmtch:
ratts = self.pba(rmtch.group(1), 'tr')
row = rmtch.group(2)
else:
ratts = ''
cells = []
for cell in row.split('|')[1:-1]:
ctyp = 'd'
if re.search(r'^_', cell):
ctyp = "h"
cmtch = re.search(r'^(_?%s%s%s\. )(.*)' % (self.s, self.a, self.c), cell)
if cmtch:
catts = self.pba(cmtch.group(1), 'td')
cell = cmtch.group(2)
else:
catts = ''
cell = self.graf(self.span(cell))
cells.append('\t\t\t<t%s%s>%s</t%s>' % (ctyp, catts, cell, ctyp))
rows.append("\t\t<tr%s>\n%s\n\t\t</tr>" % (ratts, '\n'.join(cells)))
cells = []
catts = None
return "\t<table%s>\n%s\n\t</table>\n\n" % (tatts, '\n'.join(rows))
def lists(self, text):
"""
>>> t = Textile()
>>> t.lists("* one\\n* two\\n* three")
'\\t<ul>\\n\\t\\t<li>one</li>\\n\\t\\t<li>two</li>\\n\\t\\t<li>three</li>\\n\\t</ul>'
"""
pattern = re.compile(r'^([#*]+%s .*)$(?![^#*])' % self.c, re.U|re.M|re.S)
return pattern.sub(self.fList, text)
def fList(self, match):
text = match.group(0).split("\n")
result = []
lists = []
for i, line in enumerate(text):
try:
nextline = text[i+1]
except IndexError:
nextline = ''
m = re.search(r"^([#*]+)(%s%s) (.*)$" % (self.a, self.c), line, re.S)
if m:
tl, atts, content = m.groups()
nl = ''
nm = re.search(r'^([#*]+)\s.*', nextline)
if nm:
nl = nm.group(1)
if tl not in lists:
lists.append(tl)
atts = self.pba(atts)
line = "\t<%sl%s>\n\t\t<li>%s" % (self.lT(tl), atts, self.graf(content))
else:
line = "\t\t<li>" + self.graf(content)
if len(nl) <= len(tl):
line = line + "</li>"
for k in reversed(lists):
if len(k) > len(nl):
line = line + "\n\t</%sl>" % self.lT(k)
if len(k) > 1:
line = line + "</li>"
lists.remove(k)
result.append(line)
return "\n".join(result)
def lT(self, input):
if re.search(r'^#+', input):
return 'o'
else:
return 'u'
def doPBr(self, in_):
return re.compile(r'<(p)([^>]*?)>(.*)(</\1>)', re.S).sub(self.doBr, in_)
def doBr(self, match):
if self.html_type == 'html':
content = re.sub(r'(.+)(?:(?<!<br>)|(?<!<br />))\n(?![#*\s|])', '\\1<br>', match.group(3))
else:
content = re.sub(r'(.+)(?:(?<!<br>)|(?<!<br />))\n(?![#*\s|])', '\\1<br />', match.group(3))
return '<%s%s>%s%s' % (match.group(1), match.group(2), content, match.group(4))
def block(self, text, head_offset = 0):
"""
>>> t = Textile()
>>> t.block('h1. foobar baby')
'\\t<h1>foobar baby</h1>'
"""
if not self.lite:
tre = '|'.join(self.btag)
else:
tre = '|'.join(self.btag_lite)
text = text.split('\n\n')
tag = 'p'
atts = cite = graf = ext = c1 = ''
out = []
anon = False
for line in text:
pattern = r'^(%s)(%s%s)\.(\.?)(?::(\S+))? (.*)$' % (tre, self.a, self.c)
match = re.search(pattern, line, re.S)
if match:
if ext:
out.append(out.pop() + c1)
tag, atts, ext, cite, graf = match.groups()
h_match = re.search(r'h([1-6])', tag)
if h_match:
head_level, = h_match.groups()
tag = 'h%i' % max(1,
min(int(head_level) + head_offset,
6))
o1, o2, content, c2, c1 = self.fBlock(tag, atts, ext,
cite, graf)
# leave off c1 if this block is extended,
# we'll close it at the start of the next block
if ext:
line = "%s%s%s%s" % (o1, o2, content, c2)
else:
line = "%s%s%s%s%s" % (o1, o2, content, c2, c1)
else:
anon = True
if ext or not re.search(r'^\s', line):
o1, o2, content, c2, c1 = self.fBlock(tag, atts, ext,
cite, line)
# skip $o1/$c1 because this is part of a continuing
# extended block
if tag == 'p' and not self.hasRawText(content):
line = content
else:
line = "%s%s%s" % (o2, content, c2)
else:
line = self.graf(line)
line = self.doPBr(line)
if self.html_type == 'xhtml':
line = re.sub(r'<br>', '<br />', line)
if ext and anon:
out.append(out.pop() + "\n" + line)
else:
out.append(line)
if not ext:
tag = 'p'
atts = ''
cite = ''
graf = ''
if ext:
out.append(out.pop() + c1)
return '\n\n'.join(out)
def fBlock(self, tag, atts, ext, cite, content):
"""
>>> t = Textile()
>>> t.fBlock("bq", "", None, "", "Hello BlockQuote")
('\\t<blockquote>\\n', '\\t\\t<p>', 'Hello BlockQuote', '</p>', '\\n\\t</blockquote>')
>>> t.fBlock("bq", "", None, "http://google.com", "Hello BlockQuote")
('\\t<blockquote cite="http://google.com">\\n', '\\t\\t<p>', 'Hello BlockQuote', '</p>', '\\n\\t</blockquote>')
>>> t.fBlock("bc", "", None, "", 'printf "Hello, World";') # doctest: +ELLIPSIS
('<pre>', '<code>', ..., '</code>', '</pre>')
>>> t.fBlock("h1", "", None, "", "foobar")
('', '\\t<h1>', 'foobar', '</h1>', '')
"""
atts = self.pba(atts)
o1 = o2 = c2 = c1 = ''
m = re.search(r'fn(\d+)', tag)
if m:
tag = 'p'
if m.group(1) in self.fn:
fnid = self.fn[m.group(1)]
else:
fnid = m.group(1)
atts = atts + ' id="fn%s"' % fnid
if atts.find('class=') < 0:
atts = atts + ' class="footnote"'
content = ('<sup>%s</sup>' % m.group(1)) + content
if tag == 'bq':
cite = self.checkRefs(cite)
if cite:
cite = ' cite="%s"' % cite
else:
cite = ''
o1 = "\t<blockquote%s%s>\n" % (cite, atts)
o2 = "\t\t<p%s>" % atts
c2 = "</p>"
c1 = "\n\t</blockquote>"
elif tag == 'bc':
o1 = "<pre%s>" % atts
o2 = "<code%s>" % atts
c2 = "</code>"
c1 = "</pre>"
content = self.shelve(self.encode_html(content.rstrip("\n") + "\n"))
elif tag == 'notextile':
content = self.shelve(content)
o1 = o2 = ''
c1 = c2 = ''
elif tag == 'pre':
content = self.shelve(self.encode_html(content.rstrip("\n") + "\n"))
o1 = "<pre%s>" % atts
o2 = c2 = ''
c1 = '</pre>'
else:
o2 = "\t<%s%s>" % (tag, atts)
c2 = "</%s>" % tag
content = self.graf(content)
return o1, o2, content, c2, c1
    def footnoteRef(self, text):
        """
        Replace inline [N] footnote references with superscript links.

        >>> t = Textile()
        >>> t.footnoteRef('foo[1] ') # doctest: +ELLIPSIS
        'foo<sup class="footnote"><a href="#fn...">1</a></sup> '
        """
        # \b anchors the bracket to a word boundary; the optional trailing
        # whitespace is captured so footnoteID can re-emit it after the tag.
        return re.sub(r'\b\[([0-9]+)\](\s)?', self.footnoteID, text)
def footnoteID(self, match):
id, t = match.groups()
if id not in self.fn:
self.fn[id] = str(uuid.uuid4())
fnid = self.fn[id]
if not t:
t = ''
return '<sup class="footnote"><a href="#fn%s">%s</a></sup>%s' % (fnid, id, t)
def glyphs(self, text):
"""
>>> t = Textile()
>>> t.glyphs("apostrophe's")
'apostrophe&#8217;s'
>>> t.glyphs("back in '88")
'back in &#8217;88'
>>> t.glyphs('foo ...')
'foo &#8230;'
>>> t.glyphs('--')
'&#8212;'
>>> t.glyphs('FooBar[tm]')
'FooBar&#8482;'
>>> t.glyphs("<p><cite>Cat's Cradle</cite> by Vonnegut</p>")
'<p><cite>Cat&#8217;s Cradle</cite> by Vonnegut</p>'
"""
# fix: hackish
text = re.sub(r'"\Z', '\" ', text)
glyph_search = (
re.compile(r"(\w)\'(\w)"), # apostrophe's
re.compile(r'(\s)\'(\d+\w?)\b(?!\')'), # back in '88
re.compile(r'(\S)\'(?=\s|'+self.pnct+'|<|$)'), # single closing
re.compile(r'\'/'), # single opening
re.compile(r'(\S)\"(?=\s|'+self.pnct+'|<|$)'), # double closing
re.compile(r'"'), # double opening
re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'), # 3+ uppercase acronym
re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'), # 3+ uppercase
re.compile(r'\b(\s{0,1})?\.{3}'), # ellipsis
re.compile(r'(\s?)--(\s?)'), # em dash
re.compile(r'\s-(?:\s|$)'), # en dash
re.compile(r'(\d+)( ?)x( ?)(?=\d+)'), # dimension sign
re.compile(r'\b ?[([]TM[])]', re.I), # trademark
re.compile(r'\b ?[([]R[])]', re.I), # registered
re.compile(r'\b ?[([]C[])]', re.I), # copyright
)
glyph_replace = [x % dict(self.glyph_defaults) for x in (
r'\1%(txt_apostrophe)s\2', # apostrophe's
r'\1%(txt_apostrophe)s\2', # back in '88
r'\1%(txt_quote_single_close)s', # single closing
r'%(txt_quote_single_open)s', # single opening
r'\1%(txt_quote_double_close)s', # double closing
r'%(txt_quote_double_open)s', # double opening
r'<acronym title="\2">\1</acronym>', # 3+ uppercase acronym
r'<span class="caps">\1</span>', # 3+ uppercase
r'\1%(txt_ellipsis)s', # ellipsis
r'\1%(txt_emdash)s\2', # em dash
r' %(txt_endash)s ', # en dash
r'\1\2%(txt_dimension)s\3', # dimension sign
r'%(txt_trademark)s', # trademark
r'%(txt_registered)s', # registered
r'%(txt_copyright)s', # copyright
)]
result = []
for line in re.compile(r'(<.*?>)', re.U).split(text):
if not re.search(r'<.*>', line):
for s, r in zip(glyph_search, glyph_replace):
line = s.sub(r, line)
result.append(line)
return ''.join(result)
def vAlign(self, input):
d = {'^':'top', '-':'middle', '~':'bottom'}
return d.get(input, '')
def hAlign(self, input):
d = {'<':'left', '=':'center', '>':'right', '<>': 'justify'}
return d.get(input, '')
    def getRefs(self, text):
        """
        Collect ``[alias]url`` link definitions from *text*.

        Each definition is recorded in ``self.urlrefs`` (via the ``refs``
        callback) and removed from the returned text, so later
        ``"label":alias`` links can resolve the alias.
        """
        pattern = re.compile(r'(?:(?<=^)|(?<=\s))\[(.+)\]((?:http(?:s?):\/\/|\/)\S+)(?=\s|$)', re.U)
        text = pattern.sub(self.refs, text)
        return text
def refs(self, match):
flag, url = match.groups()
self.urlrefs[flag] = url
return ''
def checkRefs(self, url):
return self.urlrefs.get(url, url)
def isRelURL(self, url):
"""
Identify relative urls.
>>> t = Textile()
>>> t.isRelURL("http://www.google.com/")
False
>>> t.isRelURL("/foo")
True
"""
(scheme, netloc) = urlparse(url)[0:2]
return not scheme and not netloc
def relURL(self, url):
scheme = urlparse(url)[0]
if self.restricted and scheme and scheme not in self.url_schemes:
return '#'
return url
def shelve(self, text):
id = str(uuid.uuid4())
self.shelf[id] = text
return id
def retrieve(self, text):
"""
>>> t = Textile()
>>> id = t.shelve("foobar")
>>> t.retrieve(id)
'foobar'
"""
while True:
old = text
for k, v in self.shelf.items():
text = text.replace(k, v)
if text == old:
break
return text
def encode_html(self, text, quotes=True):
a = (
('&', '&#38;'),
('<', '&#60;'),
('>', '&#62;')
)
if quotes:
a = a + (
("'", '&#39;'),
('"', '&#34;')
)
for k, v in a:
text = text.replace(k, v)
return text
    def graf(self, text):
        # Apply the inline-markup pipeline to one paragraph.
        # Order matters: notextile/code regions are shelved first so the
        # later passes (links, images, spans, glyphs) cannot touch them.
        if not self.lite:
            text = self.noTextile(text)
            text = self.code(text)
        text = self.links(text)
        if not self.noimage:
            text = self.image(text)
        if not self.lite:
            text = self.lists(text)
            text = self.table(text)
        text = self.span(text)
        text = self.footnoteRef(text)
        text = self.glyphs(text)
        # Trailing newlines would otherwise leak into the closing tag.
        return text.rstrip('\n')
def links(self, text):
"""
>>> t = Textile()
>>> t.links('fooobar "Google":http://google.com/foobar/ and hello world "flickr":http://flickr.com/photos/jsamsa/ ') # doctest: +ELLIPSIS
'fooobar ... and hello world ...'
"""
punct = '!"#$%&\'*+,-./:;=?@\\^_`|~'
pattern = r'''
(?P<pre> [\s\[{(]|[%s] )?
" # start
(?P<atts> %s )
(?P<text> [^"]+? )
\s?
(?: \(([^)]+?)\)(?=") )? # $title
":
(?P<url> (?:ftp|https?)? (?: :// )? [-A-Za-z0-9+&@#/?=~_()|!:,.;]*[-A-Za-z0-9+&@#/=~_()|] )
(?P<post> [^\w\/;]*? )
(?=<|\s|$)
''' % (re.escape(punct), self.c)
text = re.compile(pattern, re.X).sub(self.fLink, text)
return text
def fLink(self, match):
pre, atts, text, title, url, post = match.groups()
if pre == None:
pre = ''
# assume ) at the end of the url is not actually part of the url
# unless the url also contains a (
if url.endswith(')') and not url.find('(') > -1:
post = url[-1] + post
url = url[:-1]
url = self.checkRefs(url)
atts = self.pba(atts)
if title:
atts = atts + ' title="%s"' % self.encode_html(title)
if not self.noimage:
text = self.image(text)
text = self.span(text)
text = self.glyphs(text)
url = self.relURL(url)
out = '<a href="%s"%s%s>%s</a>' % (self.encode_html(url), atts, self.rel, text)
out = self.shelve(out)
return ''.join([pre, out, post])
def span(self, text):
"""
>>> t = Textile()
>>> t.span(r"hello %(bob)span *strong* and **bold**% goodbye")
'hello <span class="bob">span <strong>strong</strong> and <b>bold</b></span> goodbye'
"""
qtags = (r'\*\*', r'\*', r'\?\?', r'\-', r'__', r'_', r'%', r'\+', r'~', r'\^')
pnct = ".,\"'?!;:"
for qtag in qtags:
pattern = re.compile(r"""
(?:^|(?<=[\s>%(pnct)s])|([\]}]))
(%(qtag)s)(?!%(qtag)s)
(%(c)s)
(?::(\S+))?
([^\s%(qtag)s]+|\S[^%(qtag)s\n]*[^\s%(qtag)s\n])
([%(pnct)s]*)
%(qtag)s
(?:$|([\]}])|(?=%(selfpnct)s{1,2}|\s))
""" % {'qtag':qtag, 'c':self.c, 'pnct':pnct,
'selfpnct':self.pnct}, re.X)
text = pattern.sub(self.fSpan, text)
return text
def fSpan(self, match):
_, tag, atts, cite, content, end, _ = match.groups()
qtags = {
'*': 'strong',
'**': 'b',
'??': 'cite',
'_' : 'em',
'__': 'i',
'-' : 'del',
'%' : 'span',
'+' : 'ins',
'~' : 'sub',
'^' : 'sup'
}
tag = qtags[tag]
atts = self.pba(atts)
if cite:
atts = atts + 'cite="%s"' % cite
content = self.span(content)
out = "<%s%s>%s%s</%s>" % (tag, atts, content, end, tag)
return out
def image(self, text):
"""
>>> t = Textile()
>>> t.image('!/imgs/myphoto.jpg!:http://jsamsa.com')
'<a href="http://jsamsa.com"><img src="/imgs/myphoto.jpg" alt="" /></a>'
"""
pattern = re.compile(r"""
(?:[\[{])? # pre
\! # opening !
(%s) # optional style,class atts
(?:\. )? # optional dot-space
([^\s(!]+) # presume this is the src
\s? # optional space
(?:\(([^\)]+)\))? # optional title
\! # closing
(?::(\S+))? # optional href
(?:[\]}]|(?=\s|$)) # lookahead: space or end of string
""" % self.c, re.U|re.X)
return pattern.sub(self.fImage, text)
def fImage(self, match):
# (None, '', '/imgs/myphoto.jpg', None, None)
atts, url, title, href = match.groups()
atts = self.pba(atts)
if title:
atts = atts + ' title="%s" alt="%s"' % (title, title)
else:
atts = atts + ' alt=""'
if not self.isRelURL(url) and self.get_sizes:
size = getimagesize(url)
if (size):
atts += " %s" % size
if href:
href = self.checkRefs(href)
url = self.checkRefs(url)
url = self.relURL(url)
out = []
if href:
out.append('<a href="%s" class="img">' % href)
if self.html_type == 'html':
out.append('<img src="%s"%s>' % (url, atts))
else:
out.append('<img src="%s"%s />' % (url, atts))
if href:
out.append('</a>')
return ''.join(out)
def code(self, text):
text = self.doSpecial(text, '<code>', '</code>', self.fCode)
text = self.doSpecial(text, '@', '@', self.fCode)
text = self.doSpecial(text, '<pre>', '</pre>', self.fPre)
return text
def fCode(self, match):
before, text, after = match.groups()
if after == None:
after = ''
# text needs to be escaped
if not self.restricted:
text = self.encode_html(text)
return ''.join([before, self.shelve('<code>%s</code>' % text), after])
def fPre(self, match):
before, text, after = match.groups()
if after == None:
after = ''
# text needs to be escapedd
if not self.restricted:
text = self.encode_html(text)
return ''.join([before, '<pre>', self.shelve(text), '</pre>', after])
def doSpecial(self, text, start, end, method=None):
if method == None:
method = self.fSpecial
pattern = re.compile(r'(^|\s|[\[({>])%s(.*?)%s(\s|$|[\])}])?' % (re.escape(start), re.escape(end)), re.M|re.S)
return pattern.sub(method, text)
def fSpecial(self, match):
"""
special blocks like notextile or code
"""
before, text, after = match.groups()
if after == None:
after = ''
return ''.join([before, self.shelve(self.encode_html(text)), after])
def noTextile(self, text):
text = self.doSpecial(text, '<notextile>', '</notextile>', self.fTextile)
return self.doSpecial(text, '==', '==', self.fTextile)
def fTextile(self, match):
before, notextile, after = match.groups()
if after == None:
after = ''
return ''.join([before, self.shelve(notextile), after])
def textile(text, head_offset=0, html_type='xhtml', encoding=None, output=None):
    """
    Convert *text* from Textile markup to HTML.

    this function takes additional parameters:
    head_offset - offset to apply to heading levels (default: 0)
    html_type - 'xhtml' or 'html' style tags (default: 'xhtml')

    NOTE(review): ``encoding`` and ``output`` are accepted but ignored by
    this implementation — presumably kept for API compatibility; confirm
    against callers before removing.
    """
    return Textile().textile(text, head_offset=head_offset,
                             html_type=html_type)
def textile_restricted(text, lite=True, noimage=True, html_type='xhtml'):
    """
    Restricted version of Textile designed for weblog comments and other
    untrusted input.

    Raw HTML is escaped.
    Style attributes are disabled.
    rel='nofollow' is added to external links.

    When lite=True is set (the default):
    Block tags are restricted to p, bq, and bc.
    Lists and tables are disabled.

    When noimage=True is set (the default):
    Image tags are disabled.
    """
    # Delegate to a Textile instance configured for untrusted input.
    return Textile(restricted=True, lite=lite,
                   noimage=noimage).textile(text, rel='nofollow',
                                            html_type=html_type)

View File

@ -12,7 +12,7 @@ from calibre.ebooks.chardet import detect
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \ from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
separate_paragraphs_single_line, separate_paragraphs_print_formatted, \ separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
preserve_spaces, detect_paragraph_type, detect_formatting_type, \ preserve_spaces, detect_paragraph_type, detect_formatting_type, \
convert_heuristic, normalize_line_endings convert_heuristic, normalize_line_endings, convert_textile
from calibre import _ent_pat, xml_entity_to_unicode from calibre import _ent_pat, xml_entity_to_unicode
class TXTInput(InputFormatPlugin): class TXTInput(InputFormatPlugin):
@ -41,6 +41,7 @@ class TXTInput(InputFormatPlugin):
'paragraph and no styling is applied.\n' 'paragraph and no styling is applied.\n'
'* heuristic: Process using heuristics to determine formatting such ' '* heuristic: Process using heuristics to determine formatting such '
'as chapter headings and italic text.\n' 'as chapter headings and italic text.\n'
'* textile: Processing using textile formatting.\n'
'* markdown: Processing using markdown formatting. ' '* markdown: Processing using markdown formatting. '
'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'), 'To learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
OptionRecommendation(name='preserve_spaces', recommended_value=False, OptionRecommendation(name='preserve_spaces', recommended_value=False,
@ -91,6 +92,9 @@ class TXTInput(InputFormatPlugin):
except RuntimeError: except RuntimeError:
raise ValueError('This txt file has malformed markup, it cannot be' raise ValueError('This txt file has malformed markup, it cannot be'
' converted by calibre. See http://daringfireball.net/projects/markdown/syntax') ' converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
elif options.formatting_type == 'textile':
log.debug('Running text though textile conversion...')
html = convert_textile(txt)
else: else:
# Determine the paragraph type of the document. # Determine the paragraph type of the document.
if options.paragraph_type == 'auto': if options.paragraph_type == 'auto':

View File

@ -1,4 +1,8 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
''' '''
Read content from txt file. Read content from txt file.
@ -7,15 +11,11 @@ Read content from txt file.
import os, re import os, re
from calibre import prepare_string_for_xml, isbytestring from calibre import prepare_string_for_xml, isbytestring
from calibre.ebooks.markdown import markdown
from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.utils.cleantext import clean_ascii_chars from calibre.utils.cleantext import clean_ascii_chars
from calibre.ebooks.txt.heuristicprocessor import TXTHeuristicProcessor from calibre.ebooks.txt.heuristicprocessor import TXTHeuristicProcessor
from calibre.ebooks.conversion.preprocess import DocAnalysis from calibre.ebooks.conversion.preprocess import DocAnalysis
from calibre.utils.cleantext import clean_ascii_chars
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>' HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
@ -34,9 +34,9 @@ def clean_txt(txt):
txt = re.sub('(?<=.)\s+$', '', txt) txt = re.sub('(?<=.)\s+$', '', txt)
# Remove excessive line breaks. # Remove excessive line breaks.
txt = re.sub('\n{3,}', '\n\n', txt) txt = re.sub('\n{3,}', '\n\n', txt)
#remove ASCII invalid chars #remove ASCII invalid chars : 0 to 8 and 11-14 to 24
txt = clean_ascii_chars(txt) txt = clean_ascii_chars(txt)
return txt return txt
def split_txt(txt, epub_split_size_kb=0): def split_txt(txt, epub_split_size_kb=0):
@ -73,12 +73,18 @@ def convert_heuristic(txt, title='', epub_split_size_kb=0):
return tp.convert(txt, title, epub_split_size_kb) return tp.convert(txt, title, epub_split_size_kb)
def convert_markdown(txt, title='', disable_toc=False): def convert_markdown(txt, title='', disable_toc=False):
from calibre.ebooks.markdown import markdown
md = markdown.Markdown( md = markdown.Markdown(
extensions=['footnotes', 'tables', 'toc'], extensions=['footnotes', 'tables', 'toc'],
extension_configs={"toc": {"disable_toc": disable_toc}}, extension_configs={"toc": {"disable_toc": disable_toc}},
safe_mode=False) safe_mode=False)
return HTML_TEMPLATE % (title, md.convert(txt)) return HTML_TEMPLATE % (title, md.convert(txt))
def convert_textile(txt, title=''):
from calibre.ebooks.textile import textile
html = textile(txt, encoding='utf-8')
return HTML_TEMPLATE % (title, html)
def normalize_line_endings(txt): def normalize_line_endings(txt):
txt = txt.replace('\r\n', '\n') txt = txt.replace('\r\n', '\n')
txt = txt.replace('\r', '\n') txt = txt.replace('\r', '\n')
@ -114,66 +120,75 @@ def split_string_separator(txt, size) :
def detect_paragraph_type(txt): def detect_paragraph_type(txt):
''' '''
Tries to determine the formatting of the document. Tries to determine the formatting of the document.
block: Paragraphs are separated by a blank line. block: Paragraphs are separated by a blank line.
single: Each line is a paragraph. single: Each line is a paragraph.
print: Each paragraph starts with a 2+ spaces or a tab print: Each paragraph starts with a 2+ spaces or a tab
and ends when a new paragraph is reached. and ends when a new paragraph is reached.
unformatted: most lines have hard line breaks, few/no blank lines or indents unformatted: most lines have hard line breaks, few/no blank lines or indents
returns block, single, print, unformatted returns block, single, print, unformatted
''' '''
txt = txt.replace('\r\n', '\n') txt = txt.replace('\r\n', '\n')
txt = txt.replace('\r', '\n') txt = txt.replace('\r', '\n')
txt_line_count = len(re.findall('(?mu)^\s*.+$', txt)) txt_line_count = len(re.findall('(?mu)^\s*.+$', txt))
# Check for hard line breaks - true if 55% of the doc breaks in the same region # Check for hard line breaks - true if 55% of the doc breaks in the same region
docanalysis = DocAnalysis('txt', txt) docanalysis = DocAnalysis('txt', txt)
hardbreaks = docanalysis.line_histogram(.55) hardbreaks = docanalysis.line_histogram(.55)
if hardbreaks: if hardbreaks:
# Determine print percentage # Determine print percentage
tab_line_count = len(re.findall('(?mu)^(\t|\s{2,}).+$', txt)) tab_line_count = len(re.findall('(?mu)^(\t|\s{2,}).+$', txt))
print_percent = tab_line_count / float(txt_line_count) print_percent = tab_line_count / float(txt_line_count)
# Determine block percentage # Determine block percentage
empty_line_count = len(re.findall('(?mu)^\s*$', txt)) empty_line_count = len(re.findall('(?mu)^\s*$', txt))
block_percent = empty_line_count / float(txt_line_count) block_percent = empty_line_count / float(txt_line_count)
# Compare the two types - the type with the larger number of instances wins # Compare the two types - the type with the larger number of instances wins
# in cases where only one or the other represents the vast majority of the document neither wins # in cases where only one or the other represents the vast majority of the document neither wins
if print_percent >= block_percent: if print_percent >= block_percent:
if .15 <= print_percent <= .75: if .15 <= print_percent <= .75:
return 'print' return 'print'
elif .15 <= block_percent <= .75: elif .15 <= block_percent <= .75:
return 'block' return 'block'
# Assume unformatted text with hardbreaks if nothing else matches # Assume unformatted text with hardbreaks if nothing else matches
return 'unformatted' return 'unformatted'
# return single if hardbreaks is false # return single if hardbreaks is false
return 'single' return 'single'
def detect_formatting_type(txt): def detect_formatting_type(txt):
markdown_count = 0
textile_count = 0
# Check for markdown # Check for markdown
# Headings # Headings
if len(re.findall('(?mu)^#+', txt)) >= 5: markdown_count += len(re.findall('(?mu)^#+', txt))
return 'markdown' markdown_count += len(re.findall('(?mu)^=+$', txt))
if len(re.findall('(?mu)^=+$', txt)) >= 5: markdown_count += len(re.findall('(?mu)^-+$', txt))
return 'markdown'
if len(re.findall('(?mu)^-+$', txt)) >= 5:
return 'markdown'
# Images # Images
if len(re.findall('(?u)!\[.*?\]\(.+?\)', txt)) >= 5: markdown_count += len(re.findall('(?u)!\[.*?\]\(.+?\)', txt))
return 'markdown'
# Links # Links
if len(re.findall('(?u)(^|(?P<pre>[^!]))\[.*?\]\([^)]+\)', txt)) >= 5: markdown_count += len(re.findall('(?u)(^|(?P<pre>[^!]))\[.*?\]\([^)]+\)', txt))
return 'markdown'
# Escaped characters # Check for textile
md_escapted_characters = ['\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '#', '+', '-', '.', '!'] # Headings
for c in md_escapted_characters: textile_count += len(re.findall(r'(?mu)^h[1-6]\.', txt))
if txt.count('\\'+c) > 10: # Block quote.
textile_count += len(re.findall(r'(?mu)^bq\.', txt))
# Images
textile_count += len(re.findall(r'\![^\s]+(:[^\s]+)*', txt))
# Links
textile_count += len(re.findall(r'"(\(.+?\))*[^\(]+?(\(.+?\))*":[^\s]+', txt))
if markdown_count > 5 or textile_count > 5:
if markdown_count > textile_count:
return 'markdown' return 'markdown'
else:
return 'textile'
return 'heuristic' return 'heuristic'

View File

@ -28,17 +28,17 @@ class PluginWidget(QWidget, Ui_Form):
def __init__(self, parent=None): def __init__(self, parent=None):
QWidget.__init__(self, parent) QWidget.__init__(self, parent)
self.setupUi(self) self.setupUi(self)
def initialize(self, name, db): #not working properly to update
from calibre.library.catalog import FIELDS from calibre.library.catalog import FIELDS
self.all_fields = [x for x in FIELDS if x != 'all'] self.all_fields = [x for x in FIELDS if x != 'all']
#add custom columns #add custom columns
db = db_()
self.all_fields.extend([x for x in sorted(db.custom_field_keys())]) self.all_fields.extend([x for x in sorted(db.custom_field_keys())])
#populate #populate
for x in self.all_fields: for x in self.all_fields:
QListWidgetItem(x, self.db_fields) QListWidgetItem(x, self.db_fields)
def initialize(self, name, db): #not working properly to update
self.name = name self.name = name
fields = gprefs.get(name+'_db_fields', self.all_fields) fields = gprefs.get(name+'_db_fields', self.all_fields)
# Restore the activated db_fields from last use # Restore the activated db_fields from last use

View File

@ -0,0 +1,21 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from PyQt4.Qt import QDialog
from calibre.gui2.dialogs.drm_error_ui import Ui_Dialog
class DRMErrorMessage(QDialog, Ui_Dialog):
def __init__(self, parent=None, title=None):
QDialog.__init__(self, parent)
self.setupUi(self)
if title is not None:
t = unicode(self.msg.text())
self.msg.setText('<h2>%s</h2>%s'%(title, t))
self.resize(self.sizeHint())

View File

@ -0,0 +1,102 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>Dialog</class>
<widget class="QDialog" name="Dialog">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>417</width>
<height>235</height>
</rect>
</property>
<property name="windowTitle">
<string>This book is DRMed</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="0" column="0">
<widget class="QLabel" name="label">
<property name="sizePolicy">
<sizepolicy hsizetype="Preferred" vsizetype="Preferred">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="maximumSize">
<size>
<width>132</width>
<height>16777215</height>
</size>
</property>
<property name="text">
<string/>
</property>
<property name="pixmap">
<pixmap resource="../../../../resources/images.qrc">:/images/document-encrypt.png</pixmap>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QLabel" name="msg">
<property name="text">
<string>&lt;p&gt;This book is locked by &lt;b&gt;DRM&lt;/b&gt;. To learn more about DRM and why you cannot read or convert this book in calibre,
&lt;a href=&quot;http://bugs.calibre-ebook.com/wiki/DRM&quot;&gt;click here&lt;/a&gt;.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
<property name="openExternalLinks">
<bool>true</bool>
</property>
</widget>
</item>
<item row="1" column="0" colspan="2">
<widget class="QDialogButtonBox" name="buttonBox">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="standardButtons">
<set>QDialogButtonBox::Close</set>
</property>
</widget>
</item>
</layout>
</widget>
<resources>
<include location="../../../../resources/images.qrc"/>
</resources>
<connections>
<connection>
<sender>buttonBox</sender>
<signal>accepted()</signal>
<receiver>Dialog</receiver>
<slot>accept()</slot>
<hints>
<hint type="sourcelabel">
<x>248</x>
<y>254</y>
</hint>
<hint type="destinationlabel">
<x>157</x>
<y>274</y>
</hint>
</hints>
</connection>
<connection>
<sender>buttonBox</sender>
<signal>rejected()</signal>
<receiver>Dialog</receiver>
<slot>reject()</slot>
<hints>
<hint type="sourcelabel">
<x>316</x>
<y>260</y>
</hint>
<hint type="destinationlabel">
<x>286</x>
<y>274</y>
</hint>
</hints>
</connection>
</connections>
</ui>

View File

@ -15,7 +15,7 @@ from calibre.ebooks.metadata import string_to_authors, authors_to_string
from calibre.ebooks.metadata.book.base import composite_formatter from calibre.ebooks.metadata.book.base import composite_formatter
from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.meta import get_metadata
from calibre.gui2.custom_column_widgets import populate_metadata_page from calibre.gui2.custom_column_widgets import populate_metadata_page
from calibre.gui2 import error_dialog from calibre.gui2 import error_dialog, ResizableDialog
from calibre.gui2.progress_indicator import ProgressIndicator from calibre.gui2.progress_indicator import ProgressIndicator
from calibre.utils.config import dynamic from calibre.utils.config import dynamic
from calibre.utils.titlecase import titlecase from calibre.utils.titlecase import titlecase
@ -49,7 +49,7 @@ def get_cover_data(path):
class MyBlockingBusy(QDialog): class MyBlockingBusy(QDialog): # {{{
do_one_signal = pyqtSignal() do_one_signal = pyqtSignal()
@ -241,8 +241,9 @@ class MyBlockingBusy(QDialog):
self.current_index += 1 self.current_index += 1
self.do_one_signal.emit() self.do_one_signal.emit()
# }}}
class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog): class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
s_r_functions = { '' : lambda x: x, s_r_functions = { '' : lambda x: x,
_('Lower Case') : lambda x: icu_lower(x), _('Lower Case') : lambda x: icu_lower(x),
@ -261,9 +262,8 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
] ]
def __init__(self, window, rows, model, tab): def __init__(self, window, rows, model, tab):
QDialog.__init__(self, window) ResizableDialog.__init__(self, window)
Ui_MetadataBulkDialog.__init__(self) Ui_MetadataBulkDialog.__init__(self)
self.setupUi(self)
self.model = model self.model = model
self.db = model.db self.db = model.db
self.ids = [self.db.id(r) for r in rows] self.ids = [self.db.id(r) for r in rows]

File diff suppressed because it is too large Load Diff

View File

@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import time, os import time, os
from PyQt4.Qt import SIGNAL, QUrl, QAbstractListModel, Qt, \ from PyQt4.Qt import SIGNAL, QUrl, QAbstractListModel, Qt, \
QVariant, QInputDialog QVariant
from calibre.web.feeds.recipes import compile_recipe from calibre.web.feeds.recipes import compile_recipe
from calibre.web.feeds.news import AutomaticNewsRecipe from calibre.web.feeds.news import AutomaticNewsRecipe
@ -256,24 +256,61 @@ class %(classname)s(%(base_class)s):
def add_builtin_recipe(self): def add_builtin_recipe(self):
from calibre.web.feeds.recipes.collection import \ from calibre.web.feeds.recipes.collection import \
get_builtin_recipe_by_title, get_builtin_recipe_titles get_builtin_recipe_collection, get_builtin_recipe_by_id
items = sorted(get_builtin_recipe_titles(), key=sort_key) from PyQt4.Qt import QDialog, QVBoxLayout, QListWidgetItem, \
QListWidget, QDialogButtonBox, QSize
d = QDialog(self)
d.l = QVBoxLayout()
d.setLayout(d.l)
d.list = QListWidget(d)
d.list.doubleClicked.connect(lambda x: d.accept())
d.l.addWidget(d.list)
d.bb = QDialogButtonBox(QDialogButtonBox.Ok|QDialogButtonBox.Cancel,
Qt.Horizontal, d)
d.bb.accepted.connect(d.accept)
d.bb.rejected.connect(d.reject)
d.l.addWidget(d.bb)
d.setWindowTitle(_('Choose builtin recipe'))
items = []
for r in get_builtin_recipe_collection():
id_ = r.get('id', '')
title = r.get('title', '')
lang = r.get('language', '')
if id_ and title:
items.append((title + ' [%s]'%lang, id_))
title, ok = QInputDialog.getItem(self, _('Pick recipe'), _('Pick the recipe to customize'), items.sort(key=lambda x:sort_key(x[0]))
items, 0, False) for title, id_ in items:
if ok: item = QListWidgetItem(title)
title = unicode(title) item.setData(Qt.UserRole, id_)
profile = get_builtin_recipe_by_title(title) d.list.addItem(item)
if self._model.has_title(title):
if question_dialog(self, _('Replace recipe?'), d.resize(QSize(450, 400))
_('A custom recipe named %s already exists. Do you want to ' ret = d.exec_()
'replace it?')%title): d.list.doubleClicked.disconnect()
self._model.replace_by_title(title, profile) if ret != d.Accepted:
else: return
return
items = list(d.list.selectedItems())
if not items:
return
item = items[-1]
id_ = unicode(item.data(Qt.UserRole).toString())
title = unicode(item.data(Qt.DisplayRole).toString()).rpartition(' [')[0]
profile = get_builtin_recipe_by_id(id_)
if profile is None:
raise Exception('Something weird happened')
if self._model.has_title(title):
if question_dialog(self, _('Replace recipe?'),
_('A custom recipe named %s already exists. Do you want to '
'replace it?')%title):
self._model.replace_by_title(title, profile)
else: else:
self.model.add(title, profile) return
else:
self.model.add(title, profile)
self.clear() self.clear()

View File

@ -8,9 +8,9 @@ __docformat__ = 'restructuredtext en'
from functools import partial from functools import partial
from PyQt4.Qt import QIcon, Qt, QWidget, QToolBar, QSize, \ from PyQt4.Qt import QIcon, Qt, QWidget, QToolBar, QSize, \
pyqtSignal, QToolButton, QPushButton, \ pyqtSignal, QToolButton, QMenu, QCheckBox, \
QObject, QVBoxLayout, QSizePolicy, QLabel, QHBoxLayout, QActionGroup, \ QObject, QVBoxLayout, QSizePolicy, QLabel, QHBoxLayout, QActionGroup
QMenu
from calibre.constants import __appname__ from calibre.constants import __appname__
from calibre.gui2.search_box import SearchBox2, SavedSearchBox from calibre.gui2.search_box import SearchBox2, SavedSearchBox
@ -178,7 +178,9 @@ class SearchBar(QWidget): # {{{
x.setToolTip(_("<p>Search the list of books by title, author, publisher, tags, comments, etc.<br><br>Words separated by spaces are ANDed")) x.setToolTip(_("<p>Search the list of books by title, author, publisher, tags, comments, etc.<br><br>Words separated by spaces are ANDed"))
l.addWidget(x) l.addWidget(x)
self.search_button = QPushButton(_('&Go!')) self.search_button = QToolButton()
self.search_button.setToolButtonStyle(Qt.ToolButtonTextOnly)
self.search_button.setText(_('&Go!'))
l.addWidget(self.search_button) l.addWidget(self.search_button)
self.search_button.setSizePolicy(QSizePolicy.Minimum, self.search_button.setSizePolicy(QSizePolicy.Minimum,
QSizePolicy.Minimum) QSizePolicy.Minimum)
@ -192,6 +194,12 @@ class SearchBar(QWidget): # {{{
l.addWidget(x) l.addWidget(x)
x.setToolTip(_("Reset Quick Search")) x.setToolTip(_("Reset Quick Search"))
x = parent.search_highlight_only = QCheckBox()
x.setText(_('&Highlight'))
x.setToolTip(_('Highlight matched books in the book list, instead '
'of restricting the book list to the matches.'))
l.addWidget(x)
x = parent.saved_search = SavedSearchBox(self) x = parent.saved_search = SavedSearchBox(self)
x.setMaximumSize(QSize(150, 16777215)) x.setMaximumSize(QSize(150, 16777215))
x.setMinimumContentsLength(15) x.setMinimumContentsLength(15)

View File

@ -10,7 +10,7 @@ from contextlib import closing
from operator import attrgetter from operator import attrgetter
from PyQt4.Qt import QAbstractTableModel, Qt, pyqtSignal, QIcon, QImage, \ from PyQt4.Qt import QAbstractTableModel, Qt, pyqtSignal, QIcon, QImage, \
QModelIndex, QVariant, QDate QModelIndex, QVariant, QDate, QColor
from calibre.gui2 import NONE, config, UNDEFINED_QDATE from calibre.gui2 import NONE, config, UNDEFINED_QDATE
from calibre.utils.pyparsing import ParseException from calibre.utils.pyparsing import ParseException
@ -93,6 +93,9 @@ class BooksModel(QAbstractTableModel): # {{{
self.bool_no_icon = QIcon(I('list_remove.png')) self.bool_no_icon = QIcon(I('list_remove.png'))
self.bool_blank_icon = QIcon(I('blank.png')) self.bool_blank_icon = QIcon(I('blank.png'))
self.device_connected = False self.device_connected = False
self.rows_matching = set()
self.lowest_row_matching = None
self.highlight_only = False
self.read_config() self.read_config()
def change_alignment(self, colname, alignment): def change_alignment(self, colname, alignment):
@ -229,9 +232,27 @@ class BooksModel(QAbstractTableModel): # {{{
self.endInsertRows() self.endInsertRows()
self.count_changed() self.count_changed()
def set_highlight_only(self, toWhat):
self.highlight_only = toWhat
if self.last_search:
self.research()
def search(self, text, reset=True): def search(self, text, reset=True):
try: try:
self.db.search(text) if self.highlight_only:
self.db.search('')
if not text:
self.rows_matching = set()
self.lowest_row_matching = None
else:
self.rows_matching = self.db.search(text, return_matches=True)
if self.rows_matching:
self.lowest_row_matching = self.db.row(self.rows_matching[0])
self.rows_matching = set(self.rows_matching)
else:
self.rows_matching = set()
self.lowest_row_matching = None
self.db.search(text)
except ParseException as e: except ParseException as e:
self.searched.emit(e.msg) self.searched.emit(e.msg)
return return
@ -337,8 +358,9 @@ class BooksModel(QAbstractTableModel): # {{{
name, val = mi.format_field(key) name, val = mi.format_field(key)
if mi.metadata_for_field(key)['datatype'] == 'comments': if mi.metadata_for_field(key)['datatype'] == 'comments':
name += ':html' name += ':html'
if val: if val and name not in data:
data[name] = val data[name] = val
return data return data
@ -651,6 +673,9 @@ class BooksModel(QAbstractTableModel): # {{{
return NONE return NONE
if role in (Qt.DisplayRole, Qt.EditRole): if role in (Qt.DisplayRole, Qt.EditRole):
return self.column_to_dc_map[col](index.row()) return self.column_to_dc_map[col](index.row())
elif role == Qt.BackgroundColorRole:
if self.id(index) in self.rows_matching:
return QColor('lightgreen')
elif role == Qt.DecorationRole: elif role == Qt.DecorationRole:
if self.column_to_dc_decorator_map[col] is not None: if self.column_to_dc_decorator_map[col] is not None:
return self.column_to_dc_decorator_map[index.column()](index.row()) return self.column_to_dc_decorator_map[index.column()](index.row())

View File

@ -680,8 +680,14 @@ class BooksView(QTableView): # {{{
def set_editable(self, editable, supports_backloading): def set_editable(self, editable, supports_backloading):
self._model.set_editable(editable) self._model.set_editable(editable)
def search_proxy(self, txt):
self._model.search(txt)
if self._model.lowest_row_matching is not None:
self.select_rows([self._model.lowest_row_matching], using_ids=False)
self.setFocus(Qt.OtherFocusReason)
def connect_to_search_box(self, sb, search_done): def connect_to_search_box(self, sb, search_done):
sb.search.connect(self._model.search) sb.search.connect(self.search_proxy)
self._search_done = search_done self._search_done = search_done
self._model.searched.connect(self.search_done) self._model.searched.connect(self.search_done)

View File

@ -37,7 +37,10 @@ class BaseModel(QAbstractListModel):
dont_remove_from=set(['toolbar-device'])) dont_remove_from=set(['toolbar-device']))
if name is None: if name is None:
return FakeAction('--- '+_('Separator')+' ---', None) return FakeAction('--- '+_('Separator')+' ---', None)
return gui.iactions[name] try:
return gui.iactions[name]
except:
return None
def rowCount(self, parent): def rowCount(self, parent):
return len(self._data) return len(self._data)
@ -124,7 +127,8 @@ class CurrentModel(BaseModel):
BaseModel.__init__(self) BaseModel.__init__(self)
self.gprefs_name = 'action-layout-'+key self.gprefs_name = 'action-layout-'+key
current = gprefs[self.gprefs_name] current = gprefs[self.gprefs_name]
self._data = [self.name_to_action(x, gui) for x in current] self._data = [self.name_to_action(x, gui) for x in current]
self._data = [x for x in self._data if x is not None]
self.key = key self.key = key
self.gui = gui self.gui = gui

View File

@ -16,6 +16,7 @@ from calibre.gui2 import config
from calibre.gui2.dialogs.confirm_delete import confirm from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.gui2.dialogs.saved_search_editor import SavedSearchEditor from calibre.gui2.dialogs.saved_search_editor import SavedSearchEditor
from calibre.gui2.dialogs.search import SearchDialog from calibre.gui2.dialogs.search import SearchDialog
from calibre.utils.config import dynamic
from calibre.utils.search_query_parser import saved_searches from calibre.utils.search_query_parser import saved_searches
from calibre.utils.icu import sort_key from calibre.utils.icu import sort_key
@ -375,6 +376,9 @@ class SearchBoxMixin(object): # {{{
unicode(self.search.toolTip()))) unicode(self.search.toolTip())))
self.advanced_search_button.setStatusTip(self.advanced_search_button.toolTip()) self.advanced_search_button.setStatusTip(self.advanced_search_button.toolTip())
self.clear_button.setStatusTip(self.clear_button.toolTip()) self.clear_button.setStatusTip(self.clear_button.toolTip())
self.search_highlight_only.stateChanged.connect(self.highlight_only_changed)
self.search_highlight_only.setChecked(
dynamic.get('search_highlight_only', False))
def focus_search_box(self, *args): def focus_search_box(self, *args):
self.search.setFocus(Qt.OtherFocusReason) self.search.setFocus(Qt.OtherFocusReason)
@ -401,6 +405,11 @@ class SearchBoxMixin(object): # {{{
def focus_to_library(self): def focus_to_library(self):
self.current_view().setFocus(Qt.OtherFocusReason) self.current_view().setFocus(Qt.OtherFocusReason)
def highlight_only_changed(self, toWhat):
dynamic.set('search_highlight_only', toWhat)
self.current_view().model().set_highlight_only(toWhat)
self.focus_to_library()
# }}} # }}}
class SavedSearchBoxMixin(object): # {{{ class SavedSearchBoxMixin(object): # {{{

View File

@ -468,12 +468,8 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
try: try:
if 'calibre.ebooks.DRMError' in job.details: if 'calibre.ebooks.DRMError' in job.details:
if not minz: if not minz:
d = error_dialog(self, _('Conversion Error'), from calibre.gui2.dialogs.drm_error import DRMErrorMessage
_('<p>Could not convert: %s<p>It is a ' d = DRMErrorMessage(self, job.description.split(':')[-1])
'<a href="%s">DRM</a>ed book. You must first remove the '
'DRM using third party tools.')%\
(job.description.split(':')[-1],
'http://bugs.calibre-ebook.com/wiki/DRM'))
d.setModal(False) d.setModal(False)
d.show() d.show()
self._modeless_dialogs.append(d) self._modeless_dialogs.append(d)

View File

@ -26,6 +26,7 @@ from calibre.gui2.search_box import SearchBox2
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
from calibre.customize.ui import available_input_formats from calibre.customize.ui import available_input_formats
from calibre.gui2.viewer.dictionary import Lookup from calibre.gui2.viewer.dictionary import Lookup
from calibre import as_unicode
class TOCItem(QStandardItem): class TOCItem(QStandardItem):
@ -626,13 +627,12 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
QApplication.processEvents() QApplication.processEvents()
if worker.exception is not None: if worker.exception is not None:
if isinstance(worker.exception, DRMError): if isinstance(worker.exception, DRMError):
error_dialog(self, _('DRM Error'), from calibre.gui2.dialogs.drm_error import DRMErrorMessage
_('<p>This book is protected by <a href="%s">DRM</a>') DRMErrorMessage(self).exec_()
%'http://wiki.mobileread.com/wiki/DRM').exec_()
else: else:
r = getattr(worker.exception, 'reason', worker.exception) r = getattr(worker.exception, 'reason', worker.exception)
error_dialog(self, _('Could not open ebook'), error_dialog(self, _('Could not open ebook'),
unicode(r), det_msg=worker.traceback, show=True) as_unicode(r), det_msg=worker.traceback, show=True)
self.close_progress_indicator() self.close_progress_indicator()
else: else:
self.metadata.show_opf(self.iterator.opf, os.path.splitext(pathtoebook)[1][1:]) self.metadata.show_opf(self.iterator.opf, os.path.splitext(pathtoebook)[1][1:])

View File

@ -411,7 +411,8 @@ class ResultCache(SearchQueryParser): # {{{
if isinstance(location, list): if isinstance(location, list):
if allow_recursion: if allow_recursion:
for loc in location: for loc in location:
matches |= self.get_matches(loc, query, allow_recursion=False) matches |= self.get_matches(loc, query, candidates,
allow_recursion=False)
return matches return matches
raise ParseException(query, len(query), 'Recursive query group detected', self) raise ParseException(query, len(query), 'Recursive query group detected', self)
@ -419,11 +420,11 @@ class ResultCache(SearchQueryParser): # {{{
fm = self.field_metadata[location] fm = self.field_metadata[location]
# take care of dates special case # take care of dates special case
if fm['datatype'] == 'datetime': if fm['datatype'] == 'datetime':
return self.get_dates_matches(location, query.lower()) return self.get_dates_matches(location, query.lower(), candidates)
# take care of numbers special case # take care of numbers special case
if fm['datatype'] in ('rating', 'int', 'float'): if fm['datatype'] in ('rating', 'int', 'float'):
return self.get_numeric_matches(location, query.lower()) return self.get_numeric_matches(location, query.lower(), candidates)
# take care of the 'count' operator for is_multiples # take care of the 'count' operator for is_multiples
if fm['is_multiple'] and \ if fm['is_multiple'] and \
@ -431,7 +432,8 @@ class ResultCache(SearchQueryParser): # {{{
query[1:1] in '=<>!': query[1:1] in '=<>!':
vf = lambda item, loc=fm['rec_index'], ms=fm['is_multiple']:\ vf = lambda item, loc=fm['rec_index'], ms=fm['is_multiple']:\
len(item[loc].split(ms)) if item[loc] is not None else 0 len(item[loc].split(ms)) if item[loc] is not None else 0
return self.get_numeric_matches(location, query[1:], val_func=vf) return self.get_numeric_matches(location, query[1:],
candidates, val_func=vf)
# everything else, or 'all' matches # everything else, or 'all' matches
matchkind = CONTAINS_MATCH matchkind = CONTAINS_MATCH

View File

@ -1524,19 +1524,32 @@ class EPUB_MOBI(CatalogPlugin):
this_title['formats'] = formats this_title['formats'] = formats
# Add user notes to be displayed in header # Add user notes to be displayed in header
# Special case handling for datetime fields # Special case handling for datetime fields and lists
if self.opts.header_note_source_field: if self.opts.header_note_source_field:
field_md = self.__db.metadata_for_field(self.opts.header_note_source_field) field_md = self.__db.metadata_for_field(self.opts.header_note_source_field)
notes = self.__db.get_field(record['id'], notes = self.__db.get_field(record['id'],
self.opts.header_note_source_field, self.opts.header_note_source_field,
index_is_id=True) index_is_id=True)
if notes and field_md['datatype'] == 'datetime':
# Reformat date fields to match UI presentation: dd MMM YYYY
notes = format_date(notes,'dd MMM yyyy')
if notes: if notes:
if field_md['datatype'] == 'text':
if isinstance(notes,list):
notes = ' &middot; '.join(notes)
elif field_md['datatype'] == 'datetime':
notes = format_date(notes,'dd MMM yyyy')
elif field_md['datatype'] == 'composite':
m = re.match(r'\[(.+)\]$', notes)
if m is not None:
# Sniff for special pseudo-list string "[<item, item>]"
bracketed_content = m.group(1)
if ',' in bracketed_content:
# Recast the comma-separated items as a list
items = bracketed_content.split(',')
items = [i.strip() for i in items]
notes = ' &middot; '.join(items)
else:
notes = bracketed_content
this_title['notes'] = {'source':field_md['name'], this_title['notes'] = {'source':field_md['name'],
'content':notes} 'content':notes}
titles.append(this_title) titles.append(this_title)

View File

@ -341,10 +341,6 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.has_id = self.data.has_id self.has_id = self.data.has_id
self.count = self.data.count self.count = self.data.count
# Count times get_metadata is called, and how many times in the cache
self.gm_count = 0
self.gm_missed = 0
for prop in ('author_sort', 'authors', 'comment', 'comments', 'isbn', for prop in ('author_sort', 'authors', 'comment', 'comments', 'isbn',
'publisher', 'rating', 'series', 'series_index', 'tags', 'publisher', 'rating', 'series', 'series_index', 'tags',
'title', 'timestamp', 'uuid', 'pubdate', 'ondevice'): 'title', 'timestamp', 'uuid', 'pubdate', 'ondevice'):
@ -710,6 +706,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
formats = row[fm['formats']] formats = row[fm['formats']]
if not formats: if not formats:
formats = None formats = None
else:
formats = formats.split(',')
mi.formats = formats mi.formats = formats
tags = row[fm['tags']] tags = row[fm['tags']]
if tags: if tags:

View File

@ -110,6 +110,7 @@ class cmd_commit(_cmd_commit):
suffix = 'The fix will be in the next release.' suffix = 'The fix will be in the next release.'
action = action+'ed' action = action+'ed'
msg = '%s in branch %s. %s'%(action, nick, suffix) msg = '%s in branch %s. %s'%(action, nick, suffix)
msg = msg.replace('Fixesed', 'Fixed')
server = xmlrpclib.ServerProxy(url) server = xmlrpclib.ServerProxy(url)
server.ticket.update(int(bug), msg, server.ticket.update(int(bug), msg,
{'status':'closed', 'resolution':'fixed'}, {'status':'closed', 'resolution':'fixed'},

View File

@ -4,7 +4,6 @@ __copyright__ = '2010, sengian <sengian1@gmail.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re, htmlentitydefs import re, htmlentitydefs
from functools import partial
_ascii_pat = None _ascii_pat = None
@ -50,4 +49,4 @@ def unescape(text, rm=False, rchar=u''):
if rm: if rm:
return rchar #replace by char return rchar #replace by char
return text # leave as is return text # leave as is
return re.sub("&#?\w+;", fixup, text) return re.sub("&#?\w+;", fixup, text)

View File

@ -18,6 +18,24 @@ class _Parser(object):
LEX_NUM = 4 LEX_NUM = 4
LEX_EOF = 5 LEX_EOF = 5
def _python(self, func):
locals = {}
exec func in locals
if 'evaluate' not in locals:
self.error('no evaluate function in python')
try:
result = locals['evaluate'](self.parent.kwargs)
if isinstance(result, (float, int)):
result = unicode(result)
elif isinstance(result, list):
result = ','.join(result)
elif isinstance(result, str):
result = unicode(result)
return result
except Exception as e:
self.error('python function threw exception: ' + e.msg)
def _strcmp(self, x, y, lt, eq, gt): def _strcmp(self, x, y, lt, eq, gt):
v = strcmp(x, y) v = strcmp(x, y)
if v < 0: if v < 0:
@ -79,6 +97,7 @@ class _Parser(object):
'field' : (1, lambda s, x: s.parent.get_value(x, [], s.parent.kwargs)), 'field' : (1, lambda s, x: s.parent.get_value(x, [], s.parent.kwargs)),
'multiply' : (2, partial(_math, op='*')), 'multiply' : (2, partial(_math, op='*')),
'print' : (-1, _print), 'print' : (-1, _print),
'python' : (1, _python),
'strcat' : (-1, _concat), 'strcat' : (-1, _concat),
'strcmp' : (5, _strcmp), 'strcmp' : (5, _strcmp),
'substr' : (3, lambda s, x, y, z: x[int(y): len(x) if int(z) == 0 else int(z)]), 'substr' : (3, lambda s, x, y, z: x[int(y): len(x) if int(z) == 0 else int(z)]),
@ -362,7 +381,7 @@ class TemplateFormatter(string.Formatter):
(r'\'.*?((?<!\\)\')', lambda x,t: (3, t[1:-1])), (r'\'.*?((?<!\\)\')', lambda x,t: (3, t[1:-1])),
(r'\n#.*?(?=\n)', None), (r'\n#.*?(?=\n)', None),
(r'\s', None) (r'\s', None)
]) ], flags=re.DOTALL)
def _eval_program(self, val, prog): def _eval_program(self, val, prog):
# keep a cache of the lex'ed program under the theory that re-lexing # keep a cache of the lex'ed program under the theory that re-lexing

View File

@ -92,7 +92,10 @@ def identify_data(data):
or raises an Exception if data is not an image. or raises an Exception if data is not an image.
''' '''
img = Image() img = Image()
img.load(data) if hasattr(img, 'identify'):
img.identify(data)
else:
img.load(data)
width, height = img.size width, height = img.size
fmt = img.format fmt = img.format
return (width, height, fmt) return (width, height, fmt)

View File

@ -456,6 +456,26 @@ magick_Image_load(magick_Image *self, PyObject *args, PyObject *kwargs) {
// }}} // }}}
// Image.identify {{{
static PyObject *
magick_Image_identify(magick_Image *self, PyObject *args, PyObject *kwargs) {
const char *data;
Py_ssize_t dlen;
MagickBooleanType res;
NULL_CHECK(NULL)
if (!PyArg_ParseTuple(args, "s#", &data, &dlen)) return NULL;
res = MagickPingImageBlob(self->wand, data, dlen);
if (!res)
return magick_set_exception(self->wand);
Py_RETURN_NONE;
}
// }}}
// Image.open {{{ // Image.open {{{
static PyObject * static PyObject *
magick_Image_read(magick_Image *self, PyObject *args, PyObject *kwargs) { magick_Image_read(magick_Image *self, PyObject *args, PyObject *kwargs) {
@ -993,6 +1013,10 @@ static PyMethodDef magick_Image_methods[] = {
{"destroy", (PyCFunction)magick_Image_destroy, METH_VARARGS, {"destroy", (PyCFunction)magick_Image_destroy, METH_VARARGS,
"Destroy the underlying ImageMagick Wand. WARNING: After using this method, all methods on this object will raise an exception."}, "Destroy the underlying ImageMagick Wand. WARNING: After using this method, all methods on this object will raise an exception."},
{"identify", (PyCFunction)magick_Image_identify, METH_VARARGS,
"Identify an image from a byte buffer (string)"
},
{"load", (PyCFunction)magick_Image_load, METH_VARARGS, {"load", (PyCFunction)magick_Image_load, METH_VARARGS,
"Load an image from a byte buffer (string)" "Load an image from a byte buffer (string)"
}, },

View File

@ -5,5 +5,52 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import glob
from calibre.constants import plugins, iswindows, filesystem_encoding
from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir
from calibre.utils.magick import Image, PixelWand
class Unavailable(Exception):
pass
class NoRaster(Exception):
pass
def extract_raster_image(wmf_data):
try:
wmf, wmf_err = plugins['wmf']
except KeyError:
raise Unavailable('libwmf not available on this platform')
if wmf_err:
raise Unavailable(wmf_err)
if iswindows:
import sys, os
appdir = sys.app_dir
if isinstance(appdir, unicode):
appdir = appdir.encode(filesystem_encoding)
fdir = os.path.join(appdir, 'wmffonts')
wmf.set_font_dir(fdir)
data = ''
with TemporaryDirectory('wmf2png') as tdir:
with CurrentDir(tdir):
wmf.render(wmf_data)
images = list(sorted(glob.glob('*.png')))
if not images:
raise NoRaster('No raster images in WMF')
data = open(images[0], 'rb').read()
im = Image()
im.load(data)
pw = PixelWand()
pw.color = '#ffffff'
im.rotate(pw, 180)
return im.export('png')

View File

@ -4,6 +4,7 @@
#include <libwmf/api.h> #include <libwmf/api.h>
#include <libwmf/svg.h> #include <libwmf/svg.h>
//#include <libwmf/gd.h>
typedef struct { typedef struct {
char *data; char *data;
@ -13,7 +14,7 @@ typedef struct {
//This code is taken mostly from the Abiword wmf plugin //This code is taken mostly from the Abiword wmf plugin
// Buffer read {{{
// returns unsigned char cast to int, or EOF // returns unsigned char cast to int, or EOF
static int wmf_WMF_read(void * context) { static int wmf_WMF_read(void * context) {
char c; char c;
@ -22,11 +23,11 @@ static int wmf_WMF_read(void * context) {
if (info->pos == info->len) if (info->pos == info->len)
return EOF; return EOF;
c = info->data[pos]; c = info->data[info->pos];
info->pos++; info->pos++;
return (int)c; return (int)((unsigned char)c);
} }
// returns (-1) on error, else 0 // returns (-1) on error, else 0
@ -44,8 +45,17 @@ static long wmf_WMF_tell(void * context) {
return (long) info->pos; return (long) info->pos;
} }
// }}}
char _png_name_buf[100];
char *wmf_png_name(void *ctxt) {
int *num = (int*)ctxt;
*num = *num + 1;
snprintf(_png_name_buf, 90, "%04d.png", *num);
return _png_name_buf;
}
#define CLEANUP if(API) { if (stream) wmf_free(API, stream); wmf_api_destroy(API); }; #define CLEANUP if(API) { if (stream) wmf_free(API, stream); wmf_api_destroy(API); };
static PyObject * static PyObject *
@ -66,9 +76,9 @@ wmf_render(PyObject *self, PyObject *args) {
unsigned int max_width = 1600; unsigned int max_width = 1600;
unsigned int max_height = 1200; unsigned int max_height = 1200;
unsigned long max_flags = 0;
static const char* Default_Description = "wmf2svg"; static const char* Default_Description = "wmf2svg";
int fname_counter = 0;
wmf_error_t err; wmf_error_t err;
@ -125,6 +135,8 @@ wmf_render(PyObject *self, PyObject *args) {
ddata->Description = (char *)Default_Description; ddata->Description = (char *)Default_Description;
ddata->bbox = bbox; ddata->bbox = bbox;
ddata->image.context = (void *)&fname_counter;
ddata->image.name = wmf_png_name;
wmf_display_size(API, &disp_width, &disp_height, 96, 96); wmf_display_size(API, &disp_width, &disp_height, 96, 96);
@ -156,9 +168,9 @@ wmf_render(PyObject *self, PyObject *args) {
ddata->height = (unsigned int) ceil ((double) wmf_height); ddata->height = (unsigned int) ceil ((double) wmf_height);
} }
ddata->flags |= WMF_SVG_INLINE_IMAGES; // Needs GD
//ddata->flags |= WMF_SVG_INLINE_IMAGES;
ddata->flags |= WMF_GD_OUTPUT_MEMORY | WMF_GD_OWN_BUFFER; //ddata->flags |= WMF_GD_OUTPUT_MEMORY | WMF_GD_OWN_BUFFER;
err = wmf_play(API, 0, &(bbox)); err = wmf_play(API, 0, &(bbox));
@ -178,11 +190,32 @@ wmf_render(PyObject *self, PyObject *args) {
return ans; return ans;
} }
#ifdef _WIN32
void set_libwmf_fontdir(const char *);
static PyObject *
wmf_setfontdir(PyObject *self, PyObject *args) {
char *path;
if (!PyArg_ParseTuple(args, "s", &path))
return NULL;
set_libwmf_fontdir(path);
Py_RETURN_NONE;
}
#endif
static PyMethodDef wmf_methods[] = { static PyMethodDef wmf_methods[] = {
{"render", wmf_render, METH_VARARGS, {"render", wmf_render, METH_VARARGS,
"render(path) -> Render wmf as svg." "render(data) -> Render wmf as svg."
}, },
#ifdef _WIN32
{"set_font_dir", wmf_setfontdir, METH_VARARGS,
"set_font_dir(path) -> Set the path to the fonts dir on windows, must be called at least once before using render()"
},
#endif
{NULL} /* Sentinel */ {NULL} /* Sentinel */
}; };

View File

@ -982,9 +982,12 @@ class ZipFile:
zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH]) zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
if fname != zinfo.orig_filename: if fname != zinfo.orig_filename:
raise BadZipfile, \ print ('WARNING: Header (%r) and directory (%r) filenames do not'
'File name in directory "%s" and header "%s" differ.' % ( ' match inside ZipFile')%(fname, zinfo.orig_filename)
zinfo.orig_filename, fname) print 'Using directory filename %r'%zinfo.orig_filename
#raise BadZipfile, \
# 'File name in directory "%r" and header "%r" differ.' % (
# zinfo.orig_filename, fname)
# check for encrypted flag & handle password # check for encrypted flag & handle password
is_encrypted = zinfo.flag_bits & 0x1 is_encrypted = zinfo.flag_bits & 0x1

View File

@ -700,10 +700,17 @@ class BasicNewsRecipe(Recipe):
for attr in self.remove_attributes: for attr in self.remove_attributes:
for x in soup.findAll(attrs={attr:True}): for x in soup.findAll(attrs={attr:True}):
del x[attr] del x[attr]
for base in list(soup.findAll(['base', 'iframe'])): for base in list(soup.findAll(['base', 'iframe', 'canvas', 'embed',
'command', 'datalist', 'video', 'audio'])):
base.extract() base.extract()
ans = self.postprocess_html(soup, first_fetch) ans = self.postprocess_html(soup, first_fetch)
# Nuke HTML5 tags
for x in ans.findAll(['article', 'aside', 'header', 'footer', 'nav',
'figcaption', 'figure', 'section']):
x.name = 'div'
if job_info: if job_info:
url, f, a, feed_len = job_info url, f, a, feed_len = job_info
try: try:

View File

@ -108,7 +108,6 @@ def download_builtin_recipe(urn):
br = browser() br = browser()
return br.open_novisit('http://status.calibre-ebook.com/recipe/'+urn).read() return br.open_novisit('http://status.calibre-ebook.com/recipe/'+urn).read()
def get_builtin_recipe_by_title(title, log=None, download_recipe=False): def get_builtin_recipe_by_title(title, log=None, download_recipe=False):
for x in get_builtin_recipe_collection(): for x in get_builtin_recipe_collection():
if x.get('title') == title: if x.get('title') == title:
@ -127,6 +126,24 @@ def get_builtin_recipe_by_title(title, log=None, download_recipe=False):
'Failed to download recipe, using builtin version') 'Failed to download recipe, using builtin version')
return P('recipes/%s.recipe'%urn, data=True) return P('recipes/%s.recipe'%urn, data=True)
def get_builtin_recipe_by_id(id_, log=None, download_recipe=False):
for x in get_builtin_recipe_collection():
if x.get('id') == id_:
urn = x.get('id')[8:]
if download_recipe:
try:
if log is not None:
log('Trying to get latest version of recipe:', urn)
return download_builtin_recipe(urn)
except:
if log is None:
import traceback
traceback.print_exc()
else:
log.exception(
'Failed to download recipe, using builtin version')
return P('recipes/%s.recipe'%urn, data=True)
class SchedulerConfig(object): class SchedulerConfig(object):
def __init__(self): def __init__(self):