Sync to trunk.

This commit is contained in:
John Schember 2011-01-13 07:12:06 -05:00
commit 87d5f40d96
69 changed files with 2603 additions and 1784 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.0 KiB

View File

@ -1,59 +1,79 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>' __author__ = 'Gerardo Diez'
__copyright__ = 'Gerardo Diez<gerardo.diez.garcia@gmail.com>'
description = 'Main daily newspaper from Spain - v1.00 (05, Enero 2011)'
__docformat__ = 'restructuredtext en'
''' '''
www.expansion.com expansion.es
''' '''
from calibre.web.feeds.recipes import BasicNewsRecipe
class Publico(BasicNewsRecipe):
title =u'Expansion.com'
__author__ ='Gerardo Diez'
publisher =u'Unidad Editorial Información Económica, S.L.'
category ='finances, catalunya'
oldest_article =1
max_articles_per_feed =100
simultaneous_downloads =10
cover_url =u'http://estaticos01.expansion.com/iconos/v2.x/v2.0/cabeceras/logo_expansion.png'
timefmt ='[%A, %d %B, %Y]'
encoding ='latin'
language ='es'
remove_javascript =True
no_stylesheets =True
keep_only_tags =dict(name='div', attrs={'class':['noticia primer_elemento']})
remove_tags =[
dict(name='div', attrs={'class':['compartir', 'metadata_desarrollo_noticia', 'relacionadas', 'mas_info','publicidad publicidad_textlink', 'ampliarfoto']}),
dict(name='ul', attrs={'class':['bolos_desarrollo_noticia']}),
dict(name='span', attrs={'class':['comentarios']}),
dict(name='p', attrs={'class':['cintillo_comentarios', 'cintillo_comentarios formulario']}),
dict(name='div', attrs={'id':['comentarios_lectores_listado']})
]
feeds =[
(u'Portada', u'http://estaticos.expansion.com/rss/portada.xml'),
(u'Portada: Bolsas', u'http://estaticos.expansion.com/rss/mercados.xml'),
(u'Divisas', u'http://estaticos.expansion.com/rss/mercadosdivisas.xml'),
(u'Euribor', u'http://estaticos.expansion.com/rss/mercadoseuribor.xml'),
(u'Materias Primas', u'http://estaticos.expansion.com/rss/mercadosmateriasprimas.xml'),
(u'Renta Fija', u'http://estaticos.expansion.com/rss/mercadosrentafija.xml'),
from calibre.web.feeds.news import BasicNewsRecipe (u'Portada: Mi Dinero', u'http://estaticos.expansion.com/rss/midinero.xml'),
from calibre.ebooks.BeautifulSoup import Tag (u'Hipotecas', u'http://estaticos.expansion.com/rss/midinerohipotecas.xml'),
(u'Créditos', u'http://estaticos.expansion.com/rss/midinerocreditos.xml'),
(u'Pensiones', u'http://estaticos.expansion.com/rss/midineropensiones.xml'),
(u'Fondos de Inversión', u'http://estaticos.expansion.com/rss/midinerofondos.xml'),
(u'Motor', u'http://estaticos.expansion.com/rss/midineromotor.xml'),
class Expansion(BasicNewsRecipe): (u'Portada: Empresas', u'http://estaticos.expansion.com/rss/empresas.xml'),
title = 'Diario Expansion' (u'Banca', u'http://estaticos.expansion.com/rss/empresasbanca.xml'),
__author__ = 'Darko Miletic' (u'TMT', u'http://estaticos.expansion.com/rss/empresastmt.xml'),
description = 'Lider de informacion de mercados, economica y politica' (u'Energía', u'http://estaticos.expansion.com/rss/empresasenergia.xml'),
publisher = 'expansion.com' (u'Inmobiliario y Construcción', u'http://estaticos.expansion.com/rss/empresasinmobiliario.xml'),
category = 'news, politics, Spain' (u'Transporte y Turismo', u'http://estaticos.expansion.com/rss/empresastransporte.xml'),
oldest_article = 2 (u'Automoción e Industria', u'http://estaticos.expansion.com/rss/empresasauto-industria.xml'),
max_articles_per_feed = 100 (u'Distribución', u'http://estaticos.expansion.com/rss/empresasdistribucion.xml'),
no_stylesheets = True (u'Deporte y Negocio', u' http://estaticos.expansion.com/rss/empresasdeporte.xml'),
use_embedded_content = False (u'Mi Negocio', u'http://estaticos.expansion.com/rss/empresasminegocio.xml'),
delay = 1 (u'Interiores', u'http://estaticos.expansion.com/rss/empresasinteriores.xml'),
encoding = 'iso-8859-15' (u'Digitech', u'http://estaticos.expansion.com/rss/empresasdigitech.xml'),
language = 'es'
direction = 'ltr' (u'Portada: Economía y Política', u'http://estaticos.expansion.com/rss/economiapolitica.xml'),
(u'Política', u'http://estaticos.expansion.com/rss/economia.xml'),
(u'Portada: Sociedad', u'http://estaticos.expansion.com/rss/entorno.xml'),
html2lrf_options = [ (u'Portada: Opinión', u'http://estaticos.expansion.com/rss/opinion.xml'),
'--comment' , description (u'Llaves y editoriales', u'http://estaticos.expansion.com/rss/opinioneditorialyllaves.xml'),
, '--category' , category (u'Tribunas', u'http://estaticos.expansion.com/rss/opiniontribunas.xml'),
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' (u'Portada: Jurídico', u'http://estaticos.expansion.com/rss/juridico.xml'),
(u'Entrevistas', u'http://estaticos.expansion.com/rss/juridicoentrevistas.xml'),
(u'Opinión', u'http://estaticos.expansion.com/rss/juridicoopinion.xml'),
(u'Sentencias', u'http://estaticos.expansion.com/rss/juridicosentencias.xml'),
feeds = [ (u'Mujer', u'http://estaticos.expansion.com/rss/mujer-empresa.xml'),
(u'Ultimas noticias', u'http://rss.expansion.com/rss/descarga.htm?data2=178') (u'Catalu&ntilde;a', u'http://estaticos.expansion.com/rss/catalunya.xml'),
,(u'Temas del dia' , u'http://rss.expansion.com/rss/descarga.htm?data2=178') (u'Función pública', u'http://estaticos.expansion.com/rss/funcion-publica.xml')
] ]
keep_only_tags = [dict(name='div', attrs={'id':'principal'})]
remove_tags = [
dict(name=['object','link','script'])
,dict(name='div', attrs={'class':['utilidades','tit_relacionadas']})
]
remove_tags_after = [dict(name='div', attrs={'class':'tit_relacionadas'})]
def preprocess_html(self, soup):
    """Tidy a downloaded page before it is converted.

    Sets the ``dir`` attribute of the root element from ``self.direction``,
    inserts a UTF-8 Content-Type <meta> as the first child of <head>, and
    deletes every inline ``style`` attribute. Returns the same soup object.
    """
    soup.html['dir'] = self.direction
    meta_attrs = [("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]
    soup.head.insert(0, Tag(soup, 'meta', meta_attrs))
    styled_nodes = soup.findAll(style=True)
    for node in styled_nodes:
        del node['style']
    return soup

View File

@ -1,10 +1,9 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
msnbc.msn.com msnbc.msn.com
''' '''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class MsNBC(BasicNewsRecipe): class MsNBC(BasicNewsRecipe):
@ -19,7 +18,16 @@ class MsNBC(BasicNewsRecipe):
publisher = 'msnbc.com' publisher = 'msnbc.com'
category = 'news, USA, world' category = 'news, USA, world'
language = 'en' language = 'en'
extra_css = ' body{ font-family: sans-serif } .head{font-family: serif; font-size: xx-large; font-weight: bold; color: #CC0000} .abstract{font-weight: bold} .source{font-size: small} .updateTime{font-size: small} ' extra_css = """
body{ font-family: Georgia,Times,serif }
.hide{display: none}
.caption{font-family: Arial,sans-serif; font-size: x-small}
.entry-summary{font-family: Arial,sans-serif}
.copyright{font-size: 0.95em; font-style: italic}
.source-org{font-size: small; font-family: Arial,sans-serif}
img{display: block; margin-bottom: 0.5em}
span.byline{display: none}
"""
conversion_options = { conversion_options = {
'comments' : description 'comments' : description
@ -28,14 +36,20 @@ class MsNBC(BasicNewsRecipe):
,'publisher': publisher ,'publisher': publisher
} }
preprocess_regexps = [ remove_tags_before = dict(name='h1', attrs={'id':'headline'})
(re.compile(r'</style></head>', re.DOTALL|re.IGNORECASE),lambda match: '</style>') remove_tags_after = dict(name='span', attrs={'class':['copyright','Linear copyright']})
,(re.compile(r'<div class="head">', re.DOTALL|re.IGNORECASE),lambda match: '</head><body><div class="head">'), keep_only_tags=[
] dict(attrs={'id':['headline','deck','byline','source','intelliTXT']})
,dict(attrs={'class':['gl_headline','articleText','drawer-content Linear','v-center3','byline','textBodyBlack']})
]
remove_attributes=['property','lang','rel','xmlns:fb','xmlns:v','xmlns:dc','xmlns:dcmitype','xmlns:og','xmlns:media','xmlns:vcard','typeof','itemscope','itemtype','itemprop','about','type','size','width','height','onreadystatechange','data','border','hspace','vspace']
remove_tags = [
dict(name=['iframe','object','link','embed','meta','table'])
,dict(name='span', attrs={'class':['copyright','Linear copyright']})
,dict(name='div', attrs={'class':'social'})
]
remove_tags_before = dict(name='div', attrs={'class':'head'})
remove_tags_after = dict(name='div', attrs={'class':'copyright'})
remove_tags = [dict(name=['iframe','object','link','script','form'])]
feeds = [ feeds = [
(u'US News' , u'http://rss.msnbc.msn.com/id/3032524/device/rss/rss.xml' ) (u'US News' , u'http://rss.msnbc.msn.com/id/3032524/device/rss/rss.xml' )
@ -48,11 +62,26 @@ class MsNBC(BasicNewsRecipe):
,(u'Tech & Science', u'http://rss.msnbc.msn.com/id/3032117/device/rss/rss.xml' ) ,(u'Tech & Science', u'http://rss.msnbc.msn.com/id/3032117/device/rss/rss.xml' )
] ]
def print_version(self, url):
return url + 'print/1/displaymode/1098/'
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.head.findAll('div'): for item in soup.body.findAll('html'):
item.extract() item.name='div'
for item in soup.body.findAll('div'):
if item.has_key('id') and item['id'].startswith('vine-'):
item.extract()
if item.has_key('class') and ( item['class'].startswith('ad') or item['class'].startswith('vine')):
item.extract()
for item in soup.body.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
for item in soup.body.findAll('ol'):
if item.has_key('class') and item['class'].startswith('grid'):
item.extract()
for item in soup.body.findAll('span'):
if ( item.has_key('id') and item['id'].startswith('byLine') and item.string is None) or ( item.has_key('class') and item['class'].startswith('inline') ):
item.extract()
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup return soup

View File

@ -685,3 +685,28 @@ class NYTimes(BasicNewsRecipe):
divTag.replaceWith(tag) divTag.replaceWith(tag)
return soup return soup
def populate_article_metadata(self, article, soup, first):
    """Derive a summary from the article body when the feed supplied none.

    The <p> tags of every ``div.articleBody`` are scanned in document order.
    The first paragraph longer than 70 characters becomes both
    ``article.summary`` and ``article.text_summary``, prefixed by the most
    recent short paragraph seen before it (for example a byline or dateline).

    :param article: object exposing ``text_summary`` and ``summary``.
    :param soup: parsed article page offering ``findAll``.
    :param first: unused here.
    :return: always ``None``; the article is updated in place.
    """
    try:
        if article.text_summary.strip():
            # The feed already provided a summary; leave it alone.
            return
        shortparagraph = ""
        for articlebody in soup.findAll('div', attrs={'class': 'articleBody'}):
            for p in articlebody.findAll('p'):
                refparagraph = self.massageNCXText(
                    self.tag_to_string(p, use_alt=False)).strip()
                if not refparagraph:
                    # Blank paragraphs contribute nothing.
                    continue
                if len(refparagraph) > 70:  # approximately one line of text
                    article.summary = article.text_summary = shortparagraph + refparagraph
                    return
                # Keep the short paragraph as a prefix for the next long one.
                shortparagraph = refparagraph + " "
                if " " not in refparagraph and not refparagraph.endswith(":"):
                    # A lone word gets a dash so it reads as a label.
                    shortparagraph += "- "
    except Exception:
        # Best effort only: a failed summary must not abort the download,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        self.log("Error creating article descriptions")
        return

View File

@ -685,4 +685,27 @@ class NYTimes(BasicNewsRecipe):
divTag.replaceWith(tag) divTag.replaceWith(tag)
return soup return soup
def populate_article_metadata(self, article, soup, first):
    """Derive a summary from the article body when the feed supplied none.

    The <p> tags of every ``div.articleBody`` are scanned in document order.
    The first paragraph longer than 70 characters becomes both
    ``article.summary`` and ``article.text_summary``, prefixed by the most
    recent short paragraph seen before it (for example a byline or dateline).

    :param article: object exposing ``text_summary`` and ``summary``.
    :param soup: parsed article page offering ``findAll``.
    :param first: unused here.
    :return: always ``None``; the article is updated in place.
    """
    try:
        if article.text_summary.strip():
            # The feed already provided a summary; leave it alone.
            return
        shortparagraph = ""
        for articlebody in soup.findAll('div', attrs={'class': 'articleBody'}):
            for p in articlebody.findAll('p'):
                refparagraph = self.massageNCXText(
                    self.tag_to_string(p, use_alt=False)).strip()
                if not refparagraph:
                    # Blank paragraphs contribute nothing.
                    continue
                if len(refparagraph) > 70:  # approximately one line of text
                    article.summary = article.text_summary = shortparagraph + refparagraph
                    return
                # Keep the short paragraph as a prefix for the next long one.
                shortparagraph = refparagraph + " "
                if " " not in refparagraph and not refparagraph.endswith(":"):
                    # A lone word gets a dash so it reads as a label.
                    shortparagraph += "- "
    except Exception:
        # Best effort only: a failed summary must not abort the download,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        self.log("Error creating article descriptions")
        return

View File

@ -35,7 +35,6 @@ class TechnologyReview(BasicNewsRecipe):
def get_article_url(self, article): def get_article_url(self, article):
return article.get('guid', article.get('id', None)) return article.get('guid', article.get('id', None))
def print_version(self, url): def print_version(self, url):
baseurl='http://www.technologyreview.com/printer_friendly_article.aspx?id=' baseurl='http://www.technologyreview.com/printer_friendly_article.aspx?id='
split1 = string.split(url,"/") split1 = string.split(url,"/")
@ -43,3 +42,25 @@ class TechnologyReview(BasicNewsRecipe):
split2= string.split(xxx,"/") split2= string.split(xxx,"/")
s = baseurl + split2[0] s = baseurl + split2[0]
return s return s
def postprocess_html(self, soup, first_fetch):
    """Strip page furniture and unwrap plain-text links.

    Removes the first element carrying each of the classes ``header``
    (picture), ``close`` (close button) and ``bannerad`` (banner
    advertisement), then replaces every <a> that has a single string child
    with that string.

    :param soup: parsed page offering ``find`` and ``findAll``.
    :param first_fetch: unused. It was previously named ``True``, which is a
        SyntaxError on Python 3 and shadowed the builtin passed to
        ``soup.find`` below. Callers pass it positionally, so the rename is
        transparent to them.
    :return: the same ``soup`` object, modified in place.
    """
    for css_class in ('header', 'close', 'bannerad'):
        node = soup.find(True, {'class': css_class})
        # Not every page carries all three elements; skip the missing ones
        # instead of failing on None.
        if node is not None:
            node.replaceWith("")
    # thanks kiklop74! This code removes all links from the text
    for alink in soup.findAll('a'):
        if alink.string is not None:
            alink.replaceWith(alink.string)
    return soup

View File

@ -28,7 +28,7 @@ class TyzdenRecipe(BasicNewsRecipe):
if (weeknum > 1): if (weeknum > 1):
weeknum -= 1 weeknum -= 1
title = u'.tyzden ' + str(weeknum) + '/' + str(year) title = u'tyzden'
base_url_path = 'http://www.tyzden.sk/casopis/' + str(year) + '/' + str(weeknum) base_url_path = 'http://www.tyzden.sk/casopis/' + str(year) + '/' + str(weeknum)
base_url = base_url_path + '.html' base_url = base_url_path + '.html'

View File

@ -2,8 +2,10 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.chardet import xml_to_unicode
class Wired_Daily(BasicNewsRecipe): class Wired_Daily(BasicNewsRecipe):
@ -15,30 +17,43 @@ class Wired_Daily(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
preprocess_regexps = [(re.compile(r'<head.*</head>', re.DOTALL), lambda m:
'<head></head>')]
remove_tags_before = dict(name='div', id='content') remove_tags_before = dict(name='div', id='content')
remove_tags = [dict(id=['social_tools', 'outerWrapper', 'sidebar', remove_tags = [dict(id=['header', 'commenting_module', 'post_nav',
'footer', 'advertisement', 'blog_subscription_unit', 'social_tools', 'sidebar', 'footer', 'social_wishlist', 'pgwidget',
'brightcove_component']), 'outerWrapper', 'inf_widget']),
{'class':'entryActions'}, {'class':['entryActions', 'advertisement', 'entryTags']},
dict(name=['noscript', 'script'])] dict(name=['noscript', 'script']),
dict(name='h4', attrs={'class':re.compile(r'rat\d+')}),
{'class':lambda x: x and x.startswith('contentjump')},
dict(name='li', attrs={'class':['entryCategories', 'entryEdit']})]
feeds = [ feeds = [
('Top News', 'http://feeds.wired.com/wired/index'), ('Top News', 'http://feeds.wired.com/wired/index'),
('Culture', 'http://feeds.wired.com/wired/culture'), ('Product Reviews',
('Software', 'http://feeds.wired.com/wired/software'), 'http://www.wired.com/reviews/feeds/latestProductsRss'),
('Mac', 'http://feeds.feedburner.com/cultofmac/bFow'), ('Autopia', 'http://www.wired.com/autopia/feed/'),
('Gadgets', 'http://feeds.wired.com/wired/gadgets'), ('Danger Room', 'http://www.wired.com/dangerroom/feed/'),
('Cars', 'http://feeds.wired.com/wired/cars'), ('Epicenter', 'http://www.wired.com/epicenter/feed/'),
('Entertainment', 'http://feeds.wired.com/wired/entertainment'), ('Gadget Lab', 'http://www.wired.com/gadgetlab/feed/'),
('Gaming', 'http://feeds.wired.com/wired/gaming'), ('Geek Dad', 'http://www.wired.com/geekdad/feed/'),
('Science', 'http://feeds.wired.com/wired/science'), ('Playbook', 'http://www.wired.com/playbook/feed/'),
('Med Tech', 'http://feeds.wired.com/wired/medtech'), ('Rawfile', 'http://www.wired.com/rawfile/feed/'),
('Politics', 'http://feeds.wired.com/wired/politics'), ('This Day in Tech', 'http://www.wired.com/thisdayintech/feed/'),
('Tech Biz', 'http://feeds.wired.com/wired/techbiz'), ('Threat Level', 'http://www.wired.com/threatlevel/feed/'),
('Commentary', 'http://feeds.wired.com/wired/commentary'), ('Underwire', 'http://www.wired.com/underwire/feed/'),
('Web Monkey', 'http://www.webmonkey.com/feed/'),
('Science', 'http://www.wired.com/wiredscience/feed/'),
] ]
def populate_article_metadata(self, article, soup, first):
    """Resolve XML entities in the article's text summary, if it has one."""
    if not article.text_summary:
        return
    converted = xml_to_unicode(article.text_summary, resolve_entities=True)
    article.text_summary = converted[0]
def print_version(self, url): def print_version(self, url):
return url.replace('http://www.wired.com/', 'http://www.wired.com/print/') return url + '/all/1'

View File

@ -0,0 +1,33 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.zerohedge.com
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class ZeroHedge(BasicNewsRecipe):
    # Recipe settings for www.zerohedge.com, driven by a single feed.

    # -- identity ---------------------------------------------------------
    title = 'Zero Hedge'
    __author__ = 'Darko Miletic'
    description = 'On a long enough timeline the survival rate for everyone drops to zero'

    # -- fetching ---------------------------------------------------------
    oldest_article = 10
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = True
    encoding = 'utf8'

    # -- metadata ---------------------------------------------------------
    publisher = 'zero hedge'
    category = 'news, USA, world, economy, politics'
    language = 'en'
    masthead_url = 'http://www.zerohedge.com/themes/newsflash/logo.png'
    publication_type = 'blog'

    # -- presentation -----------------------------------------------------
    extra_css = 'body{ font-family: sans-serif }'

    # Reuses the class attributes defined above, so it must come after them.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    feeds = [(u'Articles', u'http://feeds.feedburner.com/zerohedge/feed')]

View File

@ -287,7 +287,7 @@
<xsl:value-of select="count(preceding::rtf:footnote) + 1"/> <xsl:value-of select="count(preceding::rtf:footnote) + 1"/>
<xsl:text>]</xsl:text> <xsl:text>]</xsl:text>
</xsl:when> </xsl:when>
<xsl:when test="(@superscript = 'true')"> <xsl:when test="(@superscript)">
<xsl:element name="sup"> <xsl:element name="sup">
<xsl:element name="span"> <xsl:element name="span">
<xsl:attribute name="class"> <xsl:attribute name="class">
@ -297,7 +297,7 @@
</xsl:element> </xsl:element>
</xsl:element> </xsl:element>
</xsl:when> </xsl:when>
<xsl:when test="(@underscript = 'true')"> <xsl:when test="(@underscript or @subscript)">
<xsl:element name="sub"> <xsl:element name="sub">
<xsl:element name="span"> <xsl:element name="span">
<xsl:attribute name="class"> <xsl:attribute name="class">

View File

@ -459,6 +459,18 @@ def force_unicode(obj, enc=preferred_encoding):
obj = obj.decode('utf-8') obj = obj.decode('utf-8')
return obj return obj
def as_unicode(obj, enc=preferred_encoding):
    """Convert an arbitrary object to a unicode string.

    Anything that is not already a byte string is converted with
    ``unicode()``, falling back to ``str()`` and finally ``repr()`` when the
    previous conversion raises. The result is then passed through
    ``force_unicode`` with ``enc``.

    :param obj: any object.
    :param enc: encoding forwarded to ``force_unicode``.
    :return: whatever ``force_unicode`` returns for the converted object.
    """
    if not isbytestring(obj):
        try:
            obj = unicode(obj)
        except Exception:
            # Only ordinary errors trigger the fallbacks; KeyboardInterrupt
            # and SystemExit now propagate instead of being swallowed.
            try:
                obj = str(obj)
            except Exception:
                obj = repr(obj)
    return force_unicode(obj, enc=enc)
def human_readable(size): def human_readable(size):
""" Convert a size in bytes into a human readable form """ """ Convert a size in bytes into a human readable form """

View File

@ -91,3 +91,19 @@ class NOOK_COLOR(NOOK):
EBOOK_DIR_MAIN = 'My Files/Books' EBOOK_DIR_MAIN = 'My Files/Books'
# NOTE(review): the create_upload_path override below is disabled: it sits
# inside a bare string literal, so these lines define no method.
# Before re-enabling it, fix os.sep.join('My Files', 'Magazines'): str.join
# accepts exactly one iterable argument, so that call would raise TypeError.
'''
def create_upload_path(self, path, mdata, fname, create_dirs=True):
filepath = NOOK.create_upload_path(self, path, mdata, fname,
create_dirs=create_dirs)
edm = self.EBOOK_DIR_MAIN.replace('/', os.sep)
npath = os.path.join(edm, _('News')) + os.sep
if npath in filepath:
filepath = filepath.replace(npath, os.sep.join('My Files',
'Magazines')+os.sep)
filedir = os.path.dirname(filepath)
if create_dirs and not os.path.exists(filedir):
os.makedirs(filedir)
return filepath
'''

View File

@ -88,6 +88,7 @@ class Plumber(object):
self.ui_reporter = report_progress self.ui_reporter = report_progress
self.abort_after_input_dump = abort_after_input_dump self.abort_after_input_dump = abort_after_input_dump
# Pipeline options {{{
# Initialize the conversion options that are independent of input and # Initialize the conversion options that are independent of input and
# output formats. The input and output plugins can still disable these # output formats. The input and output plugins can still disable these
# options via recommendations. # options via recommendations.
@ -527,6 +528,7 @@ OptionRecommendation(name='timestamp',
help=_('Set the book timestamp (used by the date column in calibre).')), help=_('Set the book timestamp (used by the date column in calibre).')),
] ]
# }}}
input_fmt = os.path.splitext(self.input)[1] input_fmt = os.path.splitext(self.input)[1]
if not input_fmt: if not input_fmt:

View File

@ -16,7 +16,6 @@ import uuid
from lxml import etree from lxml import etree
from calibre import guess_type
from calibre import prepare_string_for_xml from calibre import prepare_string_for_xml
from calibre.constants import __appname__, __version__ from calibre.constants import __appname__, __version__
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace

View File

@ -10,7 +10,8 @@ from calibre.ebooks.metadata import MetaInformation, string_to_authors
title_pat = re.compile(r'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL) title_pat = re.compile(r'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL)
author_pat = re.compile(r'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL) author_pat = re.compile(r'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL)
comment_pat = re.compile(r'\{\\info.*?\{\\subject(.*?)(?<!\\)\}', re.DOTALL) comment_pat = re.compile(r'\{\\info.*?\{\\subject(.*?)(?<!\\)\}', re.DOTALL)
category_pat = re.compile(r'\{\\info.*?\{\\category(.*?)(?<!\\)\}', re.DOTALL) tags_pat = re.compile(r'\{\\info.*?\{\\category(.*?)(?<!\\)\}', re.DOTALL)
publisher_pat = re.compile(r'\{\\info.*?\{\\manager(.*?)(?<!\\)\}', re.DOTALL)
def get_document_info(stream): def get_document_info(stream):
""" """
@ -82,61 +83,73 @@ def decode(raw, codec):
def get_metadata(stream): def get_metadata(stream):
""" Return metadata as a L{MetaInfo} object """ """ Return metadata as a L{MetaInfo} object """
title, author, comment, category = None, None, None, None
stream.seek(0) stream.seek(0)
if stream.read(5) != r'{\rtf': if stream.read(5) != r'{\rtf':
return MetaInformation(None, None) return MetaInformation(_('Unknown'))
block = get_document_info(stream)[0] block = get_document_info(stream)[0]
if not block: if not block:
return MetaInformation(None, None) return MetaInformation(_('Unknown'))
stream.seek(0) stream.seek(0)
cpg = detect_codepage(stream) cpg = detect_codepage(stream)
stream.seek(0) stream.seek(0)
title_match = title_pat.search(block) title_match = title_pat.search(block)
if title_match: if title_match is not None:
title = decode(title_match.group(1).strip(), cpg) title = decode(title_match.group(1).strip(), cpg)
else:
title = _('Unknown')
author_match = author_pat.search(block) author_match = author_pat.search(block)
if author_match: if author_match is not None:
author = decode(author_match.group(1).strip(), cpg) author = decode(author_match.group(1).strip(), cpg)
comment_match = comment_pat.search(block) else:
if comment_match: author = None
comment = decode(comment_match.group(1).strip(), cpg) mi = MetaInformation(title)
category_match = category_pat.search(block)
if category_match:
category = decode(category_match.group(1).strip(), cpg)
mi = MetaInformation(title, author)
if author: if author:
mi.authors = string_to_authors(author) mi.authors = string_to_authors(author)
mi.comments = comment
mi.category = category comment_match = comment_pat.search(block)
if comment_match is not None:
comment = decode(comment_match.group(1).strip(), cpg)
mi.comments = comment
tags_match = tags_pat.search(block)
if tags_match is not None:
tags = decode(tags_match.group(1).strip(), cpg)
mi.tags = tags
publisher_match = publisher_pat.search(block)
if publisher_match is not None:
publisher = decode(publisher_match.group(1).strip(), cpg)
mi.publisher = publisher
return mi return mi
def create_metadata(stream, options): def create_metadata(stream, options):
md = r'{\info' md = [r'{\info']
if options.title: if options.title:
title = options.title.encode('ascii', 'ignore') title = options.title.encode('ascii', 'ignore')
md += r'{\title %s}'%(title,) md.append(r'{\title %s}'%(title,))
if options.authors: if options.authors:
au = options.authors au = options.authors
if not isinstance(au, basestring): if not isinstance(au, basestring):
au = u', '.join(au) au = u', '.join(au)
author = au.encode('ascii', 'ignore') author = au.encode('ascii', 'ignore')
md += r'{\author %s}'%(author,) md.append(r'{\author %s}'%(author,))
if options.get('category', None):
category = options.category.encode('ascii', 'ignore')
md += r'{\category %s}'%(category,)
comp = options.comment if hasattr(options, 'comment') else options.comments comp = options.comment if hasattr(options, 'comment') else options.comments
if comp: if comp:
comment = comp.encode('ascii', 'ignore') comment = comp.encode('ascii', 'ignore')
md += r'{\subject %s}'%(comment,) md.append(r'{\subject %s}'%(comment,))
if len(md) > 6: if options.publisher:
md += '}' publisher = options.publisher.encode('ascii', 'ignore')
md.append(r'{\manager %s}'%(publisher,))
if options.tags:
tags = u', '.join(options.tags)
tags = tags.encode('ascii', 'ignore')
md.append(r'{\category %s}'%(tags,))
if len(md) > 1:
md.append('}')
stream.seek(0) stream.seek(0)
src = stream.read() src = stream.read()
ans = src[:6] + md + src[6:] ans = src[:6] + u''.join(md) + src[6:]
stream.seek(0) stream.seek(0)
stream.write(ans) stream.write(ans)
@ -156,7 +169,7 @@ def set_metadata(stream, options):
base_pat = r'\{\\name(.*?)(?<!\\)\}' base_pat = r'\{\\name(.*?)(?<!\\)\}'
title = options.title title = options.title
if title != None: if title is not None:
title = title.encode('ascii', 'replace') title = title.encode('ascii', 'replace')
pat = re.compile(base_pat.replace('name', 'title'), re.DOTALL) pat = re.compile(base_pat.replace('name', 'title'), re.DOTALL)
if pat.search(src): if pat.search(src):
@ -164,7 +177,7 @@ def set_metadata(stream, options):
else: else:
src = add_metadata_item(src, 'title', title) src = add_metadata_item(src, 'title', title)
comment = options.comments comment = options.comments
if comment != None: if comment is not None:
comment = comment.encode('ascii', 'replace') comment = comment.encode('ascii', 'replace')
pat = re.compile(base_pat.replace('name', 'subject'), re.DOTALL) pat = re.compile(base_pat.replace('name', 'subject'), re.DOTALL)
if pat.search(src): if pat.search(src):
@ -172,7 +185,7 @@ def set_metadata(stream, options):
else: else:
src = add_metadata_item(src, 'subject', comment) src = add_metadata_item(src, 'subject', comment)
author = options.authors author = options.authors
if author != None: if author is not None:
author = ', '.join(author) author = ', '.join(author)
author = author.encode('ascii', 'ignore') author = author.encode('ascii', 'ignore')
pat = re.compile(base_pat.replace('name', 'author'), re.DOTALL) pat = re.compile(base_pat.replace('name', 'author'), re.DOTALL)
@ -180,14 +193,23 @@ def set_metadata(stream, options):
src = pat.sub(r'{\\author ' + author + r'}', src) src = pat.sub(r'{\\author ' + author + r'}', src)
else: else:
src = add_metadata_item(src, 'author', author) src = add_metadata_item(src, 'author', author)
category = options.get('category', None) tags = options.tags
if category != None: if tags is not None:
category = category.encode('ascii', 'replace') tags = ', '.join(tags)
tags = tags.encode('ascii', 'replace')
pat = re.compile(base_pat.replace('name', 'category'), re.DOTALL) pat = re.compile(base_pat.replace('name', 'category'), re.DOTALL)
if pat.search(src): if pat.search(src):
src = pat.sub(r'{\\category ' + category + r'}', src) src = pat.sub(r'{\\category ' + tags + r'}', src)
else: else:
src = add_metadata_item(src, 'category', category) src = add_metadata_item(src, 'category', tags)
publisher = options.publisher
if publisher is not None:
publisher = publisher.encode('ascii', 'replace')
pat = re.compile(base_pat.replace('name', 'manager'), re.DOTALL)
if pat.search(src):
src = pat.sub(r'{\\manager ' + publisher + r'}', src)
else:
src = add_metadata_item(src, 'manager', publisher)
stream.seek(pos + olen) stream.seek(pos + olen)
after = stream.read() after = stream.read()
stream.seek(pos) stream.seek(pos)

View File

@ -77,7 +77,15 @@ class RTFInput(InputFormatPlugin):
def generate_xml(self, stream): def generate_xml(self, stream):
from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
ofile = 'out.xml' ofile = 'dataxml.xml'
# Default: normal run level, no debug output directory.
run_lev, debug_dir = 1, None
if getattr(self.opts, 'debug_pipeline', None) is not None:
    # The directory name must be given to mkdir directly. The previous code
    # called os.mkdir(debug_dir) while debug_dir was still None, which always
    # raised and was silently swallowed, so debug mode could never turn on.
    try:
        os.mkdir('rtfdebug')
    except OSError:
        # Best effort: if the directory cannot be created, parse without
        # debug output rather than failing the conversion.
        pass
    else:
        debug_dir = 'rtfdebug'
        run_lev = 4
parser = ParseRtf( parser = ParseRtf(
in_file = stream, in_file = stream,
out_file = ofile, out_file = ofile,
@ -115,43 +123,45 @@ class RTFInput(InputFormatPlugin):
# Write or do not write paragraphs. Default is 0. # Write or do not write paragraphs. Default is 0.
empty_paragraphs = 1, empty_paragraphs = 1,
#debug
deb_dir = debug_dir,
run_level = run_lev,
) )
parser.parse_rtf() parser.parse_rtf()
ans = open('out.xml').read() with open(ofile, 'rb') as f:
os.remove('out.xml') return f.read()
return ans
def extract_images(self, picts): def extract_images(self, picts):
import imghdr
self.log('Extracting images...') self.log('Extracting images...')
with open(picts, 'rb') as f:
raw = f.read()
picts = filter(len, re.findall(r'\{\\pict([^}]+)\}', raw))
hex = re.compile(r'[^a-fA-F0-9]')
encs = [hex.sub('', pict) for pict in picts]
count = 0 count = 0
raw = open(picts, 'rb').read()
starts = []
for match in re.finditer(r'\{\\pict([^}]+)\}', raw):
starts.append(match.start(1))
imap = {} imap = {}
for enc in encs:
for start in starts:
pos, bc = start, 1
while bc > 0:
if raw[pos] == '}': bc -= 1
elif raw[pos] == '{': bc += 1
pos += 1
pict = raw[start:pos+1]
enc = re.sub(r'[^a-zA-Z0-9]', '', pict)
if len(enc) % 2 == 1: if len(enc) % 2 == 1:
enc = enc[:-1] enc = enc[:-1]
data = enc.decode('hex') data = enc.decode('hex')
fmt = imghdr.what(None, data)
if fmt is None:
fmt = 'wmf'
count += 1 count += 1
name = (('%4d'%count).replace(' ', '0'))+'.wmf' name = '%04d.%s' % (count, fmt)
open(name, 'wb').write(data) with open(name, 'wb') as f:
f.write(data)
imap[count] = name imap[count] = name
#open(name+'.hex', 'wb').write(enc) #open(name+'.hex', 'wb').write(enc)
return self.convert_images(imap) return self.convert_images(imap)
def convert_images(self, imap): def convert_images(self, imap):
for count, val in imap.items(): self.default_img = None
for count, val in imap.iteritems():
try: try:
imap[count] = self.convert_image(val) imap[count] = self.convert_image(val)
except: except:
@ -159,11 +169,35 @@ class RTFInput(InputFormatPlugin):
return imap return imap
def convert_image(self, name): def convert_image(self, name):
from calibre.utils.magick import Image if not name.endswith('.wmf'):
img = Image() return name
img.open(name) try:
return self.rasterize_wmf(name)
except:
self.log.exception('Failed to convert WMF image %r'%name)
return self.replace_wmf(name)
def replace_wmf(self, name):
from calibre.ebooks import calibre_cover
if self.default_img is None:
self.default_img = calibre_cover('Conversion of WMF images is not supported',
'Use Microsoft Word or OpenOffice to save this RTF file'
' as HTML and convert that in calibre.', title_size=36,
author_size=20)
name = name.replace('.wmf', '.jpg') name = name.replace('.wmf', '.jpg')
img.save(name) with open(name, 'wb') as f:
f.write(self.default_img)
return name
def rasterize_wmf(self, name):
raise ValueError('Conversion of WMF images not supported')
from calibre.utils.wmf import extract_raster_image
with open(name, 'rb') as f:
data = f.read()
data = extract_raster_image(data)
name = name.replace('.wmf', '.jpg')
with open(name, 'wb') as f:
f.write(data)
return name return name
@ -192,27 +226,27 @@ class RTFInput(InputFormatPlugin):
css += '\n'+'\n'.join(font_size_classes) css += '\n'+'\n'.join(font_size_classes)
css += '\n' +'\n'.join(color_classes) css += '\n' +'\n'.join(color_classes)
for cls, val in border_styles.items(): for cls, val in border_styles.iteritems():
css += '\n\n.%s {\n%s\n}'%(cls, val) css += '\n\n.%s {\n%s\n}'%(cls, val)
with open('styles.css', 'ab') as f: with open('styles.css', 'ab') as f:
f.write(css) f.write(css)
def preprocess(self, fname): # def preprocess(self, fname):
self.log('\tPreprocessing to convert unicode characters') # self.log('\tPreprocessing to convert unicode characters')
try: # try:
data = open(fname, 'rb').read() # data = open(fname, 'rb').read()
from calibre.ebooks.rtf.preprocess import RtfTokenizer, RtfTokenParser # from calibre.ebooks.rtf.preprocess import RtfTokenizer, RtfTokenParser
tokenizer = RtfTokenizer(data) # tokenizer = RtfTokenizer(data)
tokens = RtfTokenParser(tokenizer.tokens) # tokens = RtfTokenParser(tokenizer.tokens)
data = tokens.toRTF() # data = tokens.toRTF()
fname = 'preprocessed.rtf' # fname = 'preprocessed.rtf'
with open(fname, 'wb') as f: # with open(fname, 'wb') as f:
f.write(data) # f.write(data)
except: # except:
self.log.exception( # self.log.exception(
'Failed to preprocess RTF to convert unicode sequences, ignoring...') # 'Failed to preprocess RTF to convert unicode sequences, ignoring...')
return fname # return fname
def convert_borders(self, doc): def convert_borders(self, doc):
border_styles = [] border_styles = []
@ -249,17 +283,14 @@ class RTFInput(InputFormatPlugin):
self.log = log self.log = log
self.log('Converting RTF to XML...') self.log('Converting RTF to XML...')
#Name of the preprocesssed RTF file #Name of the preprocesssed RTF file
fname = self.preprocess(stream.name) # fname = self.preprocess(stream.name)
try: try:
xml = self.generate_xml(fname) xml = self.generate_xml(stream.name)
except RtfInvalidCodeException, e: except RtfInvalidCodeException, e:
raise
raise ValueError(_('This RTF file has a feature calibre does not ' raise ValueError(_('This RTF file has a feature calibre does not '
'support. Convert it to HTML first and then try it.\n%s')%e) 'support. Convert it to HTML first and then try it.\n%s')%e)
'''dataxml = open('dataxml.xml', 'w')
dataxml.write(xml)
dataxml.close'''
d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf')) d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
if d: if d:
imap = {} imap = {}

View File

@ -17,7 +17,8 @@
######################################################################### #########################################################################
# $Revision: 1.41 $ # $Revision: 1.41 $
# $Date: 2006/03/24 23:50:07 $ # $Date: 2006/03/24 23:50:07 $
import sys,os import sys, os
from calibre.ebooks.rtf2xml import headings_to_sections, \ from calibre.ebooks.rtf2xml import headings_to_sections, \
line_endings, footnote, fields_small, default_encoding, \ line_endings, footnote, fields_small, default_encoding, \
make_lists, preamble_div, header, colors, group_borders, \ make_lists, preamble_div, header, colors, group_borders, \
@ -90,7 +91,6 @@ class ParseRtf:
out_file = '', out_file = '',
out_dir = None, out_dir = None,
dtd = '', dtd = '',
#debug = 0, #why? calibre
deb_dir = None, deb_dir = None,
convert_symbol = None, convert_symbol = None,
convert_wingdings = None, convert_wingdings = None,
@ -107,6 +107,7 @@ class ParseRtf:
no_dtd = 0, no_dtd = 0,
char_data = '', char_data = '',
): ):
""" """
Requires: Requires:
'file' --file to parse 'file' --file to parse
@ -119,12 +120,11 @@ class ParseRtf:
script tries to output to directory where is script is exectued.) script tries to output to directory where is script is exectued.)
'deb_dir' --debug directory. If a debug_dir is provided, the script 'deb_dir' --debug directory. If a debug_dir is provided, the script
will copy each run through as a file to examine in the debug_dir will copy each run through as a file to examine in the debug_dir
'perl_script'--use perl to make tokens. This runs just a bit faster.
(I will probably phase this out.)
'check_brackets' -- make sure the brackets match up after each run 'check_brackets' -- make sure the brackets match up after each run
through a file. Only for debugging. through a file. Only for debugging.
Returns: Nothing Returns: Nothing
""" """
self.__file = in_file self.__file = in_file
self.__out_file = out_file self.__out_file = out_file
self.__out_dir = out_dir self.__out_dir = out_dir
@ -132,7 +132,7 @@ class ParseRtf:
self.__dtd_path = dtd self.__dtd_path = dtd
self.__check_file(in_file,"file_to_parse") self.__check_file(in_file,"file_to_parse")
self.__char_data = char_data self.__char_data = char_data
self.__debug_dir = deb_dir #self.__debug_dir = debug calibre self.__debug_dir = deb_dir
self.__check_dir(self.__temp_dir) self.__check_dir(self.__temp_dir)
self.__copy = self.__check_dir(self.__debug_dir) self.__copy = self.__check_dir(self.__debug_dir)
self.__convert_caps = convert_caps self.__convert_caps = convert_caps
@ -155,25 +155,24 @@ class ParseRtf:
if hasattr(the_file, 'read'): return if hasattr(the_file, 'read'): return
if the_file == None: if the_file == None:
if type == "file_to_parse": if type == "file_to_parse":
message = "You must provide a file for the script to work" msg = "\nYou must provide a file for the script to work"
msg = message
raise RtfInvalidCodeException, msg raise RtfInvalidCodeException, msg
elif os.path.exists(the_file): elif os.path.exists(the_file):
pass # do nothing pass # do nothing
else: else:
message = "The file '%s' cannot be found" % the_file msg = "\nThe file '%s' cannot be found" % the_file
msg = message
raise RtfInvalidCodeException, msg raise RtfInvalidCodeException, msg
def __check_dir(self, the_dir):
    """
    Check to see if directory exists.

    Returns None when ``the_dir`` is empty or None, and 1 when it names
    an existing directory. Raises RtfInvalidCodeException when a value
    was given but is not a directory.
    """
    if not the_dir:
        return
    if not os.path.isdir(the_dir):
        msg = "\n%s is not a directory" % the_dir
        # Call form of raise: same effect as ``raise Class, msg`` but
        # not tied to the Python-2-only statement syntax.
        raise RtfInvalidCodeException(msg)
    return 1
def parse_rtf(self): def parse_rtf(self):
""" """
Parse the file by calling on other classes. Parse the file by calling on other classes.
@ -194,13 +193,14 @@ class ParseRtf:
copy_obj.set_dir(self.__debug_dir) copy_obj.set_dir(self.__debug_dir)
copy_obj.remove_files() copy_obj.remove_files()
copy_obj.copy_file(self.__temp_file, "original_file") copy_obj.copy_file(self.__temp_file, "original_file")
# new as of 2005-08-02. Do I want this? # Function to check if bracket are well handled
if self.__debug_dir or self.__run_level > 2: if self.__debug_dir or self.__run_level > 2:
self.__check_brack_obj = check_brackets.CheckBrackets\ self.__check_brack_obj = check_brackets.CheckBrackets\
(file = self.__temp_file, (file = self.__temp_file,
bug_handler = RtfInvalidCodeException, bug_handler = RtfInvalidCodeException,
) )
# convert Macintosh line endings to Unix line endings #convert Macintosh and Windows line endings to Unix line endings
#why do this if you don't wb after?
line_obj = line_endings.FixLineEndings( line_obj = line_endings.FixLineEndings(
in_file = self.__temp_file, in_file = self.__temp_file,
bug_handler = RtfInvalidCodeException, bug_handler = RtfInvalidCodeException,
@ -208,13 +208,13 @@ class ParseRtf:
run_level = self.__run_level, run_level = self.__run_level,
replace_illegals = self.__replace_illegals, replace_illegals = self.__replace_illegals,
) )
return_value = line_obj.fix_endings() return_value = line_obj.fix_endings() #calibre return what?
self.__return_code(return_value) self.__return_code(return_value)
tokenize_obj = tokenize.Tokenize( tokenize_obj = tokenize.Tokenize(
bug_handler = RtfInvalidCodeException, bug_handler = RtfInvalidCodeException,
in_file = self.__temp_file, in_file = self.__temp_file,
copy = self.__copy, copy = self.__copy,
run_level = self.__run_level,) run_level = self.__run_level)
tokenize_obj.tokenize() tokenize_obj.tokenize()
process_tokens_obj = process_tokens.ProcessTokens( process_tokens_obj = process_tokens.ProcessTokens(
in_file = self.__temp_file, in_file = self.__temp_file,
@ -230,12 +230,25 @@ class ParseRtf:
os.remove(self.__temp_file) os.remove(self.__temp_file)
except OSError: except OSError:
pass pass
#Check to see if the file is correctly encoded
encode_obj = default_encoding.DefaultEncoding(
in_file = self.__temp_file,
run_level = self.__run_level,
bug_handler = RtfInvalidCodeException,
check_raw = True,
)
platform, code_page, default_font_num = encode_obj.find_default_encoding()
check_encoding_obj = check_encoding.CheckEncoding( check_encoding_obj = check_encoding.CheckEncoding(
bug_handler = RtfInvalidCodeException, bug_handler = RtfInvalidCodeException,
) )
check_encoding_obj.check_encoding(self.__file) enc = encode_obj.get_codepage()
sys.stderr.write('File "%s" does not appear to be RTF.\n' % self.__file if isinstance(self.__file, str) else self.__file.encode('utf-8')) if enc != 'mac_roman':
raise InvalidRtfException, msg enc = 'cp' + enc
if check_encoding_obj.check_encoding(self.__file, enc):
file_name = self.__file if isinstance(self.__file, str) \
else self.__file.encode('utf-8')
msg = 'File %s does not appear to be correctly encoded.\n' % file_name
raise InvalidRtfException, msg
delete_info_obj = delete_info.DeleteInfo( delete_info_obj = delete_info.DeleteInfo(
in_file = self.__temp_file, in_file = self.__temp_file,
copy = self.__copy, copy = self.__copy,
@ -508,6 +521,7 @@ class ParseRtf:
indent = self.__indent, indent = self.__indent,
run_level = self.__run_level, run_level = self.__run_level,
no_dtd = self.__no_dtd, no_dtd = self.__no_dtd,
encoding = encode_obj.get_codepage(),
bug_handler = RtfInvalidCodeException, bug_handler = RtfInvalidCodeException,
) )
tags_obj.convert_to_tags() tags_obj.convert_to_tags()
@ -520,35 +534,28 @@ class ParseRtf:
output_obj.output() output_obj.output()
os.remove(self.__temp_file) os.remove(self.__temp_file)
return self.__exit_level return self.__exit_level
def __bracket_match(self, file_name):
    """
    Verify that brackets are balanced, but only when the run level is
    above 2.

    ``file_name`` is used only to label the error message. Raises
    RtfInvalidCodeException when the bracket checker reports a mismatch.
    """
    if self.__run_level > 2:
        good_br, msg = self.__check_brack_obj.check_brackets()
        if not good_br:
            msg = '%s in file %s\n' % (msg, file_name)
            raise RtfInvalidCodeException(msg)
def __return_code(self, num):
    """
    Remember ``num`` as the exit level if it is higher than the level
    recorded so far. A value of None is ignored.
    """
    # Identity test: None is a singleton and ``==`` could be overridden.
    if num is None:
        return
    if int(num) > self.__exit_level:
        self.__exit_level = num
def __make_temp_file(self,file):
    """
    Make a temporary file to parse.

    ``file`` is either a path or an object with a ``read`` attribute.
    Its content is copied to 'rtf_write_file' in the current working
    directory and that name is returned.
    """
    write_file="rtf_write_file"
    opened_here = not hasattr(file, 'read')
    # The copy is written in binary mode, so read in binary mode as well
    # to keep the bytes untouched (no newline translation).
    read_obj = open(file, 'rb') if opened_here else file
    try:
        with open(write_file, 'wb') as write_obj:
            for line in read_obj:
                write_obj.write(line)
    finally:
        # Close only what was opened here; a caller-supplied stream
        # stays under the caller's control.
        if opened_here:
            read_obj.close()
    return write_file
"""
mi<tg<open______<style-sheet\n
mi<tg<close_____<style-sheet\n
mi<tg<open-att__<footnote<num>1\n
mi<tg<empty-att_<page-definition<margin>33\n
mi<tg<empty_____<para\n
"""

View File

@ -24,38 +24,38 @@ class CheckBrackets:
self.__ob_count = 0 self.__ob_count = 0
self.__cb_count = 0 self.__cb_count = 0
self.__open_bracket_num = [] self.__open_bracket_num = []
def open_brack(self, line):
    """
    Record an open-bracket token: push the four characters that precede
    the final character of ``line`` and bump the open bracket count.
    """
    self.__open_bracket_num.append(line[-5:-1])
    self.__bracket_count = self.__bracket_count + 1
def close_brack(self, line):
    """
    Match a close-bracket token against the most recent open bracket.

    Returns False when there is no open bracket left or when the number
    taken from ``line`` differs from the one recorded for the last open
    bracket; otherwise decrements the bracket count and returns True.
    """
    num = line[-5:-1]
    try:
        last_num = self.__open_bracket_num.pop()
    except IndexError:
        # Only an empty stack is expected here; anything else is a
        # programming error and should not be hidden.
        return False
    if num != last_num:
        return False
    self.__bracket_count -= 1
    return True
def check_brackets(self):
    """
    Scan the token file and check that brackets are balanced.

    Returns a (bool, message) tuple: False with a description as soon
    as a close bracket does not match, False when brackets are left
    open at the end of the file, True otherwise.
    """
    with open(self.__file, 'r') as read_obj:
        for line_count, line in enumerate(read_obj, 1):
            self.__token_info = line[:16]
            if self.__token_info == 'ob<nu<open-brack':
                self.open_brack(line)
            if self.__token_info == 'cb<nu<clos-brack' \
                    and not self.close_brack(line):
                return (False, "closed bracket doesn't match, line %s" % line_count)

    if self.__bracket_count == 0:
        return (True, "Brackets match!")
    msg = ("At end of file open and closed brackets don't match\n"
           "total number of brackets is %s") % self.__bracket_count
    return (False, msg)

View File

@ -1,8 +1,11 @@
#!/usr/bin/env python #!/usr/bin/env python
import sys import sys
class CheckEncoding: class CheckEncoding:
def __init__(self, bug_handler):
    """
    Required:
        'bug_handler' -- stored on the instance; presumably the exception
        class to raise on internal errors (verify against callers).
    """
    self.__bug_handler = bug_handler
def __get_position_error(self, line, encoding, line_num): def __get_position_error(self, line, encoding, line_num):
char_position = 0 char_position = 0
for char in line: for char in line:
@ -12,21 +15,23 @@ class CheckEncoding:
except UnicodeError, msg: except UnicodeError, msg:
sys.stderr.write('line: %s char: %s\n' % (line_num, char_position)) sys.stderr.write('line: %s char: %s\n' % (line_num, char_position))
sys.stderr.write(str(msg) + '\n') sys.stderr.write(str(msg) + '\n')
def check_encoding(self, path, encoding='us-ascii', verbose=True):
    """
    Check whether every line of the file at ``path`` can be decoded
    with ``encoding``.

    Returns True as soon as a line FAILS to decode (i.e. the encoding
    is bad) and False when the whole file decodes cleanly.

    When ``verbose`` is true, details about the offending line are
    written to stderr: the exact character position for lines shorter
    than 1000 characters, otherwise just the line number.
    """
    line_num = 0
    # Binary mode: the lines are decoded by hand below, so they must be
    # the raw bytes of the file, free of any newline translation.
    with open(path, 'rb') as read_obj:
        for line in read_obj:
            line_num += 1
            try:
                line.decode(encoding)
            except UnicodeError:
                if verbose:
                    if len(line) < 1000:
                        self.__get_position_error(line, encoding, line_num)
                    else:
                        sys.stderr.write('line: %d has bad encoding\n' % line_num)
                return True
    return False
if __name__ == '__main__':
    # CheckEncoding requires a bug_handler argument; calling it without
    # one raised TypeError before any checking could happen.
    check_encoding_obj = CheckEncoding(bug_handler=Exception)
    check_encoding_obj.check_encoding(sys.argv[1])

View File

@ -16,7 +16,9 @@
# # # #
######################################################################### #########################################################################
import os, tempfile import os, tempfile
from calibre.ebooks.rtf2xml import copy from calibre.ebooks.rtf2xml import copy
class CombineBorders: class CombineBorders:
"""Combine borders in RTF tokens to make later processing easier""" """Combine borders in RTF tokens to make later processing easier"""
def __init__(self, def __init__(self,
@ -32,28 +34,31 @@ class CombineBorders:
self.__state = 'default' self.__state = 'default'
self.__bord_pos = 'default' self.__bord_pos = 'default'
self.__bord_att = [] self.__bord_att = []
def found_bd(self, line):
    """
    Enter the 'border' state, remembering the border position taken
    from characters 6-15 of a token such as ``cw<bd<bor-t-r-vi``.
    """
    self.__bord_pos = line[6:16]
    self.__state = 'border'
def __default_func(self, line):
    """
    Handle a line while outside a border group.

    A ``cw<bd`` token starts a border (see found_bd) and produces no
    output, so '' is returned; any other line is returned unchanged.
    """
    if self.__first_five != 'cw<bd':
        return line
    self.found_bd(line)
    return ''
def end_border(self, line, write_obj):
    """
    Finish the current border group.

    Writes one combined ``cw<bd<position<nu<att|att...`` line built from
    the collected attributes, resets the border state, then handles
    ``line`` itself: a new ``cw<bd`` token starts the next border,
    anything else is written through unchanged.
    """
    attributes, self.__bord_att = self.__bord_att, []
    write_obj.write('cw<bd<%s<nu<%s\n' % (self.__bord_pos,
        "|".join(attributes)))
    self.__state = 'default'
    self.__bord_string = ''
    if self.__first_five != 'cw<bd':
        write_obj.write(line)
    else:
        self.found_bd(line)
def add_to_border_desc(self, line): def add_to_border_desc(self, line):
#cw<bt<bdr-hair__<nu<true #cw<bt<bdr-hair__<nu<true
#cw<bt<bdr-linew<nu<0.50 #cw<bt<bdr-linew<nu<0.50
@ -65,26 +70,22 @@ class CombineBorders:
else: else:
num = ':' + num num = ':' + num
self.__bord_att.append(border_desc + num) self.__bord_att.append(border_desc + num)
def __border_func(self, line, write_obj):
    """
    Handle a line while inside a border group: ``cw<bt`` tokens add to
    the border description, any other token closes the group.
    """
    if self.__first_five == 'cw<bt':
        self.add_to_border_desc(line)
    else:
        self.end_border(line, write_obj)
def combine_borders(self): def combine_borders(self):
read_obj = open(self.__file, 'r') with open(self.__file, 'r') as read_obj:
write_obj = open(self.__write_to, 'w') with open(self.__write_to, 'w') as write_obj:
line_to_read = 'dummy' for line in read_obj:
while line_to_read: self.__first_five = line[0:5]
line_to_read = read_obj.readline() if self.__state == 'border':
line = line_to_read self.__border_func(line, write_obj)
self.__first_five = line[0:5] else:
if self.__state == 'border': write_obj.write(self.__default_func(line))
self.__border_func(line, write_obj)
else:
to_print = self.__default_func(line)
write_obj.write(to_print)
read_obj.close()
write_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "combine_borders.data") copy_obj.copy_file(self.__write_to, "combine_borders.data")

View File

@ -1,6 +1,9 @@
import os, tempfile import os, tempfile, sys
from calibre.ebooks.rtf2xml import copy
from calibre.ebooks.rtf2xml import copy, check_encoding
public_dtd = 'rtf2xml1.0.dtd' public_dtd = 'rtf2xml1.0.dtd'
class ConvertToTags: class ConvertToTags:
""" """
Convert file to XML Convert file to XML
@ -10,6 +13,7 @@ class ConvertToTags:
bug_handler, bug_handler,
dtd_path, dtd_path,
no_dtd, no_dtd,
encoding,
indent = None, indent = None,
copy = None, copy = None,
run_level = 1, run_level = 1,
@ -29,9 +33,14 @@ class ConvertToTags:
self.__copy = copy self.__copy = copy
self.__dtd_path = dtd_path self.__dtd_path = dtd_path
self.__no_dtd = no_dtd self.__no_dtd = no_dtd
if encoding != 'mac_roman':
self.__encoding = 'cp' + encoding
else:
self.__encoding = 'mac_roman'
self.__indent = indent self.__indent = indent
self.__run_level = run_level self.__run_level = run_level
self.__write_to = tempfile.mktemp() self.__write_to = tempfile.mktemp()
def __initiate_values(self): def __initiate_values(self):
""" """
Set values, including those for the dictionary. Set values, including those for the dictionary.
@ -61,6 +70,7 @@ class ConvertToTags:
'tx<ut<__________' : self.__text_func, 'tx<ut<__________' : self.__text_func,
'mi<tg<empty_____' : self.__empty_func, 'mi<tg<empty_____' : self.__empty_func,
} }
def __open_func(self, line): def __open_func(self, line):
""" """
Print the opening tag and newlines when needed. Print the opening tag and newlines when needed.
@ -73,6 +83,7 @@ class ConvertToTags:
if info in self.__two_new_line: if info in self.__two_new_line:
self.__write_extra_new_line() self.__write_extra_new_line()
self.__write_obj.write('<%s>' % info) self.__write_obj.write('<%s>' % info)
def __empty_func(self, line): def __empty_func(self, line):
""" """
Print out empty tag and newlines when needed. Print out empty tag and newlines when needed.
@ -85,10 +96,11 @@ class ConvertToTags:
self.__write_new_line() self.__write_new_line()
if info in self.__two_new_line: if info in self.__two_new_line:
self.__write_extra_new_line() self.__write_extra_new_line()
def __open_att_func(self, line): def __open_att_func(self, line):
""" """
Process lines for open tags that have attributes. Process lines for open tags that have attributes.
The important infor is between [17:-1]. Take this info and split it The important info is between [17:-1]. Take this info and split it
with the delimeter '<'. The first token in this group is the element with the delimeter '<'. The first token in this group is the element
name. The rest are attributes, separated fromt their values by '>'. So name. The rest are attributes, separated fromt their values by '>'. So
read each token one at a time, and split them by '>'. read each token one at a time, and split them by '>'.
@ -119,6 +131,7 @@ class ConvertToTags:
self.__write_new_line() self.__write_new_line()
if element_name in self.__two_new_line: if element_name in self.__two_new_line:
self.__write_extra_new_line() self.__write_extra_new_line()
def __empty_att_func(self, line): def __empty_att_func(self, line):
""" """
Same as the __open_att_func, except a '/' is placed at the end of the tag. Same as the __open_att_func, except a '/' is placed at the end of the tag.
@ -143,6 +156,7 @@ class ConvertToTags:
self.__write_new_line() self.__write_new_line()
if element_name in self.__two_new_line: if element_name in self.__two_new_line:
self.__write_extra_new_line() self.__write_extra_new_line()
def __close_func(self, line): def __close_func(self, line):
""" """
Print out the closed tag and new lines, if appropriate. Print out the closed tag and new lines, if appropriate.
@ -156,6 +170,7 @@ class ConvertToTags:
self.__write_new_line() self.__write_new_line()
if info in self.__two_new_line: if info in self.__two_new_line:
self.__write_extra_new_line() self.__write_extra_new_line()
def __text_func(self, line): def __text_func(self, line):
""" """
Simply print out the information between [17:-1] Simply print out the information between [17:-1]
@ -163,6 +178,7 @@ class ConvertToTags:
#tx<nu<__________<Normal; #tx<nu<__________<Normal;
# change this! # change this!
self.__write_obj.write(line[17:-1]) self.__write_obj.write(line[17:-1])
def __write_extra_new_line(self): def __write_extra_new_line(self):
""" """
Print out extra new lines if the new lines have not exceeded two. If Print out extra new lines if the new lines have not exceeded two. If
@ -172,8 +188,10 @@ class ConvertToTags:
return return
if self.__new_line < 2: if self.__new_line < 2:
self.__write_obj.write('\n') self.__write_obj.write('\n')
def __default_func(self, line):
    """Do nothing with ``line``; a no-op handler."""
    pass
def __write_new_line(self): def __write_new_line(self):
""" """
Print out a new line if a new line has not already been printed out. Print out a new line if a new line has not already been printed out.
@ -183,11 +201,23 @@ class ConvertToTags:
if not self.__new_line: if not self.__new_line:
self.__write_obj.write('\n') self.__write_obj.write('\n')
self.__new_line += 1 self.__new_line += 1
def __write_dec(self): def __write_dec(self):
""" """
Write the XML declaration at the top of the document. Write the XML declaration at the top of the document.
""" """
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>') #keep maximum compatibility with previous version
check_encoding_obj = check_encoding.CheckEncoding(
bug_handler=self.__bug_handler)
if not check_encoding_obj.check_encoding(self.__file, verbose=False):
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
self.__write_obj.write('<?xml version="1.0" encoding="%s" ?>' % self.__encoding)
else:
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
' hope for the best')
self.__new_line = 0 self.__new_line = 0
self.__write_new_line() self.__write_new_line()
if self.__no_dtd: if self.__no_dtd:
@ -207,6 +237,7 @@ class ConvertToTags:
) )
self.__new_line = 0 self.__new_line = 0
self.__write_new_line() self.__write_new_line()
def convert_to_tags(self): def convert_to_tags(self):
""" """
Read in the file one line at a time. Get the important info, between Read in the file one line at a time. Get the important info, between
@ -222,18 +253,14 @@ class ConvertToTags:
an empty tag function. an empty tag function.
""" """
self.__initiate_values() self.__initiate_values()
read_obj = open(self.__file, 'r')
self.__write_obj = open(self.__write_to, 'w') self.__write_obj = open(self.__write_to, 'w')
self.__write_dec() self.__write_dec()
line_to_read = 1 with open(self.__file, 'r') as read_obj:
while line_to_read: for line in read_obj:
line_to_read = read_obj.readline() self.__token_info = line[:16]
line = line_to_read action = self.__state_dict.get(self.__token_info)
self.__token_info = line[:16] if action is not None:
action = self.__state_dict.get(self.__token_info) action(line)
if action != None:
action(line)
read_obj.close()
self.__write_obj.close() self.__write_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:

View File

@ -23,6 +23,7 @@ class Copy:
def __init__(self, bug_handler, file = None, deb_dir = None, ):
    """
    Required:
        'bug_handler' -- stored on the instance
    Optional:
        'file' -- stored on the instance
        'deb_dir' -- accepted but not used in this method
    """
    self.__file = file
    self.__bug_handler = bug_handler
def set_dir(self, deb_dir): def set_dir(self, deb_dir):
"""Set the temporary directory to write files to""" """Set the temporary directory to write files to"""
if deb_dir is None: if deb_dir is None:
@ -33,19 +34,11 @@ class Copy:
message = "%(deb_dir)s is not a directory" % vars() message = "%(deb_dir)s is not a directory" % vars()
raise self.__bug_handler , message raise self.__bug_handler , message
Copy.__dir = deb_dir Copy.__dir = deb_dir
def remove_files(self ):
    """Remove files from directory"""
    # Operates on the class-level directory (Copy.__dir), not on a
    # per-instance value.
    self.__remove_the_files(Copy.__dir)
"""
list_of_files = os.listdir(Copy.__dir)
list_of_files = os.listdir(the_dir)
for file in list_of_files:
rem_file = os.path.join(Copy.__dir,file)
if os.path.isdir(rem_file):
self.remove_files(rem_file)
else:
os.remove(rem_file)
"""
def __remove_the_files(self, the_dir): def __remove_the_files(self, the_dir):
"""Remove files from directory""" """Remove files from directory"""
list_of_files = os.listdir(the_dir) list_of_files = os.listdir(the_dir)
@ -58,6 +51,7 @@ class Copy:
os.remove(rem_file) os.remove(rem_file)
except OSError: except OSError:
pass pass
def copy_file(self, file, new_file): def copy_file(self, file, new_file):
""" """
Copy the file to a new name Copy the file to a new name

View File

@ -1,61 +1,142 @@
######################################################################### #########################################################################
# # # #
# #
# copyright 2002 Paul Henry Tremblay # # copyright 2002 Paul Henry Tremblay #
# # # #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
# General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with this program; if not, write to the Free Software #
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA #
# 02111-1307 USA #
# #
# #
######################################################################### #########################################################################
'''
Code pages according to the RTF 1.9.1 specification:
437 United States IBM
708 Arabic (ASMO 708)
709 Arabic (ASMO 449+, BCON V4)
710 Arabic (transparent Arabic)
711 Arabic (Nafitha Enhanced)
720 Arabic (transparent ASMO)
819 Windows 3.1 (United States and Western Europe)
850 IBM multilingual
852 Eastern European
860 Portuguese
862 Hebrew
863 French Canadian
864 Arabic
865 Norwegian
866 Soviet Union
874 Thai
932 Japanese
936 Simplified Chinese
949 Korean
950 Traditional Chinese
1250 Eastern European
1251 Cyrillic
1252 Western European
1253 Greek
1254 Turkish
1255 Hebrew
1256 Arabic
1257 Baltic
1258 Vietnamese
1361 Johab
10000 MAC Roman
10001 MAC Japan
10004 MAC Arabic
10005 MAC Hebrew
10006 MAC Greek
10007 MAC Cyrillic
10029 MAC Latin2
10081 MAC Turkish
57002 Devanagari
57003 Bengali
57004 Tamil
57005 Telugu
57006 Assamese
57007 Oriya
57008 Kannada
57009 Malayalam
57010 Gujarati
57011 Punjabi
'''
import re
class DefaultEncoding:
    """
    Find the default encoding for the doc

    The file is scanned lazily, at most once, the first time one of
    find_default_encoding(), get_codepage() or get_platform() is called.
    Until something else is found the defaults are platform 'Windows',
    code page '1252' and default font number 'not-defined'.
    """

    def __init__(self, in_file, bug_handler, run_level = 1, check_raw = False):
        """
        Required:
            'in_file' -- path of the file to scan
            'bug_handler' -- stored on the instance
        Optional:
            'run_level' -- accepted for interface compatibility, unused
            'check_raw' -- if true, in_file is scanned as raw RTF text
                (looking for \\mac, \\pc, \\pca and \\ansicpgN); otherwise
                it is scanned as a token file (lines such as
                'cw<ri<ansi-codpg<nu<1252')
        Returns:
            nothing
        """
        self.__file = in_file
        self.__bug_handler = bug_handler
        self.__platform = 'Windows'
        self.__default_num = 'not-defined'
        self.__code_page = '1252'
        self.__datafetched = False
        self.__fetchraw = check_raw

    def find_default_encoding(self):
        """
        Return a (platform, code_page, default_font_num) tuple.

        For a numeric code page on a non-Macintosh platform the code
        page is prefixed with 'ansicpg' (e.g. 'ansicpg1252'); otherwise
        it is returned as stored (e.g. 'mac_roman').
        """
        self.__fetch()
        if self.__platform == 'Macintosh' or not self.__code_page.isdigit():
            code_page = self.__code_page
        else:
            code_page = 'ansicpg' + self.__code_page
        return self.__platform, code_page, self.__default_num

    def get_codepage(self):
        """Return the bare code page, e.g. '1252' or 'mac_roman'."""
        self.__fetch()
        return self.__code_page

    def get_platform(self):
        """Return the platform: 'Windows', 'Macintosh', 'IBMPC' or 'OS/2'."""
        self.__fetch()
        return self.__platform

    def __fetch(self):
        """Scan the file on first use; later calls are no-ops."""
        if not self.__datafetched:
            self._encoding()
            self.__datafetched = True

    def __store_code_page(self, value):
        """
        Store ``value`` as the code page if it is a non-zero number and
        return True; return False (leaving the code page alone) otherwise.
        """
        try:
            number = int(value)
        except ValueError:
            return False
        if not number:
            return False
        # 10000 is MAC Roman; Python has no 'cp10000' codec, so use the
        # codec name the rest of the converter already special-cases.
        self.__code_page = 'mac_roman' if number == 10000 else str(number)
        return True

    def _encoding(self):
        """Open the file and run the scan selected by check_raw."""
        with open(self.__file, 'r') as read_obj:
            if self.__fetchraw:
                self.__scan_raw(read_obj)
            else:
                self.__scan_tokens(read_obj)

    def __scan_tokens(self, read_obj):
        """Read platform, code page and default font from header tokens."""
        charsets = {
            'cw<ri<macintosh_': ('Macintosh', 'mac_roman'),
            'cw<ri<pc________': ('IBMPC', '437'),
            'cw<ri<pca_______': ('OS/2', '850'),
        }
        for line in read_obj:
            self.__token_info = line[:16]
            if self.__token_info == 'mi<mk<rtfhed-end':
                break
            if self.__token_info == 'cw<ri<ansi-codpg':
                #cw<ri<ansi-codpg<nu<10000
                # A missing, zero or malformed number means "default".
                if not self.__store_code_page(line[20:-1]):
                    self.__code_page = '1252'
            if self.__token_info in charsets:
                self.__platform, self.__code_page = charsets[self.__token_info]
            if self.__token_info == 'cw<ri<deflt-font':
                #cw<ri<deflt-font<nu<0
                self.__default_num = line[20:-1]

    def __scan_raw(self, read_obj):
        """Read platform and code page from raw RTF control words."""
        charsets = {
            'mac': ('Macintosh', 'mac_roman'),
            'pc': ('IBMPC', '437'),
            'pca': ('OS/2', '850'),
        }
        fenc = re.compile(r'\\(mac|pc|ansi|pca)[\\ \{\}\t\n]+')
        fenccp = re.compile(r'\\ansicpg(\d+)[\\ \{\}\t\n]+')
        for line in read_obj:
            found = fenc.search(line)
            if found and found.group(1) in charsets:
                self.__platform, self.__code_page = charsets[found.group(1)]
            found = fenccp.search(line)
            # An explicit, non-zero \ansicpgN is authoritative: use it and
            # stop scanning.
            if found and self.__store_code_page(found.group(1)):
                break
# if __name__ == '__main__':
# encode_obj = DefaultEncoding(
# in_file = sys.argv[1],
# bug_handler = Exception,
# check_raw = True,
# )
# print encode_obj.get_codepage()

View File

@ -16,7 +16,9 @@
# # # #
######################################################################### #########################################################################
import sys, os, tempfile import sys, os, tempfile
from calibre.ebooks.rtf2xml import copy from calibre.ebooks.rtf2xml import copy
class DeleteInfo: class DeleteInfo:
"""Delelet unecessary destination groups""" """Delelet unecessary destination groups"""
def __init__(self, def __init__(self,
@ -29,17 +31,18 @@ class DeleteInfo:
self.__bug_handler = bug_handler self.__bug_handler = bug_handler
self.__copy = copy self.__copy = copy
self.__write_to = tempfile.mktemp() self.__write_to = tempfile.mktemp()
self.__bracket_count=0 self.__bracket_count= 0
self.__ob_count = 0 self.__ob_count = 0
self.__cb_count = 0 self.__cb_count = 0
self.__after_asterisk = 0 # self.__after_asterisk = False
self.__delete = 0 # self.__delete = 0
self.__initiate_allow() self.__initiate_allow()
self.__ob = 0 self.__ob = 0
self.__write_cb = 0 self.__write_cb = False
self.__run_level = run_level self.__run_level = run_level
self.__found_delete = 0 self.__found_delete = False
self.__list = 0 # self.__list = False
def __initiate_allow(self): def __initiate_allow(self):
""" """
Initiate a list of destination groups which should be printed out. Initiate a list of destination groups which should be printed out.
@ -66,9 +69,10 @@ class DeleteInfo:
self.__state_dict = { self.__state_dict = {
'default' : self.__default_func, 'default' : self.__default_func,
'after_asterisk' : self.__asterisk_func, 'after_asterisk' : self.__asterisk_func,
'delete' : self.__delete_func, 'delete' : self.__delete_func,
'list' : self.__list_func, 'list' : self.__list_func,
} }
def __default_func(self,line): def __default_func(self,line):
"""Handle lines when in no special state. Look for an asterisk to """Handle lines when in no special state. Look for an asterisk to
begin a special state. Otherwise, print out line.""" begin a special state. Otherwise, print out line."""
@ -81,27 +85,29 @@ class DeleteInfo:
if self.__ob: if self.__ob:
self.__write_obj.write(self.__ob) self.__write_obj.write(self.__ob)
self.__ob = line self.__ob = line
return 0 return False
else: else:
# write previous bracket, since didn't fine asterisk # write previous bracket, since didn't fine asterisk
if self.__ob: if self.__ob:
self.__write_obj.write(self.__ob) self.__write_obj.write(self.__ob)
self.__ob = 0 self.__ob = 0
return 1 return True
def __delete_func(self,line): def __delete_func(self,line):
"""Handle lines when in delete state. Don't print out lines """Handle lines when in delete state. Don't print out lines
unless the state has ended.""" unless the state has ended."""
if self.__delete_count == self.__cb_count: if self.__delete_count == self.__cb_count:
self.__state = 'default' self.__state = 'default'
if self.__write_cb: if self.__write_cb:
self.__write_cb = 0 self.__write_cb = True
return 1 return True
return 0 return False
def __asterisk_func(self,line): def __asterisk_func(self,line):
""" """
Determine whether to delete info in group Determine whether to delete info in group
Note on self.__cb flag. Note on self.__cb flag.
If you find that you are in a delete group, and the preivous If you find that you are in a delete group, and the previous
token in not an open bracket (self.__ob = 0), that means token in not an open bracket (self.__ob = 0), that means
that the delete group is nested inside another acceptable that the delete group is nested inside another acceptable
detination group. In this case, you have alrady written detination group. In this case, you have alrady written
@ -110,21 +116,21 @@ class DeleteInfo:
""" """
# Test for {\*}, in which case don't enter # Test for {\*}, in which case don't enter
# delete state # delete state
self.__after_asterisk = 0 # only enter this function once # self.__after_asterisk = False # only enter this function once
self.__found_delete = 1 self.__found_delete = True
if self.__token_info == 'cb<nu<clos-brack': if self.__token_info == 'cb<nu<clos-brack':
if self.__delete_count == self.__cb_count: if self.__delete_count == self.__cb_count:
self.__state = 'default' self.__state = 'default'
self.__ob = 0 self.__ob = 0
# changed this because haven't printed out start # changed this because haven't printed out start
return 0 return False
else: else:
# not sure what happens here! # not sure what happens here!
# believe I have a '{\*} # believe I have a '{\*}
if self.__run_level > 3: if self.__run_level > 3:
msg = 'flag problem\n' msg = 'flag problem\n'
raise self.__bug_handler, msg raise self.__bug_handler, msg
return 1 return True
elif self.__token_info in self.__allowable : elif self.__token_info in self.__allowable :
if self.__ob: if self.__ob:
self.__write_obj.write(self.__ob) self.__write_obj.write(self.__ob)
@ -132,85 +138,81 @@ class DeleteInfo:
self.__state = 'default' self.__state = 'default'
else: else:
pass pass
return 1 return True
elif self.__token_info == 'cw<ls<list______': elif self.__token_info == 'cw<ls<list______':
self.__ob = 0 self.__ob = 0
self.__found_list_func(line) self.__found_list_func(line)
elif self.__token_info in self.__not_allowable: elif self.__token_info in self.__not_allowable:
if not self.__ob: if not self.__ob:
self.__write_cb = 1 self.__write_cb = True
self.__ob = 0 self.__ob = 0
self.__state = 'delete' self.__state = 'delete'
self.__cb_count = 0 self.__cb_count = 0
return 0 return False
else: else:
if self.__run_level > 5: if self.__run_level > 5:
msg = 'After an asterisk, and found neither an allowable or non-allowble token\n' msg = ('After an asterisk, and found neither an allowable or non-allowable token\n\
msg += 'token is "%s"\n' % self.__token_info token is "%s"\n') % self.__token_info
raise self.__bug_handler raise self.__bug_handler, msg
if not self.__ob: if not self.__ob:
self.__write_cb = 1 self.__write_cb = True
self.__ob = 0 self.__ob = 0
self.__state = 'delete' self.__state = 'delete'
self.__cb_count = 0 self.__cb_count = 0
return 0 return False
def __found_list_func(self, line): def __found_list_func(self, line):
""" """
print out control words in this group print out control words in this group
""" """
self.__state = 'list' self.__state = 'list'
def __list_func(self, line): def __list_func(self, line):
""" """
Check to see if the group has ended. Check to see if the group has ended.
Return 1 for all control words. Return True for all control words.
Return 0 otherwise. Return False otherwise.
""" """
if self.__delete_count == self.__cb_count and self.__token_info ==\ if self.__delete_count == self.__cb_count and self.__token_info ==\
'cb<nu<clos-brack': 'cb<nu<clos-brack':
self.__state = 'default' self.__state = 'default'
if self.__write_cb: if self.__write_cb:
self.__write_cb = 0 self.__write_cb = False
return 1 return True
return 0 return False
elif line[0:2] == 'cw': elif line[0:2] == 'cw':
return 1 return True
else: else:
return 0 return False
def delete_info(self): def delete_info(self):
"""Main method for handling other methods. Read one line in at """Main method for handling other methods. Read one line in at
a time, and determine wheter to print the line based on the state.""" a time, and determine whether to print the line based on the state."""
line_to_read = 'dummy' with open(self.__file, 'r') as read_obj:
read_obj = open(self.__file, 'r') with open(self.__write_to, 'w') as self.__write_obj:
self.__write_obj = open(self.__write_to, 'w') for line in read_obj:
while line_to_read: #ob<nu<open-brack<0001
#ob<nu<open-brack<0001 to_print = True
to_print =1 self.__token_info = line[:16]
line_to_read = read_obj.readline() if self.__token_info == 'ob<nu<open-brack':
line = line_to_read self.__ob_count = line[-5:-1]
self.__token_info = line[:16] if self.__token_info == 'cb<nu<clos-brack':
if self.__token_info == 'ob<nu<open-brack': self.__cb_count = line[-5:-1]
self.__ob_count = line[-5:-1] action = self.__state_dict.get(self.__state)
if self.__token_info == 'cb<nu<clos-brack': if not action:
self.__cb_count = line[-5:-1] sys.stderr.write(_('No action in dictionary state is "%s" \n')
action = self.__state_dict.get(self.__state) % self.__state)
if not action: to_print = action(line)
sys.stderr.write('No action in dictionary state is "%s" \n' # if self.__after_asterisk:
% self.__state) # to_print = self.__asterisk_func(line)
to_print = action(line) # elif self.__list:
""" # self.__in_list_func(line)
if self.__after_asterisk: # elif self.__delete:
to_print = self.__asterisk_func(line) # to_print = self.__delete_func(line)
elif self.__list: # else:
self.__in_list_func(line) # to_print = self.__default_func(line)
elif self.__delete: if to_print:
to_print = self.__delete_func(line) self.__write_obj.write(line)
else:
to_print = self.__default_func(line)
"""
if to_print:
self.__write_obj.write(line)
self.__write_obj.close()
read_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "delete_info.data") copy_obj.copy_file(self.__write_to, "delete_info.data")

View File

@ -16,7 +16,9 @@
# # # #
######################################################################### #########################################################################
import os, tempfile import os, tempfile
from calibre.ebooks.rtf2xml import copy from calibre.ebooks.rtf2xml import copy
class Footnote: class Footnote:
""" """
Two public methods are available. The first separates all of the Two public methods are available. The first separates all of the
@ -35,6 +37,7 @@ class Footnote:
self.__copy = copy self.__copy = copy
self.__write_to = tempfile.mktemp() self.__write_to = tempfile.mktemp()
self.__found_a_footnote = 0 self.__found_a_footnote = 0
def __first_line_func(self, line): def __first_line_func(self, line):
""" """
Print the tag info for footnotes. Check whether footnote is an Print the tag info for footnotes. Check whether footnote is an
@ -47,6 +50,7 @@ class Footnote:
self.__write_to_foot_obj.write( self.__write_to_foot_obj.write(
'mi<tg<open-att__<footnote<num>%s\n' % self.__footnote_count) 'mi<tg<open-att__<footnote<num>%s\n' % self.__footnote_count)
self.__first_line = 0 self.__first_line = 0
def __in_footnote_func(self, line): def __in_footnote_func(self, line):
"""Handle all tokens that are part of footnote""" """Handle all tokens that are part of footnote"""
if self.__first_line: if self.__first_line:
@ -68,6 +72,7 @@ class Footnote:
'mi<mk<footnt-clo\n') 'mi<mk<footnt-clo\n')
else: else:
self.__write_to_foot_obj.write(line) self.__write_to_foot_obj.write(line)
def __found_footnote(self, line): def __found_footnote(self, line):
""" Found a footnote""" """ Found a footnote"""
self.__found_a_footnote = 1 self.__found_a_footnote = 1
@ -81,6 +86,7 @@ class Footnote:
'mi<mk<footnt-ind<%04d\n' % self.__footnote_count) 'mi<mk<footnt-ind<%04d\n' % self.__footnote_count)
self.__write_to_foot_obj.write( self.__write_to_foot_obj.write(
'mi<mk<footnt-ope<%04d\n' % self.__footnote_count) 'mi<mk<footnt-ope<%04d\n' % self.__footnote_count)
def __default_sep(self, line): def __default_sep(self, line):
"""Handle all tokens that are not footnote tokens""" """Handle all tokens that are not footnote tokens"""
if self.__token_info == 'cw<nt<footnote__': if self.__token_info == 'cw<nt<footnote__':
@ -91,6 +97,7 @@ class Footnote:
self.__write_obj.write( self.__write_obj.write(
'tx<nu<__________<%s\n' % num 'tx<nu<__________<%s\n' % num
) )
def __initiate_sep_values(self): def __initiate_sep_values(self):
""" """
initiate counters for separate_footnotes method. initiate counters for separate_footnotes method.
@ -102,6 +109,7 @@ class Footnote:
self.__in_footnote = 0 self.__in_footnote = 0
self.__first_line = 0 #have not processed the first line of footnote self.__first_line = 0 #have not processed the first line of footnote
self.__footnote_count = 0 self.__footnote_count = 0
def separate_footnotes(self): def separate_footnotes(self):
""" """
Separate all the footnotes in an RTF file and put them at the bottom, Separate all the footnotes in an RTF file and put them at the bottom,
@ -111,58 +119,50 @@ class Footnote:
bottom of the main file. bottom of the main file.
""" """
self.__initiate_sep_values() self.__initiate_sep_values()
read_obj = open(self.__file)
self.__write_obj = open(self.__write_to, 'w')
self.__footnote_holder = tempfile.mktemp() self.__footnote_holder = tempfile.mktemp()
self.__write_to_foot_obj = open(self.__footnote_holder, 'w') with open(self.__file) as read_obj:
line_to_read = 1 with open(self.__write_to, 'w') as self.__write_obj:
while line_to_read: with open(self.__footnote_holder, 'w') as self.__write_to_foot_obj:
line_to_read = read_obj.readline() for line in read_obj:
line = line_to_read self.__token_info = line[:16]
self.__token_info = line[:16] # keep track of opening and closing brackets
# keep track of opening and closing brackets if self.__token_info == 'ob<nu<open-brack':
if self.__token_info == 'ob<nu<open-brack': self.__ob_count = line[-5:-1]
self.__ob_count = line[-5:-1] if self.__token_info == 'cb<nu<clos-brack':
if self.__token_info == 'cb<nu<clos-brack': self.__cb_count = line[-5:-1]
self.__cb_count = line[-5:-1] # In the middle of footnote text
# In the middle of footnote text if self.__in_footnote:
if self.__in_footnote: self.__in_footnote_func(line)
self.__in_footnote_func(line) # not in the middle of footnote text
# not in the middle of footnote text else:
else: self.__default_sep(line)
self.__default_sep(line) with open(self.__footnote_holder, 'r') as read_obj:
self.__write_obj.close() with open(self.__write_to, 'a') as write_obj:
read_obj.close() write_obj.write(
self.__write_to_foot_obj.close() 'mi<mk<sect-close\n'
read_obj = open(self.__footnote_holder, 'r') 'mi<mk<body-close\n'
write_obj = open(self.__write_to, 'a') 'mi<tg<close_____<section\n'
write_obj.write( 'mi<tg<close_____<body\n'
'mi<mk<sect-close\n' 'mi<tg<close_____<doc\n'
'mi<mk<body-close\n' 'mi<mk<footnt-beg\n')
'mi<tg<close_____<section\n' for line in read_obj:
'mi<tg<close_____<body\n' write_obj.write(line)
'mi<tg<close_____<doc\n' write_obj.write(
'mi<mk<footnt-beg\n') 'mi<mk<footnt-end\n')
line = 1
while line:
line = read_obj.readline()
write_obj.write(line)
write_obj.write(
'mi<mk<footnt-end\n')
read_obj.close()
write_obj.close()
os.remove(self.__footnote_holder) os.remove(self.__footnote_holder)
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "footnote_separate.data") copy_obj.copy_file(self.__write_to, "footnote_separate.data")
copy_obj.rename(self.__write_to, self.__file) copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to) os.remove(self.__write_to)
def update_info(self, file, copy): def update_info(self, file, copy):
""" """
Unused method Unused method
""" """
self.__file = file self.__file = file
self.__copy = copy self.__copy = copy
def __get_foot_body_func(self, line): def __get_foot_body_func(self, line):
""" """
Process lines in main body and look for beginning of footnotes. Process lines in main body and look for beginning of footnotes.
@ -172,6 +172,7 @@ class Footnote:
self.__state = 'foot' self.__state = 'foot'
else: else:
self.__write_obj.write(line) self.__write_obj.write(line)
def __get_foot_foot_func(self, line): def __get_foot_foot_func(self, line):
""" """
Copy footnotes from bottom of file to a separate, temporary file. Copy footnotes from bottom of file to a separate, temporary file.
@ -180,6 +181,7 @@ class Footnote:
self.__state = 'body' self.__state = 'body'
else: else:
self.__write_to_foot_obj.write(line) self.__write_to_foot_obj.write(line)
def __get_footnotes(self): def __get_footnotes(self):
""" """
Private method to remove footnotes from main file. Read one line from Private method to remove footnotes from main file. Read one line from
@ -188,21 +190,16 @@ class Footnote:
These two functions do the work of separating the footnotes form the These two functions do the work of separating the footnotes form the
body. body.
""" """
read_obj = open(self.__file) with open(self.__file) as read_obj:
self.__write_obj = open(self.__write_to, 'w') with open(self.__write_to, 'w') as self.__write_obj:
# self.__write_to = "footnote_info.data" with open(self.__footnote_holder, 'w') as self.__write_to_foot_obj:
self.__write_to_foot_obj = open(self.__footnote_holder, 'w') for line in read_obj:
line = 1 self.__token_info = line[:16]
while line: if self.__state == 'body':
line = read_obj.readline() self.__get_foot_body_func(line)
self.__token_info = line[:16] elif self.__state == 'foot':
if self.__state == 'body': self.__get_foot_foot_func(line)
self.__get_foot_body_func(line)
elif self.__state == 'foot':
self.__get_foot_foot_func(line)
read_obj.close()
self.__write_obj.close()
self.__write_to_foot_obj.close()
def __get_foot_from_temp(self, num): def __get_foot_from_temp(self, num):
""" """
Private method for joining footnotes to body. This method reads from Private method for joining footnotes to body. This method reads from
@ -213,9 +210,7 @@ class Footnote:
look_for = 'mi<mk<footnt-ope<' + num + '\n' look_for = 'mi<mk<footnt-ope<' + num + '\n'
found_foot = 0 found_foot = 0
string_to_return = '' string_to_return = ''
line = 1 for line in self.__read_from_foot_obj:
while line:
line = self.__read_from_foot_obj.readline()
if found_foot: if found_foot:
if line == 'mi<mk<footnt-clo\n': if line == 'mi<mk<footnt-clo\n':
return string_to_return return string_to_return
@ -223,6 +218,7 @@ class Footnote:
else: else:
if line == look_for: if line == look_for:
found_foot = 1 found_foot = 1
def __join_from_temp(self): def __join_from_temp(self):
""" """
Private method for rejoining footnotes to body. Read from the Private method for rejoining footnotes to body. Read from the
@ -232,16 +228,14 @@ class Footnote:
print out to the third file. print out to the third file.
If no footnote marker is found, simply print out the token (line). If no footnote marker is found, simply print out the token (line).
""" """
self.__read_from_foot_obj = open(self.__footnote_holder, 'r') with open(self.__footnote_holder, 'r') as self.__read_from_foot_obj:
read_obj = open(self.__write_to, 'r') with open(self.__write_to, 'r') as read_obj:
self.__write_obj = open(self.__write_to2, 'w') with open(self.__write_to2, 'w') as self.__write_obj:
line = 1 for line in read_obj:
while line: if line[:16] == 'mi<mk<footnt-ind':
line = read_obj.readline() line = self.__get_foot_from_temp(line[17:-1])
if line[:16] == 'mi<mk<footnt-ind': self.__write_obj.write(line)
line = self.__get_foot_from_temp(line[17:-1])
self.__write_obj.write(line)
read_obj.close()
def join_footnotes(self): def join_footnotes(self):
""" """
Join the footnotes from the bottom of the file and put them in their Join the footnotes from the bottom of the file and put them in their
@ -258,8 +252,8 @@ class Footnote:
self.__state = 'body' self.__state = 'body'
self.__get_footnotes() self.__get_footnotes()
self.__join_from_temp() self.__join_from_temp()
self.__write_obj.close() # self.__write_obj.close()
self.__read_from_foot_obj.close() # self.__read_from_foot_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to2, "footnote_joined.data") copy_obj.copy_file(self.__write_to2, "footnote_joined.data")

View File

@ -43,16 +43,18 @@ class GetCharMap:
def get_char_map(self, map): def get_char_map(self, map):
if map == 'ansicpg0': if map == 'ansicpg0':
map = 'ansicpg1250' map = 'ansicpg1250'
found_map = 0 if map in ('ansicpg10000', '10000'):
map = 'mac_roman'
found_map = False
map_dict = {} map_dict = {}
self.__char_file.seek(0) self.__char_file.seek(0)
for line in self.__char_file.readlines(): for line in self.__char_file:
if not line.strip(): continue if not line.strip(): continue
begin_element = '<%s>' % map; begin_element = '<%s>' % map;
end_element = '</%s>' % map end_element = '</%s>' % map
if not found_map: if not found_map:
if begin_element in line: if begin_element in line:
found_map = 1 found_map = True
else: else:
if end_element in line: if end_element in line:
break break
@ -62,8 +64,7 @@ class GetCharMap:
if not found_map: if not found_map:
msg = 'no map found\n' msg = 'no map found\nmap is "%s"\n'%(map,)
msg += 'map is "%s"\n'%(map,)
raise self.__bug_handler, msg raise self.__bug_handler, msg
return map_dict return map_dict

View File

@ -54,10 +54,10 @@ class Hex2Utf8:
'convert_to_caps'--wether to convert caps to utf-8 'convert_to_caps'--wether to convert caps to utf-8
Returns: Returns:
nothing nothing
""" """
self.__file = in_file self.__file = in_file
self.__copy = copy self.__copy = copy
if area_to_convert != 'preamble' and area_to_convert != 'body': if area_to_convert not in ('preamble', 'body'):
msg = ( msg = (
'Developer error! Wrong flag.\n' 'Developer error! Wrong flag.\n'
'in module "hex_2_utf8.py\n' 'in module "hex_2_utf8.py\n'
@ -79,7 +79,8 @@ class Hex2Utf8:
self.__write_to = tempfile.mktemp() self.__write_to = tempfile.mktemp()
self.__bug_handler = bug_handler self.__bug_handler = bug_handler
self.__invalid_rtf_handler = invalid_rtf_handler self.__invalid_rtf_handler = invalid_rtf_handler
def update_values( self,
def update_values(self,
file, file,
area_to_convert, area_to_convert,
char_file, char_file,
@ -132,6 +133,7 @@ class Hex2Utf8:
# self.__convert_symbol = 0 # self.__convert_symbol = 0
# self.__convert_wingdings = 0 # self.__convert_wingdings = 0
# self.__convert_zapf = 0 # self.__convert_zapf = 0
def __initiate_values(self): def __initiate_values(self):
""" """
Required: Required:
@ -191,6 +193,7 @@ class Hex2Utf8:
'body' : self.__body_func, 'body' : self.__body_func,
'mi<mk<body-open_' : self.__found_body_func, 'mi<mk<body-open_' : self.__found_body_func,
'tx<hx<__________' : self.__hex_text_func, 'tx<hx<__________' : self.__hex_text_func,
# 'tx<nu<__________' : self.__text_func,
} }
self.__body_state_dict = { self.__body_state_dict = {
'preamble' : self.__preamble_for_body_func, 'preamble' : self.__preamble_for_body_func,
@ -209,6 +212,7 @@ class Hex2Utf8:
} }
self.__caps_list = ['false'] self.__caps_list = ['false']
self.__font_list = ['not-defined'] self.__font_list = ['not-defined']
def __hex_text_func(self, line): def __hex_text_func(self, line):
""" """
Required: Required:
@ -218,12 +222,12 @@ class Hex2Utf8:
token is in the dictionary, then check if the value starts with a token is in the dictionary, then check if the value starts with a
"&". If it does, then tag the result as utf text. Otherwise, tag it "&". If it does, then tag the result as utf text. Otherwise, tag it
as normal text. as normal text.
If the nex_num is not in the dictionary, then a mistake has been If the hex_num is not in the dictionary, then a mistake has been
made. made.
""" """
hex_num = line[17:-1] hex_num = line[17:-1]
converted = self.__current_dict.get(hex_num) converted = self.__current_dict.get(hex_num)
if converted != None: if converted is not None:
# tag as utf-8 # tag as utf-8
if converted[0:1] == "&": if converted[0:1] == "&":
font = self.__current_dict_name font = self.__current_dict_name
@ -263,42 +267,43 @@ class Hex2Utf8:
# msg += 'dictionary is %s\n' % self.__current_dict_name # msg += 'dictionary is %s\n' % self.__current_dict_name
msg = 'Character "&#x%s;" does not appear to be valid (or is a control character)\n' % token msg = 'Character "&#x%s;" does not appear to be valid (or is a control character)\n' % token
raise self.__bug_handler, msg raise self.__bug_handler, msg
def __found_body_func(self, line): def __found_body_func(self, line):
self.__state = 'body' self.__state = 'body'
self.__write_obj.write(line) self.__write_obj.write(line)
def __body_func(self, line): def __body_func(self, line):
""" """
When parsing preamble When parsing preamble
""" """
self.__write_obj.write(line) self.__write_obj.write(line)
def __preamble_func(self, line): def __preamble_func(self, line):
action = self.__preamble_state_dict.get(self.__token_info) action = self.__preamble_state_dict.get(self.__token_info)
if action != None: if action is not None:
action(line) action(line)
else: else:
self.__write_obj.write(line) self.__write_obj.write(line)
def __convert_preamble(self): def __convert_preamble(self):
self.__state = 'preamble' self.__state = 'preamble'
read_obj = open(self.__file, 'r')
self.__write_obj = open(self.__write_to, 'w') self.__write_obj = open(self.__write_to, 'w')
line_to_read = 1 with open(self.__file, 'r') as read_obj:
while line_to_read: for line in read_obj:
line_to_read = read_obj.readline() self.__token_info = line[:16]
line = line_to_read action = self.__preamble_state_dict.get(self.__state)
self.__token_info = line[:16] if action is None:
action = self.__preamble_state_dict.get(self.__state) sys.stderr.write(_('error no state found in hex_2_utf8'),
if action == None: self.__state
sys.stderr.write('error no state found in hex_2_utf8', )
self.__state action(line)
)
action(line)
read_obj.close()
self.__write_obj.close() self.__write_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "preamble_utf_convert.data") copy_obj.copy_file(self.__write_to, "preamble_utf_convert.data")
copy_obj.rename(self.__write_to, self.__file) copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to) os.remove(self.__write_to)
def __preamble_for_body_func(self, line): def __preamble_for_body_func(self, line):
""" """
Required: Required:
@ -311,6 +316,7 @@ class Hex2Utf8:
if self.__token_info == 'mi<mk<body-open_': if self.__token_info == 'mi<mk<body-open_':
self.__found_body_func(line) self.__found_body_func(line)
self.__write_obj.write(line) self.__write_obj.write(line)
def __body_for_body_func(self, line): def __body_for_body_func(self, line):
""" """
Required: Required:
@ -321,10 +327,11 @@ class Hex2Utf8:
Used when parsing the body. Used when parsing the body.
""" """
action = self.__in_body_dict.get(self.__token_info) action = self.__in_body_dict.get(self.__token_info)
if action != None: if action is not None:
action(line) action(line)
else: else:
self.__write_obj.write(line) self.__write_obj.write(line)
def __start_font_func(self, line): def __start_font_func(self, line):
""" """
Required: Required:
@ -348,6 +355,7 @@ class Hex2Utf8:
else: else:
self.__current_dict_name = 'default' self.__current_dict_name = 'default'
self.__current_dict = self.__def_dict self.__current_dict = self.__def_dict
def __end_font_func(self, line): def __end_font_func(self, line):
""" """
Required: Required:
@ -376,6 +384,7 @@ class Hex2Utf8:
else: else:
self.__current_dict_name = 'default' self.__current_dict_name = 'default'
self.__current_dict = self.__def_dict self.__current_dict = self.__def_dict
def __start_special_font_func_old(self, line): def __start_special_font_func_old(self, line):
""" """
Required: Required:
@ -398,6 +407,7 @@ class Hex2Utf8:
self.__current_dict.append(self.__dingbats_dict) self.__current_dict.append(self.__dingbats_dict)
self.__special_fonts_found += 1 self.__special_fonts_found += 1
self.__current_dict_name = 'Zapf Dingbats' self.__current_dict_name = 'Zapf Dingbats'
def __end_special_font_func(self, line): def __end_special_font_func(self, line):
""" """
Required: Required:
@ -416,6 +426,7 @@ class Hex2Utf8:
self.__current_dict.pop() self.__current_dict.pop()
self.__special_fonts_found -= 1 self.__special_fonts_found -= 1
self.__dict_name = 'default' self.__dict_name = 'default'
def __start_caps_func_old(self, line): def __start_caps_func_old(self, line):
""" """
Required: Required:
@ -427,6 +438,7 @@ class Hex2Utf8:
self.__in_caps to 1 self.__in_caps to 1
""" """
self.__in_caps = 1 self.__in_caps = 1
def __start_caps_func(self, line): def __start_caps_func(self, line):
""" """
Required: Required:
@ -440,6 +452,7 @@ class Hex2Utf8:
self.__in_caps = 1 self.__in_caps = 1
value = line[17:-1] value = line[17:-1]
self.__caps_list.append(value) self.__caps_list.append(value)
def __end_caps_func(self, line): def __end_caps_func(self, line):
""" """
Required: Required:
@ -455,7 +468,8 @@ class Hex2Utf8:
else: else:
sys.stderr.write('Module is hex_2_utf8\n') sys.stderr.write('Module is hex_2_utf8\n')
sys.stderr.write('method is __end_caps_func\n') sys.stderr.write('method is __end_caps_func\n')
sys.stderr.write('caps list should be more than one?\n') sys.stderr.write('caps list should be more than one?\n') #self.__in_caps not set
def __text_func(self, line): def __text_func(self, line):
""" """
Required: Required:
@ -466,9 +480,8 @@ class Hex2Utf8:
if in caps, convert. Otherwise, print out. if in caps, convert. Otherwise, print out.
""" """
text = line[17:-1] text = line[17:-1]
if self.__current_dict_name == 'Symbol'\ # print line
or self.__current_dict_name == 'Wingdings'\ if self.__current_dict_name in ('Symbol', 'Wingdings', 'Zapf Dingbats'):
or self.__current_dict_name == 'Zapf Dingbats':
the_string = '' the_string = ''
for letter in text: for letter in text:
hex_num = hex(ord(letter)) hex_num = hex(ord(letter))
@ -477,21 +490,21 @@ class Hex2Utf8:
hex_num = hex_num[2:] hex_num = hex_num[2:]
hex_num = '\'%s' % hex_num hex_num = '\'%s' % hex_num
converted = self.__current_dict.get(hex_num) converted = self.__current_dict.get(hex_num)
if converted == None: if converted is None:
sys.stderr.write('module is hex_2_ut8\n') sys.stderr.write('module is hex_2_ut8\n')
sys.stderr.write('method is __text_func\n') sys.stderr.write('method is __text_func\n')
sys.stderr.write('no hex value for "%s"\n' % hex_num) sys.stderr.write('no hex value for "%s"\n' % hex_num)
else: else:
the_string += converted the_string += converted
self.__write_obj.write('tx<nu<__________<%s\n' % the_string) self.__write_obj.write('tx<nu<__________<%s\n' % the_string)
# print the_string
else: else:
if self.__caps_list[-1] == 'true' \ if self.__caps_list[-1] == 'true' \
and self.__convert_caps\ and self.__convert_caps\
and self.__current_dict_name != 'Symbol'\ and self.__current_dict_name not in ('Symbol', 'Wingdings', 'Zapf Dingbats'):
and self.__current_dict_name != 'Wingdings'\
and self.__current_dict_name != 'Zapf Dingbats':
text = text.upper() text = text.upper()
self.__write_obj.write('tx<nu<__________<%s\n' % text) self.__write_obj.write('tx<nu<__________<%s\n' % text)
def __utf_to_caps_func(self, line): def __utf_to_caps_func(self, line):
""" """
Required: Required:
@ -506,6 +519,7 @@ class Hex2Utf8:
# utf_text = utf_text.upper() # utf_text = utf_text.upper()
utf_text = self.__utf_token_to_caps_func(utf_text) utf_text = self.__utf_token_to_caps_func(utf_text)
self.__write_obj.write('tx<ut<__________<%s\n' % utf_text) self.__write_obj.write('tx<ut<__________<%s\n' % utf_text)
def __utf_token_to_caps_func(self, char_entity): def __utf_token_to_caps_func(self, char_entity):
""" """
Required: Required:
@ -530,28 +544,26 @@ class Hex2Utf8:
return char_entity return char_entity
else: else:
return converted return converted
def __convert_body(self): def __convert_body(self):
self.__state = 'body' self.__state = 'body'
read_obj = open(self.__file, 'r') with open(self.__file, 'r') as read_obj:
self.__write_obj = open(self.__write_to, 'w') self.__write_obj = open(self.__write_to, 'w')
line_to_read = 1 for line in read_obj:
while line_to_read: self.__token_info = line[:16]
line_to_read = read_obj.readline() action = self.__body_state_dict.get(self.__state)
line = line_to_read if action is None:
self.__token_info = line[:16] sys.stderr.write('error no state found in hex_2_utf8',
action = self.__body_state_dict.get(self.__state) self.__state
if action == None: )
sys.stderr.write('error no state found in hex_2_utf8', action(line)
self.__state
)
action(line)
read_obj.close()
self.__write_obj.close() self.__write_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "body_utf_convert.data") copy_obj.copy_file(self.__write_to, "body_utf_convert.data")
copy_obj.rename(self.__write_to, self.__file) copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to) os.remove(self.__write_to)
def convert_hex_2_utf8(self): def convert_hex_2_utf8(self):
self.__initiate_values() self.__initiate_values()
if self.__area_to_convert == 'preamble': if self.__area_to_convert == 'preamble':

View File

@ -1,5 +1,7 @@
import sys, os, tempfile import sys, os, tempfile
from calibre.ebooks.rtf2xml import copy from calibre.ebooks.rtf2xml import copy
""" """
States. States.
1. default 1. default
@ -36,6 +38,7 @@ class Inline:
self.__copy = copy self.__copy = copy
self.__run_level = run_level self.__run_level = run_level
self.__write_to = tempfile.mktemp() self.__write_to = tempfile.mktemp()
def __initiate_values(self): def __initiate_values(self):
""" """
Initiate all values. Initiate all values.
@ -51,7 +54,6 @@ class Inline:
'tx<ut<__________' : self.__found_text_func, 'tx<ut<__________' : self.__found_text_func,
'mi<mk<inline-fld' : self.__found_text_func, 'mi<mk<inline-fld' : self.__found_text_func,
'text' : self.__found_text_func, 'text' : self.__found_text_func,
'cw<nu<hard-lineb' : self.__found_text_func, #calibre
'cb<nu<clos-brack' : self.__close_bracket_func, 'cb<nu<clos-brack' : self.__close_bracket_func,
'mi<mk<par-end___' : self.__end_para_func, 'mi<mk<par-end___' : self.__end_para_func,
'mi<mk<footnt-ope' : self.__end_para_func, 'mi<mk<footnt-ope' : self.__end_para_func,
@ -63,7 +65,6 @@ class Inline:
'tx<hx<__________' : self.__found_text_func, 'tx<hx<__________' : self.__found_text_func,
'tx<ut<__________' : self.__found_text_func, 'tx<ut<__________' : self.__found_text_func,
'text' : self.__found_text_func, 'text' : self.__found_text_func,
'cw<nu<hard-lineb' : self.__found_text_func, #calibre
'mi<mk<inline-fld' : self.__found_text_func, 'mi<mk<inline-fld' : self.__found_text_func,
'ob<nu<open-brack': self.__found_open_bracket_func, 'ob<nu<open-brack': self.__found_open_bracket_func,
'mi<mk<par-end___' : self.__end_para_func, 'mi<mk<par-end___' : self.__end_para_func,
@ -83,12 +84,12 @@ class Inline:
self.__in_para = 0 # not in paragraph self.__in_para = 0 # not in paragraph
self.__char_dict = { self.__char_dict = {
# character info => ci # character info => ci
'annotation' : 'annotation', 'annotation' : 'annotation',
'blue______' : 'blue', 'blue______' : 'blue',
'bold______' : 'bold', 'bold______' : 'bold',
'caps______' : 'caps', 'caps______' : 'caps',
'char-style' : 'character-style', 'char-style' : 'character-style',
'dbl-strike' : 'double-strike-through', 'dbl-strike' : 'double-strike-through',
'emboss____' : 'emboss', 'emboss____' : 'emboss',
'engrave___' : 'engrave', 'engrave___' : 'engrave',
'font-color' : 'font-color', 'font-color' : 'font-color',
@ -96,7 +97,7 @@ class Inline:
'font-size_' : 'font-size', 'font-size_' : 'font-size',
'font-style' : 'font-style', 'font-style' : 'font-style',
'font-up___' : 'superscript', 'font-up___' : 'superscript',
'footnot-mk' : 'footnote-marker', 'footnot-mk' : 'footnote-marker',
'green_____' : 'green', 'green_____' : 'green',
'hidden____' : 'hidden', 'hidden____' : 'hidden',
'italics___' : 'italics', 'italics___' : 'italics',
@ -107,9 +108,10 @@ class Inline:
'strike-thr' : 'strike-through', 'strike-thr' : 'strike-through',
'subscript_' : 'subscript', 'subscript_' : 'subscript',
'superscrip' : 'superscript', 'superscrip' : 'superscript',
'underlined' : 'underlined', 'underlined' : 'underlined',
} }
self.__caps_list = ['false'] self.__caps_list = ['false']
def __set_list_func(self, line): def __set_list_func(self, line):
""" """
Requires: Requires:
@ -128,6 +130,7 @@ class Inline:
self.__place = 'in_list' self.__place = 'in_list'
self.__inline_list = self.__list_inline_list self.__inline_list = self.__list_inline_list
self.__groups_in_waiting = self.__groups_in_waiting_list self.__groups_in_waiting = self.__groups_in_waiting_list
def __default_func(self, line): def __default_func(self, line):
""" """
Requires: Requires:
@ -140,8 +143,8 @@ class Inline:
action = self.__default_dict.get(self.__token_info) action = self.__default_dict.get(self.__token_info)
if action: if action:
action(line) action(line)
if self.__token_info != 'cw<nu<hard-lineb': #calibre self.__write_obj.write(line)
self.__write_obj.write(line)
def __found_open_bracket_func(self, line): def __found_open_bracket_func(self, line):
""" """
Requires: Requires:
@ -156,6 +159,7 @@ class Inline:
self.__groups_in_waiting[0] += 1 self.__groups_in_waiting[0] += 1
self.__inline_list.append({}) self.__inline_list.append({})
self.__inline_list[-1]['contains_inline'] = 0 self.__inline_list[-1]['contains_inline'] = 0
def __after_open_bracket_func(self, line): def __after_open_bracket_func(self, line):
""" """
Requires: Requires:
@ -176,6 +180,7 @@ class Inline:
self.__state = 'default' # a non control word? self.__state = 'default' # a non control word?
action(line) action(line)
self.__write_obj.write(line) self.__write_obj.write(line)
def __handle_control_word(self, line): def __handle_control_word(self, line):
""" """
Required: Required:
@ -206,6 +211,7 @@ class Inline:
elif char_value == 'Zapf Dingbats': elif char_value == 'Zapf Dingbats':
self.__write_obj.write('mi<mk<font-dingb\n') self.__write_obj.write('mi<mk<font-dingb\n')
""" """
def __close_bracket_func(self, line): def __close_bracket_func(self, line):
""" """
Requires: Requires:
@ -244,6 +250,7 @@ class Inline:
self.__inline_list.pop() self.__inline_list.pop()
if self.__groups_in_waiting[0] != 0: if self.__groups_in_waiting[0] != 0:
self.__groups_in_waiting[0] -= 1 self.__groups_in_waiting[0] -= 1
def __found_text_func(self, line): def __found_text_func(self, line):
""" """
Required: Required:
@ -257,7 +264,6 @@ class Inline:
Text can mark the start of a paragraph. Text can mark the start of a paragraph.
If already in a paragraph, check to see if any groups are waiting If already in a paragraph, check to see if any groups are waiting
to be added. If so, use another method to write these groups. to be added. If so, use another method to write these groups.
3. If not check if hardline break, then write
""" """
if self.__place == 'in_list': if self.__place == 'in_list':
self.__write_inline() self.__write_inline()
@ -265,10 +271,7 @@ class Inline:
if not self.__in_para: if not self.__in_para:
self.__in_para = 1 self.__in_para = 1
self.__start_para_func(line) self.__start_para_func(line)
else: elif self.__groups_in_waiting[0] != 0:
if self.__token_info == 'cw<nu<hard-lineb': #calibre
self.__write_obj.write('mi<tg<empty_____<hardline-break\n')
if self.__groups_in_waiting[0] != 0:
self.__write_inline() self.__write_inline()
def __write_inline(self): def __write_inline(self):
@ -314,6 +317,7 @@ class Inline:
self.__write_obj.write('<%s>%s' % (the_key, the_dict[the_key])) self.__write_obj.write('<%s>%s' % (the_key, the_dict[the_key]))
self.__write_obj.write('\n') self.__write_obj.write('\n')
self.__groups_in_waiting[0] = 0 self.__groups_in_waiting[0] = 0
def __end_para_func(self, line): def __end_para_func(self, line):
""" """
Requires: Requires:
@ -342,6 +346,7 @@ class Inline:
self.__write_obj.write('mi<mk<caps-end__\n') self.__write_obj.write('mi<mk<caps-end__\n')
self.__write_obj.write('mi<tg<close_____<inline\n') self.__write_obj.write('mi<tg<close_____<inline\n')
self.__in_para = 0 self.__in_para = 0
def __start_para_func(self, line): def __start_para_func(self, line):
""" """
Requires: Requires:
@ -369,12 +374,14 @@ class Inline:
self.__write_obj.write('<%s>%s' % (the_key, the_dict[the_key])) self.__write_obj.write('<%s>%s' % (the_key, the_dict[the_key]))
self.__write_obj.write('\n') self.__write_obj.write('\n')
self.__groups_in_waiting[0] = 0 self.__groups_in_waiting[0] = 0
def __found_field_func(self, line): def __found_field_func(self, line):
""" """
Just a default function to make sure I don't prematurely exit Just a default function to make sure I don't prematurely exit
default state default state
""" """
pass pass
def form_tags(self): def form_tags(self):
""" """
Requires: Requires:
@ -386,32 +393,27 @@ class Inline:
the state. the state.
""" """
self.__initiate_values() self.__initiate_values()
read_obj = open(self.__file, 'r') with open(self.__file, 'r') as read_obj:
self.__write_obj = open(self.__write_to, 'w') with open(self.__write_to, 'w') as self.__write_obj:
line_to_read = 1 for line in read_obj:
while line_to_read: token = line[0:-1]
line_to_read = read_obj.readline() self.__token_info = ''
line = line_to_read if token == 'tx<mc<__________<rdblquote'\
token = line[0:-1] or token == 'tx<mc<__________<ldblquote'\
self.__token_info = '' or token == 'tx<mc<__________<lquote'\
if token == 'tx<mc<__________<rdblquote'\ or token == 'tx<mc<__________<rquote'\
or token == 'tx<mc<__________<ldblquote'\ or token == 'tx<mc<__________<emdash'\
or token == 'tx<mc<__________<lquote'\ or token == 'tx<mc<__________<endash'\
or token == 'tx<mc<__________<rquote'\ or token == 'tx<mc<__________<bullet':
or token == 'tx<mc<__________<emdash'\ self.__token_info = 'text'
or token == 'tx<mc<__________<endash'\ else:
or token == 'tx<mc<__________<bullet': self.__token_info = line[:16]
self.__token_info = 'text' self.__set_list_func(line)
else: action = self.__state_dict.get(self.__state)
self.__token_info = line[:16] if action is None:
self.__set_list_func(line) sys.stderr.write('No matching state in module inline_for_lists.py\n')
action = self.__state_dict.get(self.__state) sys.stderr.write(self.__state + '\n')
if action == None: action(line)
sys.stderr.write('No matching state in module inline_for_lists.py\n')
sys.stderr.write(self.__state + '\n')
action(line)
read_obj.close()
self.__write_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "inline.data") copy_obj.copy_file(self.__write_to, "inline.data")

View File

@ -15,8 +15,11 @@
# # # #
# # # #
######################################################################### #########################################################################
import os, tempfile, re import os, tempfile
from calibre.ebooks.rtf2xml import copy from calibre.ebooks.rtf2xml import copy
from calibre.utils.cleantext import clean_ascii_chars
class FixLineEndings: class FixLineEndings:
"""Fix line endings""" """Fix line endings"""
def __init__(self, def __init__(self,
@ -32,36 +35,23 @@ class FixLineEndings:
self.__run_level = run_level self.__run_level = run_level
self.__write_to = tempfile.mktemp() self.__write_to = tempfile.mktemp()
self.__replace_illegals = replace_illegals self.__replace_illegals = replace_illegals
def fix_endings(self): def fix_endings(self):
##tempFileName = tempfile.mktemp() #read
illegal_regx = re.compile( '\x00|\x01|\x02|\x03|\x04|\x05|\x06|\x07|\x08|\x0B|\x0E|\x0F|\x10|\x11|\x12|\x13') with open(self.__file, 'r') as read_obj:
#nums = [0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 14, 15, 16, 17, 18, 19] input_file = read_obj.read()
""" #calibre go from win and mac to unix
read_obj = open(self.__file, 'r') input_file = input_file.replace ('\r\n', '\n')
line = read_obj.read(1000) input_file = input_file.replace ('\r', '\n')
regexp = re.compile(r"\r") #remove ASCII invalid chars : 0 to 8 and 11-14 to 24-26-27
macintosh = regexp.search(line) if self.__replace_illegals:
read_obj.close() input_file = clean_ascii_chars(input_file)
""" #write
# always check since I have to get rid of illegal characters with open(self.__write_to, 'wb') as write_obj:
macintosh = 1 write_obj.write(input_file)
if macintosh: #copy
line = 1 copy_obj = copy.Copy(bug_handler = self.__bug_handler)
read_obj = open(self.__file, 'r') if self.__copy:
write_obj = open(self.__write_to, 'w') copy_obj.copy_file(self.__write_to, "line_endings.data")
while line: copy_obj.rename(self.__write_to, self.__file)
line = read_obj.read(1000) os.remove(self.__write_to)
# line = re.sub(regexp,"\n",line)
line = line.replace ('\r', '\n')
if self.__replace_illegals:
line = re.sub(illegal_regx, '', line)
# for num in nums:
# line = line.replace(chr(num), '')
write_obj.write(line )
read_obj.close()
write_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "line_endings.data")
copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to)

View File

@ -16,7 +16,9 @@
# # # #
######################################################################### #########################################################################
import sys, os, tempfile import sys, os, tempfile
from calibre.ebooks.rtf2xml import copy from calibre.ebooks.rtf2xml import copy
class Pict: class Pict:
"""Process graphic information""" """Process graphic information"""
def __init__(self, def __init__(self,
@ -36,13 +38,11 @@ class Pict:
self.__ob_count = 0 self.__ob_count = 0
self.__cb_count = 0 self.__cb_count = 0
self.__pict_count = 0 self.__pict_count = 0
self.__in_pict = 0 self.__in_pict = False
self.__already_found_pict = 0 self.__already_found_pict = False
self.__orig_file = orig_file self.__orig_file = orig_file
self.__initiate_pict_dict() self.__initiate_pict_dict()
self.__out_file = out_file self.__out_file = out_file
# this is left over
self.__no_ask = 1
def __initiate_pict_dict(self): def __initiate_pict_dict(self):
self.__pict_dict = { self.__pict_dict = {
@ -71,57 +71,43 @@ class Pict:
self.__out_file)) self.__out_file))
else: else:
dir_name = os.path.dirname(self.__orig_file) dir_name = os.path.dirname(self.__orig_file)
# self.__output_to_file_func()
self.__dir_name = base_name + "_rtf_pict_dir/" self.__dir_name = base_name + "_rtf_pict_dir/"
self.__dir_name = os.path.join(dir_name, self.__dir_name) self.__dir_name = os.path.join(dir_name, self.__dir_name)
if not os.path.isdir(self.__dir_name): if not os.path.isdir(self.__dir_name):
try: try:
os.mkdir(self.__dir_name) os.mkdir(self.__dir_name)
except OSError, msg: except OSError, msg:
msg = str(msg) msg = "%sCouldn't make directory '%s':\n" % (str(msg), self.__dir_name)
msg += "Couldn't make directory '%s':\n" % (self.__dir_name)
raise self.__bug_handler raise self.__bug_handler
else: else:
if self.__no_ask: if self.__run_level > 1:
user_response = 'r' sys.stderr.write('Removing files from old pict directory...\n')
else: all_files = os.listdir(self.__dir_name)
msg = 'Do you want to remove all files in %s?\n' % self.__dir_name for the_file in all_files:
msg += 'Type "r" to remove.\n' the_file = os.path.join(self.__dir_name, the_file)
msg += 'Type any other key to keep files in place.\n' try:
sys.stderr.write(msg) os.remove(the_file)
user_response = raw_input() except OSError:
if user_response == 'r': pass
if self.__run_level > 1: if self.__run_level > 1:
sys.stderr.write('Removing files from old pict directory...\n') sys.stderr.write('Files removed.\n')
all_files = os.listdir(self.__dir_name)
for the_file in all_files:
the_file = os.path.join(self.__dir_name, the_file)
try:
os.remove(the_file)
except OSError:
pass
if self.__run_level > 1:
sys.stderr.write('Files removed.\n')
def __create_pict_file(self): def __create_pict_file(self):
"""Create a file for all the pict data to be written to. """Create a file for all the pict data to be written to.
""" """
self.__pict_file = os.path.join(self.__dir_name, 'picts.rtf') self.__pict_file = os.path.join(self.__dir_name, 'picts.rtf')
write_pic_obj = open(self.__pict_file, 'w')
write_pic_obj.close()
self.__write_pic_obj = open(self.__pict_file, 'a') self.__write_pic_obj = open(self.__pict_file, 'a')
def __in_pict_func(self, line): def __in_pict_func(self, line):
if self.__cb_count == self.__pict_br_count: if self.__cb_count == self.__pict_br_count:
self.__in_pict = 0 self.__in_pict = False
self.__write_pic_obj.write("}\n") self.__write_pic_obj.write("}\n")
return 1 return True
else: else:
action = self.__pict_dict.get(self.__token_info) action = self.__pict_dict.get(self.__token_info)
if action: if action:
line = action(line) self.__write_pic_obj.write(action(line))
self.__write_pic_obj.write(line) return False
return 0
def __default(self, line, write_obj): def __default(self, line, write_obj):
"""Determine if each token marks the beginning of pict data. """Determine if each token marks the beginning of pict data.
@ -142,53 +128,50 @@ class Pict:
write_obj.write('mi<mk<pict-end__\n') write_obj.write('mi<mk<pict-end__\n')
if not self.__already_found_pict: if not self.__already_found_pict:
self.__create_pict_file() self.__create_pict_file()
self.__already_found_pict=1; self.__already_found_pict=True;
self.__print_rtf_header() self.__print_rtf_header()
self.__in_pict = 1 self.__in_pict = 1
self.__pict_br_count = self.__ob_count self.__pict_br_count = self.__ob_count
self.__cb_count = 0 self.__cb_count = 0
self.__write_pic_obj.write("{\\pict\n") self.__write_pic_obj.write("{\\pict\n")
return 0 return False
return 1 return True
def __print_rtf_header(self): def __print_rtf_header(self):
"""Print to pict file the necessary RTF data for the file to be """Print to pict file the necessary RTF data for the file to be
recognized as an RTF file. recognized as an RTF file.
""" """
self.__write_pic_obj.write("{\\rtf1 \n") self.__write_pic_obj.write("{\\rtf1 \n{\\fonttbl\\f0\\null;} \n")
self.__write_pic_obj.write("{\\fonttbl\\f0\\null;} \n") self.__write_pic_obj.write("{\\colortbl\\red255\\green255\\blue255;} \n\\pard \n")
self.__write_pic_obj.write("{\\colortbl\\red255\\green255\\blue255;} \n")
self.__write_pic_obj.write("\\pard \n")
def process_pict(self): def process_pict(self):
self.__make_dir() self.__make_dir()
read_obj = open(self.__file) with open(self.__file) as read_obj:
write_obj = open(self.__write_to, 'w') with open(self.__write_to, 'w') as write_obj:
line_to_read = 'dummy' for line in read_obj:
while line_to_read: self.__token_info = line[:16]
line_to_read = read_obj.readline() if self.__token_info == 'ob<nu<open-brack':
line = line_to_read self.__ob_count = line[-5:-1]
self.__token_info = line[:16] if self.__token_info == 'cb<nu<clos-brack':
if self.__token_info == 'ob<nu<open-brack': self.__cb_count = line[-5:-1]
self.__ob_count = line[-5:-1] if not self.__in_pict:
if self.__token_info == 'cb<nu<clos-brack': to_print = self.__default(line, write_obj)
self.__cb_count = line[-5:-1] if to_print :
if not self.__in_pict: write_obj.write(line)
to_print = self.__default(line, write_obj) else:
if to_print : to_print = self.__in_pict_func(line)
write_obj.write(line) if to_print :
else: write_obj.write(line)
to_print = self.__in_pict_func(line) if self.__already_found_pict:
if to_print : self.__write_pic_obj.write("}\n")
write_obj.write(line) self.__write_pic_obj.close()
if self.__already_found_pict:
self.__write_pic_obj.write("}\n")
self.__write_pic_obj.close()
read_obj.close()
write_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "pict.data") copy_obj.copy_file(self.__write_to, "pict.data")
try:
copy_obj.copy_file(self.__pict_file, "pict.rtf")
except:
pass
copy_obj.rename(self.__write_to, self.__file) copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to) os.remove(self.__write_to)
if self.__pict_count == 0: if self.__pict_count == 0:

View File

@ -15,8 +15,10 @@
# # # #
# # # #
######################################################################### #########################################################################
import os, re, tempfile import os, re, tempfile
from calibre.ebooks.rtf2xml import copy, check_brackets from calibre.ebooks.rtf2xml import copy, check_brackets
class ProcessTokens: class ProcessTokens:
""" """
Process each token on a line and add information that will be useful for Process each token on a line and add information that will be useful for
@ -41,14 +43,16 @@ class ProcessTokens:
self.__bracket_count=0 self.__bracket_count=0
self.__exception_handler = exception_handler self.__exception_handler = exception_handler
self.__bug_handler = bug_handler self.__bug_handler = bug_handler
def compile_expressions(self): def compile_expressions(self):
self.__num_exp = re.compile(r"([a-zA-Z]+)(.*)") self.__num_exp = re.compile(r"([a-zA-Z]+)(.*)")
self.__utf_exp = re.compile(r'(&.*?;)') self.__utf_exp = re.compile(r'(&.*?;)')
def initiate_token_dict(self): def initiate_token_dict(self):
self.__return_code = 0 self.__return_code = 0
self.dict_token={ self.dict_token={
# unicode # unicode
'mshex' : ('nu', '__________', self.__ms_hex_func), 'mshex' : ('nu', '__________', self.__ms_hex_func),
# brackets # brackets
'{' : ('nu', '{', self.ob_func), '{' : ('nu', '{', self.ob_func),
'}' : ('nu', '}', self.cb_func), '}' : ('nu', '}', self.cb_func),
@ -66,6 +70,7 @@ class ProcessTokens:
';' : ('mc', ';', self.ms_sub_func), ';' : ('mc', ';', self.ms_sub_func),
# this must be wrong # this must be wrong
'-' : ('mc', '-', self.ms_sub_func), '-' : ('mc', '-', self.ms_sub_func),
'line' : ('mi', 'hardline-break', self.hardline_func), #calibre
# misc => ml # misc => ml
'*' : ('ml', 'asterisk__', self.default_func), '*' : ('ml', 'asterisk__', self.default_func),
':' : ('ml', 'colon_____', self.default_func), ':' : ('ml', 'colon_____', self.default_func),
@ -73,7 +78,6 @@ class ProcessTokens:
'backslash' : ('nu', '\\', self.text_func), 'backslash' : ('nu', '\\', self.text_func),
'ob' : ('nu', '{', self.text_func), 'ob' : ('nu', '{', self.text_func),
'cb' : ('nu', '}', self.text_func), 'cb' : ('nu', '}', self.text_func),
'line' : ('nu', 'hard-lineb', self.default_func), #calibre
#'line' : ('nu', ' ', self.text_func), calibre #'line' : ('nu', ' ', self.text_func), calibre
# paragraph formatting => pf # paragraph formatting => pf
'page' : ('pf', 'page-break', self.default_func), 'page' : ('pf', 'page-break', self.default_func),
@ -159,15 +163,17 @@ class ProcessTokens:
'rtf' : ('ri', 'rtf_______', self.default_func), 'rtf' : ('ri', 'rtf_______', self.default_func),
'deff' : ('ri', 'deflt-font', self.default_func), 'deff' : ('ri', 'deflt-font', self.default_func),
'mac' : ('ri', 'macintosh_', self.default_func), 'mac' : ('ri', 'macintosh_', self.default_func),
'pc' : ('ri', 'pc________', self.default_func),
'pca' : ('ri', 'pca_______', self.default_func),
'ansi' : ('ri', 'ansi______', self.default_func), 'ansi' : ('ri', 'ansi______', self.default_func),
'ansicpg' : ('ri', 'ansi-codpg', self.default_func), 'ansicpg' : ('ri', 'ansi-codpg', self.default_func),
# notes => nt # notes => nt
'footnote' : ('nt', 'footnote__', self.default_func), 'footnote' : ('nt', 'footnote__', self.default_func),
'ftnalt' : ('nt', 'type______<endnote', self.two_part_func), 'ftnalt' : ('nt', 'type______<endnote', self.two_part_func),
# anchor => an # anchor => an
'tc' : ('an', 'toc_______', self.default_func), 'tc' : ('an', 'toc_______', self.default_func),
'bkmkstt' : ('an', 'book-mk-st', self.default_func), 'bkmkstt' : ('an', 'book-mk-st', self.default_func),
'bkmkstart' : ('an', 'book-mk-st', self.default_func), 'bkmkstart' : ('an', 'book-mk-st', self.default_func),
'bkmkend' : ('an', 'book-mk-en', self.default_func), 'bkmkend' : ('an', 'book-mk-en', self.default_func),
'xe' : ('an', 'index-mark', self.default_func), 'xe' : ('an', 'index-mark', self.default_func),
'rxe' : ('an', 'place_____', self.default_func), 'rxe' : ('an', 'place_____', self.default_func),
@ -347,7 +353,7 @@ class ProcessTokens:
10: 'Kanji numbering without the digit character', 10: 'Kanji numbering without the digit character',
11: 'Kanji numbering with the digit character', 11: 'Kanji numbering with the digit character',
1246: 'phonetic Katakana characters in aiueo order', 1246: 'phonetic Katakana characters in aiueo order',
1346: 'phonetic katakana characters in iroha order', 1346: 'phonetic katakana characters in iroha order',
14: 'double byte character', 14: 'double byte character',
15: 'single byte character', 15: 'single byte character',
16: 'Kanji numbering 3', 16: 'Kanji numbering 3',
@ -392,7 +398,7 @@ class ProcessTokens:
5121 : 'Arabic Algeria', 5121 : 'Arabic Algeria',
15361 : 'Arabic Bahrain', 15361 : 'Arabic Bahrain',
3073 : 'Arabic Egypt', 3073 : 'Arabic Egypt',
1 : 'Arabic General', 1 : 'Arabic General',
2049 : 'Arabic Iraq', 2049 : 'Arabic Iraq',
11265 : 'Arabic Jordan', 11265 : 'Arabic Jordan',
13313 : 'Arabic Kuwait', 13313 : 'Arabic Kuwait',
@ -417,7 +423,7 @@ class ProcessTokens:
1059 : 'Byelorussian', 1059 : 'Byelorussian',
1027 : 'Catalan', 1027 : 'Catalan',
2052 : 'Chinese China', 2052 : 'Chinese China',
4 : 'Chinese General', 4 : 'Chinese General',
3076 : 'Chinese Hong Kong', 3076 : 'Chinese Hong Kong',
4100 : 'Chinese Singapore', 4100 : 'Chinese Singapore',
1028 : 'Chinese Taiwan', 1028 : 'Chinese Taiwan',
@ -431,7 +437,7 @@ class ProcessTokens:
2057 : 'English British', 2057 : 'English British',
4105 : 'English Canada', 4105 : 'English Canada',
9225 : 'English Caribbean', 9225 : 'English Caribbean',
9 : 'English General', 9 : 'English General',
6153 : 'English Ireland', 6153 : 'English Ireland',
8201 : 'English Jamaica', 8201 : 'English Jamaica',
5129 : 'English New Zealand', 5129 : 'English New Zealand',
@ -595,30 +601,37 @@ class ProcessTokens:
num = num[1:] # chop off leading 0, which I added num = num[1:] # chop off leading 0, which I added
num = num.upper() # the mappings store hex in caps num = num.upper() # the mappings store hex in caps
return 'tx<hx<__________<\'%s\n' % num # add an ' for the mappings return 'tx<hx<__________<\'%s\n' % num # add an ' for the mappings
def ms_sub_func(self, pre, token, num): def ms_sub_func(self, pre, token, num):
return 'tx<mc<__________<%s\n' % token return 'tx<mc<__________<%s\n' % token
def hardline_func(self, pre, token, num):
return 'mi<tg<empty_____<%s\n' % token
def default_func(self, pre, token, num): def default_func(self, pre, token, num):
if num == None: if num is None:
num = 'true' num = 'true'
return 'cw<%s<%s<nu<%s\n' % (pre, token, num) return 'cw<%s<%s<nu<%s\n' % (pre, token, num)
def __list_type_func(self, pre, token, num): def __list_type_func(self, pre, token, num):
type = 'arabic' type = 'arabic'
if num == None: if num is None:
type = 'Arabic' type = 'Arabic'
else: else:
try: try:
num = int(num) num = int(num)
except ValueError: except ValueError:
if self.__run_level > 3: if self.__run_level > 3:
msg = 'number "%s" cannot be converted to integer\n' % num msg = 'Number "%s" cannot be converted to integer\n' % num
raise self.__bug_handler, msg raise self.__bug_handler, msg
type = self.__number_type_dict.get(num) type = self.__number_type_dict.get(num)
if type == None: if type is None:
if self.__run_level > 3: if self.__run_level > 3:
msg = 'No type for "%s" in self.__number_type_dict\n' msg = 'No type for "%s" in self.__number_type_dict\n'
raise self.__bug_handler raise self.__bug_handler
type = 'Arabic' type = 'Arabic'
return 'cw<%s<%s<nu<%s\n' % (pre, token, type) return 'cw<%s<%s<nu<%s\n' % (pre, token, type)
def __language_func(self, pre, token, num): def __language_func(self, pre, token, num):
lang_name = self.__language_dict.get(int(re.search('[0-9]+', num).group())) lang_name = self.__language_dict.get(int(re.search('[0-9]+', num).group()))
if not lang_name: if not lang_name:
@ -627,31 +640,36 @@ class ProcessTokens:
msg = 'No entry for number "%s"' % num msg = 'No entry for number "%s"' % num
raise self.__bug_handler, msg raise self.__bug_handler, msg
return 'cw<%s<%s<nu<%s\n' % (pre, token, lang_name) return 'cw<%s<%s<nu<%s\n' % (pre, token, lang_name)
def two_part_func(self, pre, token, num): def two_part_func(self, pre, token, num):
list = token.split("<") list = token.split("<")
token = list[0] token = list[0]
num = list[1] num = list[1]
return 'cw<%s<%s<nu<%s\n' % (pre, token, num) return 'cw<%s<%s<nu<%s\n' % (pre, token, num)
##return 'cw<nu<nu<nu<%s>num<%s\n' % (token, num) ##return 'cw<nu<nu<nu<%s>num<%s\n' % (token, num)
def divide_by_2(self, pre, token, num): def divide_by_2(self, pre, token, num):
num = self.divide_num(num, 2) num = self.divide_num(num, 2)
return 'cw<%s<%s<nu<%s\n' % (pre, token, num) return 'cw<%s<%s<nu<%s\n' % (pre, token, num)
##return 'cw<nu<nu<nu<%s>%s<%s\n' % (token, num, token) ##return 'cw<nu<nu<nu<%s>%s<%s\n' % (token, num, token)
def divide_by_20(self, pre, token, num): def divide_by_20(self, pre, token, num):
num = self.divide_num(num, 20) num = self.divide_num(num, 20)
return 'cw<%s<%s<nu<%s\n' % (pre, token, num) return 'cw<%s<%s<nu<%s\n' % (pre, token, num)
##return 'cw<nu<nu<nu<%s>%s<%s\n' % (token, num, token) ##return 'cw<nu<nu<nu<%s>%s<%s\n' % (token, num, token)
def text_func(self, pre, token, num=None): def text_func(self, pre, token, num=None):
return 'tx<nu<__________<%s\n' % token return 'tx<nu<__________<%s\n' % token
def ob_func(self, pre, token, num=None): def ob_func(self, pre, token, num=None):
self.__bracket_count += 1 self.__bracket_count += 1
##return 'ob<%04d\n' % self.__bracket_count
return 'ob<nu<open-brack<%04d\n' % self.__bracket_count return 'ob<nu<open-brack<%04d\n' % self.__bracket_count
def cb_func(self, pre, token, num=None): def cb_func(self, pre, token, num=None):
##line = 'cb<%04d\n' % self.__bracket_count
line = 'cb<nu<clos-brack<%04d\n' % self.__bracket_count line = 'cb<nu<clos-brack<%04d\n' % self.__bracket_count
self.__bracket_count -= 1 self.__bracket_count -= 1
return line return line
def color_func(self, pre, token, num): def color_func(self, pre, token, num):
third_field = 'nu' third_field = 'nu'
if num[-1] == ';': if num[-1] == ';':
@ -662,6 +680,7 @@ class ProcessTokens:
num = "0" + num num = "0" + num
return 'cw<%s<%s<%s<%s\n' % (pre, token, third_field, num) return 'cw<%s<%s<%s<%s\n' % (pre, token, third_field, num)
##return 'cw<cl<%s<nu<nu<%s>%s<%s\n' % (third_field, token, num, token) ##return 'cw<cl<%s<nu<nu<%s>%s<%s\n' % (third_field, token, num, token)
def bool_st_func(self, pre, token, num): def bool_st_func(self, pre, token, num):
if num is None or num == '' or num == '1': if num is None or num == '' or num == '1':
return 'cw<%s<%s<nu<true\n' % (pre, token) return 'cw<%s<%s<nu<true\n' % (pre, token)
@ -670,24 +689,23 @@ class ProcessTokens:
return 'cw<%s<%s<nu<false\n' % (pre, token) return 'cw<%s<%s<nu<false\n' % (pre, token)
##return 'cw<nu<nu<nu<%s>false<%s\n' % (token, token) ##return 'cw<nu<nu<nu<%s>false<%s\n' % (token, token)
else: else:
msg = 'boolean should have some value module process tokens\n' msg = "boolean should have some value module process tokens\ntoken is %s\n'%s'\n" % (token, num)
msg += 'token is ' + token + "\n"
msg += "'" + num + "'" + "\n"
raise self.__bug_handler, msg raise self.__bug_handler, msg
def __no_sup_sub_func(self, pre, token, num): def __no_sup_sub_func(self, pre, token, num):
the_string = 'cw<ci<subscript_<nu<false\n' the_string = 'cw<ci<subscript_<nu<false\n'
the_string += 'cw<ci<superscrip<nu<false\n' the_string += 'cw<ci<superscrip<nu<false\n'
return the_string return the_string
def divide_num(self, numerator, denominator): def divide_num(self, numerator, denominator):
try: try:
numerator = float(re.search('[0-9.]+', numerator).group()) #calibre why ignore negative number? Wrong in case of \fi
numerator = float(re.search('[0-9.\-]+', numerator).group())
except TypeError, msg: except TypeError, msg:
if self.__run_level > 3: if self.__run_level > 3:
msg = 'no number to process?\n' msg = ('No number to process?\nthis indicates that the token \(\\li\) \
msg += 'this indicates that the token ' should have a number and does not\nnumerator is \
msg += ' \(\\li\) should have a number and does not\n' "%s"\ndenominator is "%s"\n') % (numerator, denominator)
msg += 'numerator is "%s"\n' % numerator
msg += 'denominator is "%s"\n' % denominator
raise self.__bug_handler, msg raise self.__bug_handler, msg
if 5 > self.__return_code: if 5 > self.__return_code:
self.__return_code = 5 self.__return_code = 5
@ -698,9 +716,10 @@ class ProcessTokens:
if string_num[-2:] == ".0": if string_num[-2:] == ".0":
string_num = string_num[:-2] string_num = string_num[:-2]
return string_num return string_num
def split_let_num(self, token): def split_let_num(self, token):
match_obj = re.search(self.__num_exp,token) match_obj = re.search(self.__num_exp,token)
if match_obj != None: if match_obj is not None:
first = match_obj.group(1) first = match_obj.group(1)
second = match_obj.group(2) second = match_obj.group(2)
if not second: if not second:
@ -714,6 +733,7 @@ class ProcessTokens:
raise self.__bug_handler raise self.__bug_handler
return token, 0 return token, 0
return first, second return first, second
def convert_to_hex(self,number): def convert_to_hex(self,number):
"""Convert a string to uppercase hexidecimal""" """Convert a string to uppercase hexidecimal"""
num = int(number) num = int(number)
@ -722,6 +742,7 @@ class ProcessTokens:
return hex_num return hex_num
except: except:
raise self.__bug_handler raise self.__bug_handler
def process_cw(self, token): def process_cw(self, token):
"""Change the value of the control word by determining what dictionary """Change the value of the control word by determining what dictionary
it belongs to""" it belongs to"""
@ -737,89 +758,62 @@ class ProcessTokens:
pre, token, action = self.dict_token.get(token, (None, None, None)) pre, token, action = self.dict_token.get(token, (None, None, None))
if action: if action:
return action(pre, token, num) return action(pre, token, num)
# unused function
def initiate_token_actions(self):
    """Build ``self.action_for_token``.

    The table maps the first character of a token ('{', '}' or a
    backslash) to the bound method that handles that kind of token.
    """
    self.action_for_token = {
        '{': self.ob_func,
        '}': self.cb_func,
        '\\': self.process_cw,
    }
# unused function
# NOTE(review): this method looks broken as written and is only kept
# because nothing calls it:
#   * the lookup below unpacks exactly two values, while another lookup on
#     self.dict_token in this class unpacks three (pre, token, action);
#   * .get() is called without a default, so if dict_token is a plain dict
#     an unknown key yields None, which cannot be unpacked;
#   * presumably the table meant here is self.action_for_token (keyed by
#     the first character of the token) -- confirm before reviving this.
def evaluate_token(self,token):
"""Evaluate tokens. Return a value if the token is not a
control word. Otherwise, pass token onto another method
for further evaluation."""
# Look the handler up by the first character of the token.
token, action = self.dict_token.get(token[0:1])
if action:
# A handler exists: its return value is the output line.
line = action(token)
return line
else :
# No handler: emit the token as a plain-text line.
return 'tx<nu<nu<nu<nu<%s\n' % token
def __check_brackets(self, in_file):
    """Run the bracket checker over ``in_file``.

    Returns 1 when the checker reports a problem.  When the brackets are
    fine the method falls off the end and therefore returns None, so
    callers should only test the result for truthiness.
    """
    self.__check_brack_obj = check_brackets.CheckBrackets(file=in_file)
    # check_brackets() returns a sequence; its first item is the verdict.
    good_br = self.__check_brack_obj.check_brackets()[0]
    if not good_br:
        return 1
def process_tokens(self): def process_tokens(self):
"""Main method for handling other methods. """ """Main method for handling other methods. """
first_token = 0
second_token = 0
read_obj = open(self.__file, 'r')
write_obj = open(self.__write_to, 'w')
line_to_read = "dummy"
line_count = 0 line_count = 0
while line_to_read: with open(self.__file, 'r') as read_obj:
line_to_read = read_obj.readline() with open(self.__write_to, 'wb') as write_obj:
token = line_to_read for line in read_obj:
token = token.replace("\n","") token = line.replace("\n","")
if not token: line_count += 1
continue if line_count == 1 and token != '\\{':
line_count += 1 msg = 'Invalid RTF: document doesn\'t start with {\n'
try: raise self.__exception_handler, msg
token.decode('us-ascii') elif line_count == 2 and token[0:4] != '\\rtf':
except UnicodeError, msg: msg = 'Invalid RTF: document doesn\'t start with \\rtf \n'
msg = str(msg) raise self.__exception_handler, msg
msg += 'Invalid RTF: File not ascii encoded.\n'
raise self.__exception_handler, msg the_index = token.find('\\ ')
if not first_token: if token is not None and the_index > -1:
if token != '\\{': msg = 'Invalid RTF: token "\\ " not valid.\n'
msg = 'Invalid RTF: document doesn\'t start with {\n' raise self.__exception_handler, msg
raise self.__exception_handler, msg elif token[:1] == "\\":
first_token = 1 try:
elif first_token and not second_token: token.decode('us-ascii')
if token[0:4] != '\\rtf': except UnicodeError, msg:
msg ='Invalid RTF: document doesn\'t start with \\rtf \n' msg = 'Invalid RTF: Tokens not ascii encoded.\n%s' % str(msg)
raise self.__exception_handler, msg raise self.__exception_handler, msg
second_token = 1 line = self.process_cw(token)
##token = self.evaluate_token(token) if line is not None:
the_index = token.find('\\ ') write_obj.write(line)
if token != None and the_index > -1:
msg ='Invalid RTF: token "\\ " not valid. \n'
raise self.__exception_handler, msg
elif token[0:1] == "\\":
line = self.process_cw(token)
if line != None:
write_obj.write(line)
else:
fields = re.split(self.__utf_exp, token)
for field in fields:
if not field:
continue
if field[0:1] == '&':
write_obj.write('tx<ut<__________<%s\n' % field)
else: else:
write_obj.write('tx<nu<__________<%s\n' % field) fields = re.split(self.__utf_exp, token)
read_obj.close() for field in fields:
write_obj.close() if not field:
continue
if field[0:1] == '&':
write_obj.write('tx<ut<__________<%s\n' % field)
else:
write_obj.write('tx<nu<__________<%s\n' % field)
if not line_count: if not line_count:
msg ='Invalid RTF: file appears to be empty. \n' msg = 'Invalid RTF: file appears to be empty.\n'
raise self.__exception_handler, msg raise self.__exception_handler, msg
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "processed_tokens.data") copy_obj.copy_file(self.__write_to, "processed_tokens.data")
copy_obj.rename(self.__write_to, self.__file) copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to) os.remove(self.__write_to)
bad_brackets = self.__check_brackets(self.__file) bad_brackets = self.__check_brackets(self.__file)
if bad_brackets: if bad_brackets:
msg = 'Invalid RTF: document does not have matching brackets.\n' msg = 'Invalid RTF: document does not have matching brackets.\n'

View File

@ -16,7 +16,10 @@
# # # #
######################################################################### #########################################################################
import os, tempfile import os, tempfile
from calibre.ebooks.rtf2xml import copy from calibre.ebooks.rtf2xml import copy
from calibre.utils.cleantext import clean_ascii_chars
class ReplaceIllegals: class ReplaceIllegals:
""" """
reaplace illegal lower ascii characters reaplace illegal lower ascii characters
@ -30,21 +33,14 @@ class ReplaceIllegals:
self.__copy = copy self.__copy = copy
self.__run_level = run_level self.__run_level = run_level
self.__write_to = tempfile.mktemp() self.__write_to = tempfile.mktemp()
def replace_illegals(self): def replace_illegals(self):
""" """
""" """
nums = [0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 13, 14, 15, 16, 17, 18, 19] with open(self.__file, 'r') as read_obj:
read_obj = open(self.__file, 'r') with open(self.__write_to, 'w') as write_obj:
write_obj = open(self.__write_to, 'w') for line in read_obj:
line_to_read = 1 write_obj.write(clean_ascii_chars(line))
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
for num in nums:
line = line.replace(chr(num), '')
write_obj.write(line)
read_obj.close()
write_obj.close()
copy_obj = copy.Copy() copy_obj = copy.Copy()
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "replace_illegals.data") copy_obj.copy_file(self.__write_to, "replace_illegals.data")

View File

@ -16,7 +16,10 @@
# # # #
######################################################################### #########################################################################
import os, re, tempfile import os, re, tempfile
from calibre.ebooks.rtf2xml import copy from calibre.ebooks.rtf2xml import copy
from calibre.utils.mreplace import MReplace
class Tokenize: class Tokenize:
"""Tokenize RTF into one line per field. Each line will contain information useful for the rest of the script""" """Tokenize RTF into one line per field. Each line will contain information useful for the rest of the script"""
def __init__(self, def __init__(self,
@ -28,89 +31,175 @@ class Tokenize:
self.__file = in_file self.__file = in_file
self.__bug_handler = bug_handler self.__bug_handler = bug_handler
self.__copy = copy self.__copy = copy
self.__special_tokens = [ '_', '~', "'", '{', '}' ]
self.__write_to = tempfile.mktemp() self.__write_to = tempfile.mktemp()
def __from_ms_to_utf8(self, match_obj):
    """Turn a matched unicode number into an XML hex character reference.

    ``match_obj.group(1)`` must hold a (possibly negative) decimal
    integer.  Negative values are shifted up by 65536 before formatting.
    Returns a string of the form ``&#xHHHH;`` with upper-case hex digits.
    """
    uni_char = int(match_obj.group(1))
    if uni_char < 0:
        uni_char += 65536
    return '&#x%X;' % uni_char
def __neg_unicode_func(self, match_obj):
    """Format ``65536 - int(group(1))`` as an XML hex character reference.

    NOTE(review): this only yields a 16-bit code point if the pattern
    captures the magnitude without its minus sign; the pattern that would
    feed this method is not visible here -- confirm before use.
    """
    uni_char = 65536 - int(match_obj.group(1))
    return '&#x%X;' % uni_char
def __sub_line_reg(self, line):
    """Apply the basic substitutions to one line of RTF and return it.

    The literal replacements run strictly in the order listed (the order
    matters: escaped braces must become generic tokens before the bare
    braces get their backslash).  Afterwards unicode escapes, hex escapes
    and a trailing backslash are rewritten with the pre-compiled patterns.
    """
    simple_replacements = (
        ("\\\\", "\\backslash "),
        # "\~" used to be replaced twice, which inserted two spaces
        # after it; it is now replaced once.
        ("\\~", "\\~ "),
        ("\\;", "\\; "),
        ("&", "&amp;"),
        ("<", "&lt;"),
        (">", "&gt;"),
        ("\\_", "\\_ "),
        ("\\:", "\\: "),
        ("\\-", "\\- "),
        # turn into a generic token to eliminate special
        # cases and make processing easier
        ("\\{", "\\ob "),
        ("\\}", "\\cb "),
        # put a backslash in front of to eliminate special cases and
        # make processing easier
        ("{", "\\{"),
        ("}", "\\}"),
    )
    for old, new in simple_replacements:
        line = line.replace(old, new)
    line = re.sub(self.__utf_exp, self.__from_ms_to_utf8, line)
    # The backslash that starts the emitted control word is written as an
    # escaped backslash in the template, so the result does not depend on
    # how the re module treats unknown escapes such as \m or \p.
    line = re.sub(self.__ms_hex_exp, "\\\\mshex0\\g<1> ", line)
    # this is for older RTF
    line = re.sub(self.__par_exp, '\\\\par ', line)
    return line
def __compile_expressions(self):
    """Pre-compile the regular expressions used by the tokenizer."""
    # backslash + apostrophe followed by any two characters (captured)
    self.__ms_hex_exp = re.compile(r"\\\'(..)")
    # \u followed by an optionally negative 3-6 digit number (captured)
    # and at most one trailing space
    self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) {0,1}")
    # split points: an escaped backslash/brace, a bare brace, or a
    # backslash word together with one optional trailing whitespace
    self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\\[^\s\\{}&]+(?:\s)?)")
    # a backslash at the very end of the string
    self.__par_exp = re.compile(r'\\$')
    # a backslash word ending in digits, followed by non-digits
    self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
    ##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
def __create_tokens(self):
self.__compile_expressions() self.__compile_expressions()
read_obj = open(self.__file, 'r') #variables
write_obj = open(self.__write_to, 'w') self.__uc_char = 0
line_to_read = "dummy" self.__uc_bin = False
while line_to_read: self.__uc_value = [1]
line_to_read = read_obj.readline()
def __reini_utf8_counters(self):
    """Reset the "fallback characters still to skip" state."""
    self.__uc_char = 0
    self.__uc_bin = False

def __remove_uc_chars(self, startchar, token):
    """Drop pending fallback characters from ``token``.

    Scanning starts at index ``startchar``.  Spaces are skipped without
    being counted; every other character uses up one unit of
    ``self.__uc_char``.  Returns what is left of the token once the
    counter reaches zero, or '' when the token is used up first.
    """
    for i in range(startchar, len(token)):
        if token[i] == " ":
            continue
        elif self.__uc_char:
            self.__uc_char -= 1
        else:
            return token[i:]
    #if only " " and char to skip
    return ''

def __unicode_process(self, token):
    """Process one token for \\u handling and return the text to keep.

    Tracks the \\ucN value per brace scope, converts \\uN tokens into
    ASCII text (an XML decimal character reference for values >= 128)
    and swallows the fallback characters that follow a \\uN token.
    Returns '' for tokens that must be dropped.  The method is stateful:
    tokens have to be fed in document order.
    """
    #change scope in
    if token == '\\{':
        self.__uc_value.append(self.__uc_value[-1])
        #basic error handling
        self.__reini_utf8_counters()
        return token
    #change scope out
    elif token == '\\}':
        self.__uc_value.pop()
        self.__reini_utf8_counters()
        return token
    #add a uc control
    elif token[:3] == '\\uc':
        self.__uc_value[-1] = int(token[3:])
        self.__reini_utf8_counters()
        return token
    #bin data to skip
    elif self.__uc_bin:
        self.__uc_bin = False
        return ''
    #uc char to remove
    elif self.__uc_char:
        #handle \bin tag in case of uc char to skip
        # The literal must be spelled '\\bin': '\bin' is a backspace
        # character followed by "in" and could never equal token[:4].
        if token[:4] == '\\bin':
            self.__uc_char -= 1
            self.__uc_bin = True
            return ''
        elif token[:1] == "\\":
            self.__uc_char -= 1
            return ''
        else:
            return self.__remove_uc_chars(0, token)
    #go for real \u token
    match_obj = self.__utf_exp.match(token)
    if match_obj is not None:
        self.__reini_utf8_counters()
        #get value and handle negative case
        uni_char = int(match_obj.group(1))
        # length of "\u" plus the digits (and sign) that were captured
        uni_len = len(match_obj.group(1)) + 2
        if uni_char < 0:
            uni_char += 65536
        # ASCII stays as the character itself, anything else becomes a
        # decimal character reference (what encoding the code point with
        # 'xmlcharrefreplace' produces), without needing unichr().
        if uni_char < 128:
            uni_char = chr(uni_char)
        else:
            uni_char = '&#%d;' % uni_char
        self.__uc_char = self.__uc_value[-1]
        #there is only an unicode char
        if len(token) <= uni_len:
            return uni_char
        #an unicode char and something else
        #must be after as it is splited on \
        #necessary? maybe for \bin?
        elif not self.__uc_char:
            return uni_char + token[uni_len:]
        #if not uc0 and chars
        else:
            return uni_char + self.__remove_uc_chars(uni_len, token)
    #default
    return token
def __sub_reg_split(self, input_file):
    """Apply the basic substitutions to the whole text and split it.

    ``input_file`` is the complete file content as one string.  Returns
    the list of tokens produced by ``self.__splitexp``, without empty
    strings and without bare newline tokens.
    """
    input_file = self.__replace_spchar.mreplace(input_file)
    # The backslashes that must appear in the output are written as
    # escaped backslashes in the templates, so the result does not depend
    # on how the re module treats unknown escapes (\m, \{, \u, \}).
    input_file = self.__ms_hex_exp.sub("\\\\mshex0\\g<1> ", input_file)
    input_file = self.__utf_ud.sub("\\\\{\\\\uc0 \\g<1>\\\\}", input_file)
    #remove \n in bin data
    input_file = self.__bin_exp.sub(
        lambda x: x.group().replace('\n', '') + '\n', input_file)
    #split
    tokens = re.split(self.__splitexp, input_file)
    #remove empty tokens and \n
    return [tok for tok in tokens if tok and tok != '\n']
def __compile_expressions(self):
    """Build the literal-replacement table and compile the regexes
    used by the tokenizer."""
    SIMPLE_RPL = {
        "\\\\": "\\backslash ",
        "\\~": "\\~ ",
        "\\;": "\\; ",
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        "\\_": "\\_ ",
        "\\:": "\\: ",
        "\\-": "\\- ",
        # turn into a generic token to eliminate special
        # cases and make processing easier
        "\\{": "\\ob ",
        "\\}": "\\cb ",
        # put a backslash in front of to eliminate special cases and
        # make processing easier
        "{": "\\{",
        "}": "\\}",
        # this is for older RTF
        # NOTE(review): this key is written as a regular expression
        # (backslash at end of string).  If MReplace treats its keys as
        # literal text it will never match -- confirm against MReplace.
        r'\\$': '\\par ',
    }
    self.__replace_spchar = MReplace(SIMPLE_RPL)
    #add ;? in case of char following \u
    # backslash + apostrophe followed by two hex digits (captured)
    self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})")
    # \u followed by an optionally negative 3-6 digit number (captured)
    # and at most one trailing space
    self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) ?")
    # \binN, separating whitespace, then a run of 0/1/newline characters
    self.__bin_exp = re.compile(r"(?:\\bin(-?\d{0,10})[\n ]+)[01\n]+")
    #manage upr/ud situations
    # The destination marker is the two literal characters "\*", which is
    # \\\* in a regex; \\* would mean "zero or more backslashes" and the
    # pattern could then never match a \*\ud group.
    self.__utf_ud = re.compile(
        r"\\{[\n ]?\\upr[\n ]?(?:\\{.*?\\})[\n ]?"
        r"\\{[\n ]?\\\*[\n ]?\\ud[\n ]?(\\{.*?\\})[\n ]?\\}[\n ]?\\}")
    #add \n in split for whole file reading
    #why keep backslash whereas \is replaced before?
    #remove \n from endline char
    self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
def tokenize(self):
    """Main method for handling other methods.

    Reads the whole file, uses self.__sub_reg_split to make the basic
    substitutions and split the text into tokens, runs every token
    through self.__unicode_process, and writes the surviving tokens one
    per line.  The result then replaces the input file.
    """
    #read
    with open(self.__file, 'r') as read_obj:
        input_file = read_obj.read()
    #process simple replacements and split giving us a correct list
    #remove '' and \n in the process
    tokens = self.__sub_reg_split(input_file)
    #correct unicode
    # __unicode_process keeps state between calls, so the tokens are
    # processed eagerly and in order.
    tokens = [self.__unicode_process(tok) for tok in tokens]
    #remove empty items created by removing \uc
    tokens = [tok for tok in tokens if tok]
    #write
    with open(self.__write_to, 'wb') as write_obj:
        write_obj.write('\n'.join(tokens))
    #Move and copy
    copy_obj = copy.Copy(bug_handler = self.__bug_handler)
    if self.__copy:
        copy_obj.copy_file(self.__write_to, "tokenize.data")
    copy_obj.rename(self.__write_to, self.__file)
    os.remove(self.__write_to)
#self.__special_tokens = [ '_', '~', "'", '{', '}' ]

View File

@ -1,3 +1,6 @@
from functions import textile, textile_restricted, Textile from functions import textile, textile_restricted, Textile
if False:
textile, textile_restricted, Textile
__all__ = ['textile', 'textile_restricted'] __all__ = ['textile', 'textile_restricted']

View File

@ -425,7 +425,7 @@ class Textile(object):
text = text.split('\n\n') text = text.split('\n\n')
tag = 'p' tag = 'p'
atts = cite = graf = ext = '' atts = cite = graf = ext = c1 = ''
out = [] out = []

View File

@ -1,4 +1,8 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
''' '''
Read content from txt file. Read content from txt file.
@ -7,15 +11,10 @@ Read content from txt file.
import os, re import os, re
from calibre import prepare_string_for_xml, isbytestring from calibre import prepare_string_for_xml, isbytestring
from calibre.ebooks.markdown import markdown
from calibre.ebooks.textile import textile
from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.txt.heuristicprocessor import TXTHeuristicProcessor from calibre.ebooks.txt.heuristicprocessor import TXTHeuristicProcessor
from calibre.ebooks.conversion.preprocess import DocAnalysis from calibre.ebooks.conversion.preprocess import DocAnalysis
from calibre.utils.cleantext import clean_ascii_chars
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>' HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
@ -35,9 +34,7 @@ def clean_txt(txt):
# Remove excessive line breaks. # Remove excessive line breaks.
txt = re.sub('\n{3,}', '\n\n', txt) txt = re.sub('\n{3,}', '\n\n', txt)
#remove ASCII invalid chars : 0 to 8 and 11-14 to 24 #remove ASCII invalid chars : 0 to 8 and 11-14 to 24
chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19)) txt = clean_ascii_chars(txt)
illegal_chars = re.compile(u'|'.join(map(unichr, chars)))
txt = illegal_chars.sub('', txt)
return txt return txt
@ -75,6 +72,7 @@ def convert_heuristic(txt, title='', epub_split_size_kb=0):
return tp.convert(txt, title, epub_split_size_kb) return tp.convert(txt, title, epub_split_size_kb)
def convert_markdown(txt, title='', disable_toc=False): def convert_markdown(txt, title='', disable_toc=False):
from calibre.ebooks.markdown import markdown
md = markdown.Markdown( md = markdown.Markdown(
extensions=['footnotes', 'tables', 'toc'], extensions=['footnotes', 'tables', 'toc'],
extension_configs={"toc": {"disable_toc": disable_toc}}, extension_configs={"toc": {"disable_toc": disable_toc}},
@ -82,6 +80,7 @@ def convert_markdown(txt, title='', disable_toc=False):
return HTML_TEMPLATE % (title, md.convert(txt)) return HTML_TEMPLATE % (title, md.convert(txt))
def convert_textile(txt, title=''):
    """Convert Textile markup ``txt`` to HTML and wrap it in HTML_TEMPLATE
    together with ``title``."""
    # textile is imported here, inside the function, rather than at
    # module level.
    from calibre.ebooks.textile import textile
    html = textile(txt, encoding='utf-8')
    return HTML_TEMPLATE % (title, html)

View File

@ -269,10 +269,14 @@ def question_dialog(parent, title, msg, det_msg='', show_copy_button=True,
return d.exec_() == yes_button return d.exec_() == yes_button
def info_dialog(parent, title, msg, det_msg='', show=False,
        show_copy_button=True):
    """Create an information message box.

    When ``show`` is true the dialog is executed immediately and the
    result of ``exec_()`` is returned; otherwise the dialog object itself
    is returned.  With ``show_copy_button=False`` the dialog's ``cb``
    widget is hidden.
    """
    d = MessageBox(QMessageBox.Information, title, msg, QMessageBox.Ok,
                   parent, det_msg)
    d.setIconPixmap(QPixmap(I('dialog_information.png')))
    if not show_copy_button:
        d.cb.setVisible(False)

    if show:
        return d.exec_()
    return d

View File

@ -27,14 +27,17 @@ class PluginWidget(QWidget, Ui_Form):
def __init__(self, parent=None): def __init__(self, parent=None):
QWidget.__init__(self, parent) QWidget.__init__(self, parent)
self.setupUi(self) self.setupUi(self)
from calibre.library.catalog import FIELDS
self.all_fields = []
for x in FIELDS :
if x != 'all':
self.all_fields.append(x)
QListWidgetItem(x, self.db_fields)
def initialize(self, name, db): #not working properly to update def initialize(self, name, db): #not working properly to update
from calibre.library.catalog import FIELDS
self.all_fields = [x for x in FIELDS if x != 'all']
#add custom columns
self.all_fields.extend([x for x in sorted(db.custom_field_keys())])
#populate
for x in self.all_fields:
QListWidgetItem(x, self.db_fields)
self.name = name self.name = name
fields = gprefs.get(name+'_db_fields', self.all_fields) fields = gprefs.get(name+'_db_fields', self.all_fields)
# Restore the activated db_fields from last use # Restore the activated db_fields from last use

View File

@ -0,0 +1,21 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from PyQt4.Qt import QDialog
from calibre.gui2.dialogs.drm_error_ui import Ui_Dialog
class DRMErrorMessage(QDialog, Ui_Dialog):
def __init__(self, parent=None, title=None):
QDialog.__init__(self, parent)
self.setupUi(self)
if title is not None:
t = unicode(self.msg.text())
self.msg.setText('<h2>%s</h2>%s'%(title, t))
self.resize(self.sizeHint())

View File

@ -0,0 +1,102 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>Dialog</class>
<widget class="QDialog" name="Dialog">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>417</width>
<height>235</height>
</rect>
</property>
<property name="windowTitle">
<string>This book is DRMed</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="0" column="0">
<widget class="QLabel" name="label">
<property name="sizePolicy">
<sizepolicy hsizetype="Preferred" vsizetype="Preferred">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="maximumSize">
<size>
<width>132</width>
<height>16777215</height>
</size>
</property>
<property name="text">
<string/>
</property>
<property name="pixmap">
<pixmap resource="../../../../resources/images.qrc">:/images/document-encrypt.png</pixmap>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QLabel" name="msg">
<property name="text">
<string>&lt;p&gt;This book is locked by &lt;b&gt;DRM&lt;/b&gt;. To learn more about DRM and why you cannot read or convert this book in calibre,
&lt;a href=&quot;http://bugs.calibre-ebook.com/wiki/DRM&quot;&gt;click here&lt;/a&gt;.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
<property name="openExternalLinks">
<bool>true</bool>
</property>
</widget>
</item>
<item row="1" column="0" colspan="2">
<widget class="QDialogButtonBox" name="buttonBox">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="standardButtons">
<set>QDialogButtonBox::Close</set>
</property>
</widget>
</item>
</layout>
</widget>
<resources>
<include location="../../../../resources/images.qrc"/>
</resources>
<connections>
<connection>
<sender>buttonBox</sender>
<signal>accepted()</signal>
<receiver>Dialog</receiver>
<slot>accept()</slot>
<hints>
<hint type="sourcelabel">
<x>248</x>
<y>254</y>
</hint>
<hint type="destinationlabel">
<x>157</x>
<y>274</y>
</hint>
</hints>
</connection>
<connection>
<sender>buttonBox</sender>
<signal>rejected()</signal>
<receiver>Dialog</receiver>
<slot>reject()</slot>
<hints>
<hint type="sourcelabel">
<x>316</x>
<y>260</y>
</hint>
<hint type="destinationlabel">
<x>286</x>
<y>274</y>
</hint>
</hints>
</connection>
</connections>
</ui>

View File

@ -15,7 +15,7 @@ from calibre.ebooks.metadata import string_to_authors, authors_to_string
from calibre.ebooks.metadata.book.base import composite_formatter from calibre.ebooks.metadata.book.base import composite_formatter
from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.meta import get_metadata
from calibre.gui2.custom_column_widgets import populate_metadata_page from calibre.gui2.custom_column_widgets import populate_metadata_page
from calibre.gui2 import error_dialog from calibre.gui2 import error_dialog, ResizableDialog
from calibre.gui2.progress_indicator import ProgressIndicator from calibre.gui2.progress_indicator import ProgressIndicator
from calibre.utils.config import dynamic from calibre.utils.config import dynamic
from calibre.utils.titlecase import titlecase from calibre.utils.titlecase import titlecase
@ -49,7 +49,7 @@ def get_cover_data(path):
class MyBlockingBusy(QDialog): class MyBlockingBusy(QDialog): # {{{
do_one_signal = pyqtSignal() do_one_signal = pyqtSignal()
@ -241,8 +241,9 @@ class MyBlockingBusy(QDialog):
self.current_index += 1 self.current_index += 1
self.do_one_signal.emit() self.do_one_signal.emit()
# }}}
class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog): class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
s_r_functions = { '' : lambda x: x, s_r_functions = { '' : lambda x: x,
_('Lower Case') : lambda x: icu_lower(x), _('Lower Case') : lambda x: icu_lower(x),
@ -261,9 +262,8 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
] ]
def __init__(self, window, rows, model, tab): def __init__(self, window, rows, model, tab):
QDialog.__init__(self, window) ResizableDialog.__init__(self, window)
Ui_MetadataBulkDialog.__init__(self) Ui_MetadataBulkDialog.__init__(self)
self.setupUi(self)
self.model = model self.model = model
self.db = model.db self.db = model.db
self.ids = [self.db.id(r) for r in rows] self.ids = [self.db.id(r) for r in rows]

File diff suppressed because it is too large Load Diff

View File

@ -823,7 +823,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
if book.series_index is not None: if book.series_index is not None:
self.series_index.setValue(book.series_index) self.series_index.setValue(book.series_index)
if book.has_cover: if book.has_cover:
if d.opt_auto_download_cover.isChecked() and book.has_cover: if d.opt_auto_download_cover.isChecked():
self.fetch_cover() self.fetch_cover()
else: else:
self.fetch_cover_button.setFocus(Qt.OtherFocusReason) self.fetch_cover_button.setFocus(Qt.OtherFocusReason)

View File

@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import time, os import time, os
from PyQt4.Qt import SIGNAL, QUrl, QAbstractListModel, Qt, \ from PyQt4.Qt import SIGNAL, QUrl, QAbstractListModel, Qt, \
QVariant, QInputDialog QVariant
from calibre.web.feeds.recipes import compile_recipe from calibre.web.feeds.recipes import compile_recipe
from calibre.web.feeds.news import AutomaticNewsRecipe from calibre.web.feeds.news import AutomaticNewsRecipe
@ -256,24 +256,61 @@ class %(classname)s(%(base_class)s):
def add_builtin_recipe(self): def add_builtin_recipe(self):
from calibre.web.feeds.recipes.collection import \ from calibre.web.feeds.recipes.collection import \
get_builtin_recipe_by_title, get_builtin_recipe_titles get_builtin_recipe_collection, get_builtin_recipe_by_id
items = sorted(get_builtin_recipe_titles(), key=sort_key) from PyQt4.Qt import QDialog, QVBoxLayout, QListWidgetItem, \
QListWidget, QDialogButtonBox, QSize
d = QDialog(self)
d.l = QVBoxLayout()
d.setLayout(d.l)
d.list = QListWidget(d)
d.list.doubleClicked.connect(lambda x: d.accept())
d.l.addWidget(d.list)
d.bb = QDialogButtonBox(QDialogButtonBox.Ok|QDialogButtonBox.Cancel,
Qt.Horizontal, d)
d.bb.accepted.connect(d.accept)
d.bb.rejected.connect(d.reject)
d.l.addWidget(d.bb)
d.setWindowTitle(_('Choose builtin recipe'))
items = []
for r in get_builtin_recipe_collection():
id_ = r.get('id', '')
title = r.get('title', '')
lang = r.get('language', '')
if id_ and title:
items.append((title + ' [%s]'%lang, id_))
title, ok = QInputDialog.getItem(self, _('Pick recipe'), _('Pick the recipe to customize'), items.sort(key=lambda x:sort_key(x[0]))
items, 0, False) for title, id_ in items:
if ok: item = QListWidgetItem(title)
title = unicode(title) item.setData(Qt.UserRole, id_)
profile = get_builtin_recipe_by_title(title) d.list.addItem(item)
if self._model.has_title(title):
if question_dialog(self, _('Replace recipe?'), d.resize(QSize(450, 400))
_('A custom recipe named %s already exists. Do you want to ' ret = d.exec_()
'replace it?')%title): d.list.doubleClicked.disconnect()
self._model.replace_by_title(title, profile) if ret != d.Accepted:
else: return
return
items = list(d.list.selectedItems())
if not items:
return
item = items[-1]
id_ = unicode(item.data(Qt.UserRole).toString())
title = unicode(item.data(Qt.DisplayRole).toString()).rpartition(' [')[0]
profile = get_builtin_recipe_by_id(id_)
if profile is None:
raise Exception('Something weird happened')
if self._model.has_title(title):
if question_dialog(self, _('Replace recipe?'),
_('A custom recipe named %s already exists. Do you want to '
'replace it?')%title):
self._model.replace_by_title(title, profile)
else: else:
self.model.add(title, profile) return
else:
self.model.add(title, profile)
self.clear() self.clear()

View File

@ -8,9 +8,9 @@ __docformat__ = 'restructuredtext en'
from functools import partial from functools import partial
from PyQt4.Qt import QIcon, Qt, QWidget, QToolBar, QSize, \ from PyQt4.Qt import QIcon, Qt, QWidget, QToolBar, QSize, \
pyqtSignal, QToolButton, QPushButton, \ pyqtSignal, QToolButton, QMenu, QCheckBox, \
QObject, QVBoxLayout, QSizePolicy, QLabel, QHBoxLayout, QActionGroup, \ QObject, QVBoxLayout, QSizePolicy, QLabel, QHBoxLayout, QActionGroup
QMenu
from calibre.constants import __appname__ from calibre.constants import __appname__
from calibre.gui2.search_box import SearchBox2, SavedSearchBox from calibre.gui2.search_box import SearchBox2, SavedSearchBox
@ -178,7 +178,9 @@ class SearchBar(QWidget): # {{{
x.setToolTip(_("<p>Search the list of books by title, author, publisher, tags, comments, etc.<br><br>Words separated by spaces are ANDed")) x.setToolTip(_("<p>Search the list of books by title, author, publisher, tags, comments, etc.<br><br>Words separated by spaces are ANDed"))
l.addWidget(x) l.addWidget(x)
self.search_button = QPushButton(_('&Go!')) self.search_button = QToolButton()
self.search_button.setToolButtonStyle(Qt.ToolButtonTextOnly)
self.search_button.setText(_('&Go!'))
l.addWidget(self.search_button) l.addWidget(self.search_button)
self.search_button.setSizePolicy(QSizePolicy.Minimum, self.search_button.setSizePolicy(QSizePolicy.Minimum,
QSizePolicy.Minimum) QSizePolicy.Minimum)
@ -192,6 +194,12 @@ class SearchBar(QWidget): # {{{
l.addWidget(x) l.addWidget(x)
x.setToolTip(_("Reset Quick Search")) x.setToolTip(_("Reset Quick Search"))
x = parent.search_highlight_only = QCheckBox()
x.setText(_('&Highlight'))
x.setToolTip(_('Highlight matched books in the book list, instead '
'of restricting the book list to the matches.'))
l.addWidget(x)
x = parent.saved_search = SavedSearchBox(self) x = parent.saved_search = SavedSearchBox(self)
x.setMaximumSize(QSize(150, 16777215)) x.setMaximumSize(QSize(150, 16777215))
x.setMinimumContentsLength(15) x.setMinimumContentsLength(15)

View File

@ -10,7 +10,7 @@ from contextlib import closing
from operator import attrgetter from operator import attrgetter
from PyQt4.Qt import QAbstractTableModel, Qt, pyqtSignal, QIcon, QImage, \ from PyQt4.Qt import QAbstractTableModel, Qt, pyqtSignal, QIcon, QImage, \
QModelIndex, QVariant, QDate QModelIndex, QVariant, QDate, QColor
from calibre.gui2 import NONE, config, UNDEFINED_QDATE from calibre.gui2 import NONE, config, UNDEFINED_QDATE
from calibre.utils.pyparsing import ParseException from calibre.utils.pyparsing import ParseException
@ -93,6 +93,9 @@ class BooksModel(QAbstractTableModel): # {{{
self.bool_no_icon = QIcon(I('list_remove.png')) self.bool_no_icon = QIcon(I('list_remove.png'))
self.bool_blank_icon = QIcon(I('blank.png')) self.bool_blank_icon = QIcon(I('blank.png'))
self.device_connected = False self.device_connected = False
self.rows_matching = set()
self.lowest_row_matching = None
self.highlight_only = False
self.read_config() self.read_config()
def change_alignment(self, colname, alignment): def change_alignment(self, colname, alignment):
@ -229,9 +232,27 @@ class BooksModel(QAbstractTableModel): # {{{
self.endInsertRows() self.endInsertRows()
self.count_changed() self.count_changed()
def set_highlight_only(self, toWhat):
    '''
    Record whether searches should only highlight their matches.

    :param toWhat: value stored verbatim in ``self.highlight_only``.

    When ``self.last_search`` is non-empty the search is repeated through
    ``self.research()``; otherwise nothing else happens.
    '''
    self.highlight_only = toWhat
    if not self.last_search:
        return
    self.research()
def search(self, text, reset=True): def search(self, text, reset=True):
try: try:
self.db.search(text) if self.highlight_only:
self.db.search('')
if not text:
self.rows_matching = set()
self.lowest_row_matching = None
else:
self.rows_matching = self.db.search(text, return_matches=True)
if self.rows_matching:
self.lowest_row_matching = self.db.row(self.rows_matching[0])
self.rows_matching = set(self.rows_matching)
else:
self.rows_matching = set()
self.lowest_row_matching = None
self.db.search(text)
except ParseException as e: except ParseException as e:
self.searched.emit(e.msg) self.searched.emit(e.msg)
return return
@ -337,8 +358,9 @@ class BooksModel(QAbstractTableModel): # {{{
name, val = mi.format_field(key) name, val = mi.format_field(key)
if mi.metadata_for_field(key)['datatype'] == 'comments': if mi.metadata_for_field(key)['datatype'] == 'comments':
name += ':html' name += ':html'
if val: if val and name not in data:
data[name] = val data[name] = val
return data return data
@ -651,6 +673,9 @@ class BooksModel(QAbstractTableModel): # {{{
return NONE return NONE
if role in (Qt.DisplayRole, Qt.EditRole): if role in (Qt.DisplayRole, Qt.EditRole):
return self.column_to_dc_map[col](index.row()) return self.column_to_dc_map[col](index.row())
elif role == Qt.BackgroundColorRole:
if self.id(index) in self.rows_matching:
return QColor('lightgreen')
elif role == Qt.DecorationRole: elif role == Qt.DecorationRole:
if self.column_to_dc_decorator_map[col] is not None: if self.column_to_dc_decorator_map[col] is not None:
return self.column_to_dc_decorator_map[index.column()](index.row()) return self.column_to_dc_decorator_map[index.column()](index.row())

View File

@ -680,8 +680,14 @@ class BooksView(QTableView): # {{{
def set_editable(self, editable, supports_backloading): def set_editable(self, editable, supports_backloading):
self._model.set_editable(editable) self._model.set_editable(editable)
# Forward the search text to the model, then, if the model reports a
# lowest matching row, select that row (using_ids=False: the value is passed
# as a row number, not a book id).
# NOTE(review): indentation is not visible in this view; setFocus() is
# presumably called unconditionally after the `if` -- confirm against the
# real file before restructuring.
def search_proxy(self, txt):
self._model.search(txt)
if self._model.lowest_row_matching is not None:
self.select_rows([self._model.lowest_row_matching], using_ids=False)
self.setFocus(Qt.OtherFocusReason)
def connect_to_search_box(self, sb, search_done): def connect_to_search_box(self, sb, search_done):
sb.search.connect(self._model.search) sb.search.connect(self.search_proxy)
self._search_done = search_done self._search_done = search_done
self._model.searched.connect(self.search_done) self._model.searched.connect(self.search_done)

View File

@ -15,7 +15,8 @@ from calibre.gui2.preferences.plugins_ui import Ui_Form
from calibre.customize.ui import initialized_plugins, is_disabled, enable_plugin, \ from calibre.customize.ui import initialized_plugins, is_disabled, enable_plugin, \
disable_plugin, plugin_customization, add_plugin, \ disable_plugin, plugin_customization, add_plugin, \
remove_plugin remove_plugin
from calibre.gui2 import NONE, error_dialog, info_dialog, choose_files from calibre.gui2 import NONE, error_dialog, info_dialog, choose_files, \
question_dialog
class PluginModel(QAbstractItemModel): # {{{ class PluginModel(QAbstractItemModel): # {{{
@ -76,6 +77,16 @@ class PluginModel(QAbstractItemModel): # {{{
return self.index(j, 0, parent) return self.index(j, 0, parent)
return QModelIndex() return QModelIndex()
def plugin_to_index_by_properties(self, plugin):
    '''
    Find the model index of the entry that has the same properties as
    `plugin`.

    Each category in ``self.categories`` is scanned, in order, for the first
    entry of ``self._data[category]`` whose ``name``, ``type``, ``author``
    and ``version`` all compare equal to those of `plugin`.

    :param plugin: object exposing the four attributes listed above.
    :return: ``self.index(row, 0, <category index>)`` for the first match,
        or a default-constructed ``QModelIndex()`` when nothing matches.
    '''
    compared = ('name', 'type', 'author', 'version')
    for cat_row, category in enumerate(self.categories):
        cat_index = self.index(cat_row, 0, QModelIndex())
        for row, candidate in enumerate(self._data[category]):
            if all(getattr(plugin, attr) == getattr(candidate, attr)
                    for attr in compared):
                return self.index(row, 0, cat_index)
    return QModelIndex()
def refresh_plugin(self, plugin, rescan=False): def refresh_plugin(self, plugin, rescan=False):
if rescan: if rescan:
self.populate() self.populate()
@ -132,7 +143,6 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self.toggle_plugin_button.clicked.connect(self.toggle_plugin) self.toggle_plugin_button.clicked.connect(self.toggle_plugin)
self.customize_plugin_button.clicked.connect(self.customize_plugin) self.customize_plugin_button.clicked.connect(self.customize_plugin)
self.remove_plugin_button.clicked.connect(self.remove_plugin) self.remove_plugin_button.clicked.connect(self.remove_plugin)
self.button_plugin_browse.clicked.connect(self.find_plugin)
self.button_plugin_add.clicked.connect(self.add_plugin) self.button_plugin_add.clicked.connect(self.add_plugin)
def toggle_plugin(self, *args): def toggle_plugin(self, *args):
@ -149,23 +159,39 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self.modify_plugin(op='remove') self.modify_plugin(op='remove')
def add_plugin(self): def add_plugin(self):
path = unicode(self.plugin_path.text()) path = choose_files(self, 'add a plugin dialog', _('Add plugin'),
if path and os.access(path, os.R_OK) and path.lower().endswith('.zip'): filters=[(_('Plugins'), ['zip'])], all_files=False,
add_plugin(path) select_only_single_file=True)
if not path:
return
path = path[0]
if path and os.access(path, os.R_OK) and path.lower().endswith('.zip'):
if not question_dialog(self, _('Are you sure?'), '<p>' + \
_('Installing plugins is a <b>security risk</b>. '
'Plugins can contain a virus/malware. '
'Only install it if you got it from a trusted source.'
' Are you sure you want to proceed?'),
show_copy_button=False):
return
plugin = add_plugin(path)
self._plugin_model.populate() self._plugin_model.populate()
self._plugin_model.reset() self._plugin_model.reset()
self.changed_signal.emit() self.changed_signal.emit()
self.plugin_path.setText('') info_dialog(self, _('Success'),
_('Plugin <b>{0}</b> successfully installed under <b>'
' {1} plugins</b>. You may have to restart calibre '
'for the plugin to take effect.').format(plugin.name, plugin.type),
show=True, show_copy_button=False)
idx = self._plugin_model.plugin_to_index_by_properties(plugin)
if idx.isValid():
self.plugin_view.scrollTo(idx,
self.plugin_view.PositionAtCenter)
self.plugin_view.scrollTo(idx,
self.plugin_view.PositionAtCenter)
else: else:
error_dialog(self, _('No valid plugin path'), error_dialog(self, _('No valid plugin path'),
_('%s is not a valid plugin path')%path).exec_() _('%s is not a valid plugin path')%path).exec_()
def find_plugin(self):
path = choose_files(self, 'choose plugin dialog', _('Choose plugin'),
filters=[('Plugins', ['zip'])], all_files=False,
select_only_single_file=True)
if path:
self.plugin_path.setText(path[0])
def modify_plugin(self, op=''): def modify_plugin(self, op=''):
index = self.plugin_view.currentIndex() index = self.plugin_view.currentIndex()
@ -191,10 +217,13 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
if plugin.do_user_config(): if plugin.do_user_config():
self._plugin_model.refresh_plugin(plugin) self._plugin_model.refresh_plugin(plugin)
elif op == 'remove': elif op == 'remove':
msg = _('Plugin {0} successfully removed').format(plugin.name)
if remove_plugin(plugin): if remove_plugin(plugin):
self._plugin_model.populate() self._plugin_model.populate()
self._plugin_model.reset() self._plugin_model.reset()
self.changed_signal.emit() self.changed_signal.emit()
info_dialog(self, _('Success'), msg, show=True,
show_copy_button=False)
else: else:
error_dialog(self, _('Cannot remove builtin plugin'), error_dialog(self, _('Cannot remove builtin plugin'),
plugin.name + _(' cannot be removed. It is a ' plugin.name + _(' cannot be removed. It is a '

View File

@ -72,64 +72,14 @@
</layout> </layout>
</item> </item>
<item> <item>
<widget class="QGroupBox" name="groupBox_4"> <widget class="QPushButton" name="button_plugin_add">
<property name="title"> <property name="text">
<string>Add new plugin</string> <string>&amp;Add a new plugin</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/plugins.png</normaloff>:/images/plugins.png</iconset>
</property> </property>
<layout class="QVBoxLayout" name="verticalLayout_5">
<item>
<layout class="QHBoxLayout" name="horizontalLayout_5">
<item>
<widget class="QLabel" name="label_14">
<property name="text">
<string>Plugin &amp;file:</string>
</property>
<property name="buddy">
<cstring>plugin_path</cstring>
</property>
</widget>
</item>
<item>
<widget class="QLineEdit" name="plugin_path"/>
</item>
<item>
<widget class="QToolButton" name="button_plugin_browse">
<property name="text">
<string>...</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/document_open.png</normaloff>:/images/document_open.png</iconset>
</property>
</widget>
</item>
</layout>
</item>
<item>
<layout class="QHBoxLayout" name="horizontalLayout_4">
<item>
<spacer name="horizontalSpacer_2">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QPushButton" name="button_plugin_add">
<property name="text">
<string>&amp;Add</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>
</widget> </widget>
</item> </item>
</layout> </layout>

View File

@ -37,7 +37,10 @@ class BaseModel(QAbstractListModel):
dont_remove_from=set(['toolbar-device'])) dont_remove_from=set(['toolbar-device']))
if name is None: if name is None:
return FakeAction('--- '+_('Separator')+' ---', None) return FakeAction('--- '+_('Separator')+' ---', None)
return gui.iactions[name] try:
return gui.iactions[name]
except:
return None
def rowCount(self, parent): def rowCount(self, parent):
return len(self._data) return len(self._data)
@ -124,7 +127,8 @@ class CurrentModel(BaseModel):
BaseModel.__init__(self) BaseModel.__init__(self)
self.gprefs_name = 'action-layout-'+key self.gprefs_name = 'action-layout-'+key
current = gprefs[self.gprefs_name] current = gprefs[self.gprefs_name]
self._data = [self.name_to_action(x, gui) for x in current] self._data = [self.name_to_action(x, gui) for x in current]
self._data = [x for x in self._data if x is not None]
self.key = key self.key = key
self.gui = gui self.gui = gui

View File

@ -16,6 +16,7 @@ from calibre.gui2 import config
from calibre.gui2.dialogs.confirm_delete import confirm from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.gui2.dialogs.saved_search_editor import SavedSearchEditor from calibre.gui2.dialogs.saved_search_editor import SavedSearchEditor
from calibre.gui2.dialogs.search import SearchDialog from calibre.gui2.dialogs.search import SearchDialog
from calibre.utils.config import dynamic
from calibre.utils.search_query_parser import saved_searches from calibre.utils.search_query_parser import saved_searches
from calibre.utils.icu import sort_key from calibre.utils.icu import sort_key
@ -375,6 +376,9 @@ class SearchBoxMixin(object): # {{{
unicode(self.search.toolTip()))) unicode(self.search.toolTip())))
self.advanced_search_button.setStatusTip(self.advanced_search_button.toolTip()) self.advanced_search_button.setStatusTip(self.advanced_search_button.toolTip())
self.clear_button.setStatusTip(self.clear_button.toolTip()) self.clear_button.setStatusTip(self.clear_button.toolTip())
self.search_highlight_only.stateChanged.connect(self.highlight_only_changed)
self.search_highlight_only.setChecked(
dynamic.get('search_highlight_only', False))
def focus_search_box(self, *args): def focus_search_box(self, *args):
self.search.setFocus(Qt.OtherFocusReason) self.search.setFocus(Qt.OtherFocusReason)
@ -401,6 +405,11 @@ class SearchBoxMixin(object): # {{{
def focus_to_library(self): def focus_to_library(self):
self.current_view().setFocus(Qt.OtherFocusReason) self.current_view().setFocus(Qt.OtherFocusReason)
def highlight_only_changed(self, toWhat):
    '''
    React to a change of the "Highlight" search checkbox.

    The new state is saved under the ``search_highlight_only`` key of
    ``dynamic``, passed to ``set_highlight_only()`` on the current view's
    model, and finally ``focus_to_library()`` is called.

    :param toWhat: the state value delivered by the checkbox signal.
    '''
    dynamic.set('search_highlight_only', toWhat)
    books_model = self.current_view().model()
    books_model.set_highlight_only(toWhat)
    self.focus_to_library()
# }}} # }}}
class SavedSearchBoxMixin(object): # {{{ class SavedSearchBoxMixin(object): # {{{

View File

@ -150,7 +150,7 @@ class Delegate(QStyledItemDelegate):
custom = [] custom = []
if editor.custom.isChecked(): if editor.custom.isChecked():
for x in ('1', '2'): for x in ('1', '2'):
sc = getattr(editor, 'shortcut'+x) sc = getattr(editor, 'shortcut'+x, None)
if sc is not None: if sc is not None:
custom.append(sc) custom.append(sc)
@ -266,6 +266,11 @@ class ShortcutConfig(QWidget):
self.view.scrollTo(index) self.view.scrollTo(index)
@property
def is_editing(self):
    '''True when ``self.view.state()`` equals the view's ``EditingState``.'''
    view = self.view
    return view.state() == view.EditingState
if __name__ == '__main__': if __name__ == '__main__':
from calibre.gui2 import is_ok_to_use_qt from calibre.gui2 import is_ok_to_use_qt
from calibre.gui2.viewer.keys import SHORTCUTS from calibre.gui2.viewer.keys import SHORTCUTS

View File

@ -19,7 +19,7 @@ from PyQt4.Qt import Qt, SIGNAL, QTimer, \
QMessageBox, QHelpEvent QMessageBox, QHelpEvent
from calibre import prints from calibre import prints
from calibre.constants import __appname__, isosx, DEBUG from calibre.constants import __appname__, isosx
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.config import prefs, dynamic from calibre.utils.config import prefs, dynamic
from calibre.utils.ipc.server import Server from calibre.utils.ipc.server import Server
@ -103,7 +103,15 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
self.gui_debug = gui_debug self.gui_debug = gui_debug
acmap = OrderedDict() acmap = OrderedDict()
for action in interface_actions(): for action in interface_actions():
ac = action.load_actual_plugin(self) try:
ac = action.load_actual_plugin(self)
except:
# Ignore errors in loading user supplied plugins
import traceback
traceback.print_exc()
if ac.plugin_path is None:
raise
ac.plugin_path = action.plugin_path ac.plugin_path = action.plugin_path
ac.interface_action_base_plugin = action ac.interface_action_base_plugin = action
if ac.name in acmap: if ac.name in acmap:
@ -460,12 +468,8 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
try: try:
if 'calibre.ebooks.DRMError' in job.details: if 'calibre.ebooks.DRMError' in job.details:
if not minz: if not minz:
d = error_dialog(self, _('Conversion Error'), from calibre.gui2.dialogs.drm_error import DRMErrorMessage
_('<p>Could not convert: %s<p>It is a ' d = DRMErrorMessage(self, job.description.split(':')[-1])
'<a href="%s">DRM</a>ed book. You must first remove the '
'DRM using third party tools.')%\
(job.description.split(':')[-1],
'http://bugs.calibre-ebook.com/wiki/DRM'))
d.setModal(False) d.setModal(False)
d.show() d.show()
self._modeless_dialogs.append(d) self._modeless_dialogs.append(d)
@ -582,9 +586,6 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
# Goes here, because if cf is valid, db is valid. # Goes here, because if cf is valid, db is valid.
db.prefs['field_metadata'] = db.field_metadata.all_metadata() db.prefs['field_metadata'] = db.field_metadata.all_metadata()
db.commit_dirty_cache() db.commit_dirty_cache()
if DEBUG and db.gm_count > 0:
print 'get_metadata cache: {0:d} calls, {1:4.2f}% misses'.format(
db.gm_count, (db.gm_missed*100.0)/db.gm_count)
for action in self.iactions.values(): for action in self.iactions.values():
if not action.shutting_down(): if not action.shutting_down():
return return

View File

@ -120,6 +120,13 @@ class ConfigDialog(QDialog, Ui_Dialog):
def accept(self, *args): def accept(self, *args):
if self.shortcut_config.is_editing:
from calibre.gui2 import info_dialog
info_dialog(self, _('Still editing'),
_('You are in the middle of editing a keyboard shortcut'
' first complete that, by clicking outside the '
' shortcut editing box.'), show=True)
return
c = config() c = config()
c.set('serif_family', unicode(self.serif_family.currentFont().family())) c.set('serif_family', unicode(self.serif_family.currentFont().family()))
c.set('sans_family', unicode(self.sans_family.currentFont().family())) c.set('sans_family', unicode(self.sans_family.currentFont().family()))
@ -279,7 +286,7 @@ class Document(QWebPage): # {{{
@pyqtSignature("") @pyqtSignature("")
def init_hyphenate(self): def init_hyphenate(self):
if self.hyphenate: if self.hyphenate and getattr(self, 'loaded_lang', ''):
self.javascript('do_hyphenation("%s")'%self.loaded_lang) self.javascript('do_hyphenation("%s")'%self.loaded_lang)
def after_load(self): def after_load(self):

View File

@ -26,6 +26,7 @@ from calibre.gui2.search_box import SearchBox2
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
from calibre.customize.ui import available_input_formats from calibre.customize.ui import available_input_formats
from calibre.gui2.viewer.dictionary import Lookup from calibre.gui2.viewer.dictionary import Lookup
from calibre import as_unicode
class TOCItem(QStandardItem): class TOCItem(QStandardItem):
@ -626,13 +627,12 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
QApplication.processEvents() QApplication.processEvents()
if worker.exception is not None: if worker.exception is not None:
if isinstance(worker.exception, DRMError): if isinstance(worker.exception, DRMError):
error_dialog(self, _('DRM Error'), from calibre.gui2.dialogs.drm_error import DRMErrorMessage
_('<p>This book is protected by <a href="%s">DRM</a>') DRMErrorMessage(self).exec_()
%'http://wiki.mobileread.com/wiki/DRM').exec_()
else: else:
r = getattr(worker.exception, 'reason', worker.exception) r = getattr(worker.exception, 'reason', worker.exception)
error_dialog(self, _('Could not open ebook'), error_dialog(self, _('Could not open ebook'),
unicode(r), det_msg=worker.traceback, show=True) as_unicode(r), det_msg=worker.traceback, show=True)
self.close_progress_indicator() self.close_progress_indicator()
else: else:
self.metadata.show_opf(self.iterator.opf, os.path.splitext(pathtoebook)[1][1:]) self.metadata.show_opf(self.iterator.opf, os.path.splitext(pathtoebook)[1][1:])

View File

@ -411,7 +411,8 @@ class ResultCache(SearchQueryParser): # {{{
if isinstance(location, list): if isinstance(location, list):
if allow_recursion: if allow_recursion:
for loc in location: for loc in location:
matches |= self.get_matches(loc, query, allow_recursion=False) matches |= self.get_matches(loc, query, candidates,
allow_recursion=False)
return matches return matches
raise ParseException(query, len(query), 'Recursive query group detected', self) raise ParseException(query, len(query), 'Recursive query group detected', self)
@ -419,11 +420,11 @@ class ResultCache(SearchQueryParser): # {{{
fm = self.field_metadata[location] fm = self.field_metadata[location]
# take care of dates special case # take care of dates special case
if fm['datatype'] == 'datetime': if fm['datatype'] == 'datetime':
return self.get_dates_matches(location, query.lower()) return self.get_dates_matches(location, query.lower(), candidates)
# take care of numbers special case # take care of numbers special case
if fm['datatype'] in ('rating', 'int', 'float'): if fm['datatype'] in ('rating', 'int', 'float'):
return self.get_numeric_matches(location, query.lower()) return self.get_numeric_matches(location, query.lower(), candidates)
# take care of the 'count' operator for is_multiples # take care of the 'count' operator for is_multiples
if fm['is_multiple'] and \ if fm['is_multiple'] and \
@ -431,7 +432,8 @@ class ResultCache(SearchQueryParser): # {{{
query[1:1] in '=<>!': query[1:1] in '=<>!':
vf = lambda item, loc=fm['rec_index'], ms=fm['is_multiple']:\ vf = lambda item, loc=fm['rec_index'], ms=fm['is_multiple']:\
len(item[loc].split(ms)) if item[loc] is not None else 0 len(item[loc].split(ms)) if item[loc] is not None else 0
return self.get_numeric_matches(location, query[1:], val_func=vf) return self.get_numeric_matches(location, query[1:],
candidates, val_func=vf)
# everything else, or 'all' matches # everything else, or 'all' matches
matchkind = CONTAINS_MATCH matchkind = CONTAINS_MATCH
@ -598,7 +600,6 @@ class ResultCache(SearchQueryParser): # {{{
def set(self, row, col, val, row_is_id=False): def set(self, row, col, val, row_is_id=False):
id = row if row_is_id else self._map_filtered[row] id = row if row_is_id else self._map_filtered[row]
self._data[id][self.FIELD_MAP['all_metadata']] = None
self._data[id][col] = val self._data[id][col] = val
def get(self, row, col, row_is_id=False): def get(self, row, col, row_is_id=False):
@ -629,7 +630,6 @@ class ResultCache(SearchQueryParser): # {{{
self._data[id] = CacheRow(db, self.composites, self._data[id] = CacheRow(db, self.composites,
db.conn.get('SELECT * from meta2 WHERE id=?', (id,))[0]) db.conn.get('SELECT * from meta2 WHERE id=?', (id,))[0])
self._data[id].append(db.book_on_device_string(id)) self._data[id].append(db.book_on_device_string(id))
self._data[id].append(None)
except IndexError: except IndexError:
return None return None
try: try:
@ -646,7 +646,6 @@ class ResultCache(SearchQueryParser): # {{{
self._data[id] = CacheRow(db, self.composites, self._data[id] = CacheRow(db, self.composites,
db.conn.get('SELECT * from meta2 WHERE id=?', (id,))[0]) db.conn.get('SELECT * from meta2 WHERE id=?', (id,))[0])
self._data[id].append(db.book_on_device_string(id)) self._data[id].append(db.book_on_device_string(id))
self._data[id].append(None)
self._map[0:0] = ids self._map[0:0] = ids
self._map_filtered[0:0] = ids self._map_filtered[0:0] = ids
@ -671,7 +670,6 @@ class ResultCache(SearchQueryParser): # {{{
for item in self._data: for item in self._data:
if item is not None: if item is not None:
item.append(db.book_on_device_string(item[0])) item.append(db.book_on_device_string(item[0]))
item.append(None)
self._map = [i[0] for i in self._data if i is not None] self._map = [i[0] for i in self._data if i is not None]
if field is not None: if field is not None:
self.sort(field, ascending) self.sort(field, ascending)

View File

@ -1524,19 +1524,32 @@ class EPUB_MOBI(CatalogPlugin):
this_title['formats'] = formats this_title['formats'] = formats
# Add user notes to be displayed in header # Add user notes to be displayed in header
# Special case handling for datetime fields # Special case handling for datetime fields and lists
if self.opts.header_note_source_field: if self.opts.header_note_source_field:
field_md = self.__db.metadata_for_field(self.opts.header_note_source_field) field_md = self.__db.metadata_for_field(self.opts.header_note_source_field)
notes = self.__db.get_field(record['id'], notes = self.__db.get_field(record['id'],
self.opts.header_note_source_field, self.opts.header_note_source_field,
index_is_id=True) index_is_id=True)
if notes and field_md['datatype'] == 'datetime':
# Reformat date fields to match UI presentation: dd MMM YYYY
notes = format_date(notes,'dd MMM yyyy')
if notes: if notes:
if field_md['datatype'] == 'text':
if isinstance(notes,list):
notes = ' &middot; '.join(notes)
elif field_md['datatype'] == 'datetime':
notes = format_date(notes,'dd MMM yyyy')
elif field_md['datatype'] == 'composite':
m = re.match(r'\[(.+)\]$', notes)
if m is not None:
# Sniff for special pseudo-list string "[<item, item>]"
bracketed_content = m.group(1)
if ',' in bracketed_content:
# Recast the comma-separated items as a list
items = bracketed_content.split(',')
items = [i.strip() for i in items]
notes = ' &middot; '.join(items)
else:
notes = bracketed_content
this_title['notes'] = {'source':field_md['name'], this_title['notes'] = {'source':field_md['name'],
'content':notes} 'content':notes}
titles.append(this_title) titles.append(this_title)

View File

@ -298,10 +298,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
base, base,
prefer_custom=True) prefer_custom=True)
self.FIELD_MAP['ondevice'] = base+1 self.FIELD_MAP['ondevice'] = base = base+1
self.field_metadata.set_field_record_index('ondevice', base+1, prefer_custom=False) self.field_metadata.set_field_record_index('ondevice', base, prefer_custom=False)
self.FIELD_MAP['all_metadata'] = base+2
self.field_metadata.set_field_record_index('all_metadata', base+2, prefer_custom=False)
script = ''' script = '''
DROP VIEW IF EXISTS meta2; DROP VIEW IF EXISTS meta2;
@ -343,10 +341,6 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.has_id = self.data.has_id self.has_id = self.data.has_id
self.count = self.data.count self.count = self.data.count
# Count times get_metadata is called, and how many times in the cache
self.gm_count = 0
self.gm_missed = 0
for prop in ('author_sort', 'authors', 'comment', 'comments', 'isbn', for prop in ('author_sort', 'authors', 'comment', 'comments', 'isbn',
'publisher', 'rating', 'series', 'series_index', 'tags', 'publisher', 'rating', 'series', 'series_index', 'tags',
'title', 'timestamp', 'uuid', 'pubdate', 'ondevice'): 'title', 'timestamp', 'uuid', 'pubdate', 'ondevice'):
@ -690,19 +684,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
''' '''
row = self.data._data[idx] if index_is_id else self.data[idx] row = self.data._data[idx] if index_is_id else self.data[idx]
fm = self.FIELD_MAP fm = self.FIELD_MAP
self.gm_count += 1
mi = row[self.FIELD_MAP['all_metadata']]
if mi is not None:
if get_cover:
# Always get the cover, because the value can be wrong if the
# original mi was from the OPF
mi.cover = self.cover(idx, index_is_id=index_is_id, as_path=True)
return mi
self.gm_missed += 1
mi = Metadata(None) mi = Metadata(None)
self.data.set(idx, fm['all_metadata'], mi, row_is_id = index_is_id)
aut_list = row[fm['au_map']] aut_list = row[fm['au_map']]
aut_list = [p.split(':::') for p in aut_list.split(':#:')] aut_list = [p.split(':::') for p in aut_list.split(':#:')]
@ -724,6 +706,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
formats = row[fm['formats']] formats = row[fm['formats']]
if not formats: if not formats:
formats = None formats = None
else:
formats = formats.split(',')
mi.formats = formats mi.formats = formats
tags = row[fm['tags']] tags = row[fm['tags']]
if tags: if tags:
@ -1387,7 +1371,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
if r is not None: if r is not None:
if (now - r[self.FIELD_MAP['timestamp']]) > delta: if (now - r[self.FIELD_MAP['timestamp']]) > delta:
tags = r[self.FIELD_MAP['tags']] tags = r[self.FIELD_MAP['tags']]
if tags and tag in tags.lower(): if tags and tag in [x.strip() for x in
tags.lower().split(',')]:
yield r[self.FIELD_MAP['id']] yield r[self.FIELD_MAP['id']]
def get_next_series_num_for(self, series): def get_next_series_num_for(self, series):

View File

@ -162,15 +162,6 @@ class FieldMetadata(dict):
'search_terms':['tags', 'tag'], 'search_terms':['tags', 'tag'],
'is_custom':False, 'is_custom':False,
'is_category':True}), 'is_category':True}),
('all_metadata',{'table':None,
'column':None,
'datatype':None,
'is_multiple':None,
'kind':'field',
'name':None,
'search_terms':[],
'is_custom':False,
'is_category':False}),
('author_sort',{'table':None, ('author_sort',{'table':None,
'column':None, 'column':None,
'datatype':'text', 'datatype':'text',

View File

@ -110,6 +110,7 @@ class cmd_commit(_cmd_commit):
suffix = 'The fix will be in the next release.' suffix = 'The fix will be in the next release.'
action = action+'ed' action = action+'ed'
msg = '%s in branch %s. %s'%(action, nick, suffix) msg = '%s in branch %s. %s'%(action, nick, suffix)
msg = msg.replace('Fixesed', 'Fixed')
server = xmlrpclib.ServerProxy(url) server = xmlrpclib.ServerProxy(url)
server.ticket.update(int(bug), msg, server.ticket.update(int(bug), msg,
{'status':'closed', 'resolution':'fixed'}, {'status':'closed', 'resolution':'fixed'},

View File

@ -3,7 +3,7 @@ __license__ = 'GPL 3'
__copyright__ = '2010, sengian <sengian1@gmail.com>' __copyright__ = '2010, sengian <sengian1@gmail.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re import re, htmlentitydefs
_ascii_pat = None _ascii_pat = None
@ -21,3 +21,32 @@ def clean_ascii_chars(txt, charlist=None):
pat = re.compile(u'|'.join(map(unichr, charlist))) pat = re.compile(u'|'.join(map(unichr, charlist)))
return pat.sub('', txt) return pat.sub('', txt)
##
# Fredrik Lundh: http://effbot.org/zone/re-sub.htm#unescape-html
# Removes HTML or XML character references and entities from a text string.
#
# @param text The HTML (or XML) source text.
# @return The plain text, as a Unicode string, if necessary.
def unescape(text, rm=False, rchar=u''):
    '''
    Replace HTML/XML character references and named entities in `text`
    by the characters they denote.

    :param text: the HTML (or XML) source text.
    :param rm: when False (the default) anything that cannot be resolved is
        left exactly as it appeared. When True, every match that is not
        returned as a decoded numeric reference is replaced by `rchar`.
    :param rchar: replacement string used when `rm` is True.
    :return: `text` with the substitutions applied.

    NOTE(review): with ``rm=True`` a named entity that *was* resolved
    (e.g. ``&amp;``) is also replaced by `rchar`, because only numeric
    references return early. That long-standing behaviour is kept as is;
    confirm it is intended before changing it.
    '''
    def fixup(m, rm=rm, rchar=rchar):
        text = m.group(0)
        if text[:2] == "&#":
            # character reference
            try:
                if text[:3] == "&#x":
                    return unichr(int(text[3:-1], 16))
                return unichr(int(text[2:-1]))
            except (ValueError, OverflowError):
                # ValueError: not a number, or outside the range unichr()
                # accepts. OverflowError: absurdly large numbers such as
                # &#99999999999999999999; -- treat them as unresolvable
                # instead of aborting the whole substitution.
                pass
        else:
            # named entity
            try:
                text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
            except KeyError:
                pass
        if rm:
            return rchar  # replace by char
        return text  # leave as is
    return re.sub(r"&#?\w+;", fixup, text)

View File

@ -18,6 +18,24 @@ class _Parser(object):
LEX_NUM = 4 LEX_NUM = 4
LEX_EOF = 5 LEX_EOF = 5
def _python(self, func):
    '''
    Implementation of the ``python`` template function: execute the source
    text `func` and call the ``evaluate`` function it defines.

    :param func: Python source that is expected to define
        ``evaluate(kwargs)``.
    :return: the value of ``evaluate(self.parent.kwargs)``. ``int`` and
        ``float`` results are converted with ``unicode()``, lists are
        joined with ``','``, ``str`` results are converted with
        ``unicode()``; any other value is returned unchanged.

    A missing ``evaluate`` function, or any exception raised while calling
    it or converting its result, is reported through ``self.error()``.
    '''
    # NOTE(security): this executes arbitrary code taken from the template
    # text. It must never be fed templates from an untrusted source.
    namespace = {}  # do not shadow the builtin locals()
    # The call form of exec is equivalent to "exec func in namespace" and
    # is also valid on Python 3.
    exec(func, namespace)
    if 'evaluate' not in namespace:
        self.error('no evaluate function in python')
    try:
        result = namespace['evaluate'](self.parent.kwargs)
        if isinstance(result, (float, int)):
            result = unicode(result)
        elif isinstance(result, list):
            result = ','.join(result)
        elif isinstance(result, str):
            result = unicode(result)
        return result
    except Exception as e:
        # Most exceptions have no .msg attribute; reading it blindly raised
        # AttributeError here and hid the real problem. Fall back to repr(),
        # which also names the exception type.
        detail = getattr(e, 'msg', None) or repr(e)
        self.error('python function threw exception: ' + detail)
def _strcmp(self, x, y, lt, eq, gt): def _strcmp(self, x, y, lt, eq, gt):
v = strcmp(x, y) v = strcmp(x, y)
if v < 0: if v < 0:
@ -79,6 +97,7 @@ class _Parser(object):
'field' : (1, lambda s, x: s.parent.get_value(x, [], s.parent.kwargs)), 'field' : (1, lambda s, x: s.parent.get_value(x, [], s.parent.kwargs)),
'multiply' : (2, partial(_math, op='*')), 'multiply' : (2, partial(_math, op='*')),
'print' : (-1, _print), 'print' : (-1, _print),
'python' : (1, _python),
'strcat' : (-1, _concat), 'strcat' : (-1, _concat),
'strcmp' : (5, _strcmp), 'strcmp' : (5, _strcmp),
'substr' : (3, lambda s, x, y, z: x[int(y): len(x) if int(z) == 0 else int(z)]), 'substr' : (3, lambda s, x, y, z: x[int(y): len(x) if int(z) == 0 else int(z)]),
@ -362,7 +381,7 @@ class TemplateFormatter(string.Formatter):
(r'\'.*?((?<!\\)\')', lambda x,t: (3, t[1:-1])), (r'\'.*?((?<!\\)\')', lambda x,t: (3, t[1:-1])),
(r'\n#.*?(?=\n)', None), (r'\n#.*?(?=\n)', None),
(r'\s', None) (r'\s', None)
]) ], flags=re.DOTALL)
def _eval_program(self, val, prog): def _eval_program(self, val, prog):
# keep a cache of the lex'ed program under the theory that re-lexing # keep a cache of the lex'ed program under the theory that re-lexing

View File

@ -92,7 +92,10 @@ def identify_data(data):
or raises an Exception if data is not an image. or raises an Exception if data is not an image.
''' '''
img = Image() img = Image()
img.load(data) if hasattr(img, 'identify'):
img.identify(data)
else:
img.load(data)
width, height = img.size width, height = img.size
fmt = img.format fmt = img.format
return (width, height, fmt) return (width, height, fmt)

View File

@ -456,6 +456,26 @@ magick_Image_load(magick_Image *self, PyObject *args, PyObject *kwargs) {
// }}} // }}}
// Image.identify {{{
/*
 * Image.identify(data): hand the byte buffer to MagickPingImageBlob() on this
 * object's wand. Returns None on success; on failure returns whatever
 * magick_set_exception() produces for the wand.
 */
static PyObject *
magick_Image_identify(magick_Image *self, PyObject *args, PyObject *kwargs) {
    const char *blob = NULL;
    Py_ssize_t blob_len = 0;

    /* NOTE(review): project macro; presumably bails out with NULL when the
       wand has already been destroyed - confirm against its definition. */
    NULL_CHECK(NULL)

    if (!PyArg_ParseTuple(args, "s#", &blob, &blob_len))
        return NULL;

    if (MagickPingImageBlob(self->wand, blob, blob_len) == MagickFalse)
        return magick_set_exception(self->wand);

    Py_RETURN_NONE;
}
// }}}
// Image.open {{{ // Image.open {{{
static PyObject * static PyObject *
magick_Image_read(magick_Image *self, PyObject *args, PyObject *kwargs) { magick_Image_read(magick_Image *self, PyObject *args, PyObject *kwargs) {
@ -993,6 +1013,10 @@ static PyMethodDef magick_Image_methods[] = {
{"destroy", (PyCFunction)magick_Image_destroy, METH_VARARGS, {"destroy", (PyCFunction)magick_Image_destroy, METH_VARARGS,
"Destroy the underlying ImageMagick Wand. WARNING: After using this method, all methods on this object will raise an exception."}, "Destroy the underlying ImageMagick Wand. WARNING: After using this method, all methods on this object will raise an exception."},
{"identify", (PyCFunction)magick_Image_identify, METH_VARARGS,
"Identify an image from a byte buffer (string)"
},
{"load", (PyCFunction)magick_Image_load, METH_VARARGS, {"load", (PyCFunction)magick_Image_load, METH_VARARGS,
"Load an image from a byte buffer (string)" "Load an image from a byte buffer (string)"
}, },

View File

@ -5,5 +5,52 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import glob
from calibre.constants import plugins, iswindows, filesystem_encoding
from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir
from calibre.utils.magick import Image, PixelWand
class Unavailable(Exception):
    '''Raised when the wmf plugin is missing or reported a load error.'''
class NoRaster(Exception):
    '''Raised when rendering a WMF produced no PNG image.'''
def extract_raster_image(wmf_data):
    '''
    Extract the first raster image found in a WMF and return it as PNG data.

    The WMF is rendered with the ``wmf`` plugin inside a temporary directory;
    the first ``*.png`` file (sorted by name) found there afterwards is
    loaded, rotated by 180 degrees and exported as PNG.

    :param wmf_data: The raw WMF data, passed unchanged to ``wmf.render()``.
    :return: The result of ``Image.export('png')``.
    :raises Unavailable: If the wmf plugin is absent or failed to load.
    :raises NoRaster: If rendering produced no PNG file.
    '''
    try:
        wmf, wmf_err = plugins['wmf']
    except KeyError:
        raise Unavailable('libwmf not available on this platform')
    if wmf_err:
        raise Unavailable(wmf_err)

    if iswindows:
        # On windows the plugin has to be told where its fonts are before
        # render() is used.
        import sys, os
        appdir = sys.app_dir
        if isinstance(appdir, unicode):
            appdir = appdir.encode(filesystem_encoding)
        fdir = os.path.join(appdir, 'wmffonts')
        wmf.set_font_dir(fdir)

    with TemporaryDirectory('wmf2png') as tdir:
        with CurrentDir(tdir):
            # The PNG files are looked up by relative name, hence the chdir
            # into the temporary directory.
            wmf.render(wmf_data)

            images = sorted(glob.glob('*.png'))
            if not images:
                raise NoRaster('No raster images in WMF')
            # Close the file explicitly before the temporary directory is
            # removed instead of relying on garbage collection.
            with open(images[0], 'rb') as f:
                data = f.read()

    im = Image()
    im.load(data)
    pw = PixelWand()
    pw.color = '#ffffff'
    # NOTE(review): pw is presumably the background fill used by rotate();
    # the reason for the 180 degree rotation is not evident from this code.
    im.rotate(pw, 180)

    return im.export('png')

View File

@ -4,6 +4,7 @@
#include <libwmf/api.h> #include <libwmf/api.h>
#include <libwmf/svg.h> #include <libwmf/svg.h>
//#include <libwmf/gd.h>
typedef struct { typedef struct {
char *data; char *data;
@ -13,7 +14,7 @@ typedef struct {
//This code is taken mostly from the Abiword wmf plugin //This code is taken mostly from the Abiword wmf plugin
// Buffer read {{{
// returns unsigned char cast to int, or EOF // returns unsigned char cast to int, or EOF
static int wmf_WMF_read(void * context) { static int wmf_WMF_read(void * context) {
char c; char c;
@ -22,11 +23,11 @@ static int wmf_WMF_read(void * context) {
if (info->pos == info->len) if (info->pos == info->len)
return EOF; return EOF;
c = info->data[pos]; c = info->data[info->pos];
info->pos++; info->pos++;
return (int)c; return (int)((unsigned char)c);
} }
// returns (-1) on error, else 0 // returns (-1) on error, else 0
@ -44,8 +45,17 @@ static long wmf_WMF_tell(void * context) {
return (long) info->pos; return (long) info->pos;
} }
// }}}
/* Scratch buffer holding the most recently generated PNG file name. */
char _png_name_buf[100];

/*
 * Image-name callback (installed as ddata->image.name in wmf_render).
 * ctxt points to an int counter; the counter is incremented and a name of
 * the form "NNNN.png" is built from its new value. The returned pointer
 * refers to the shared buffer above, so it is overwritten by the next call.
 */
char *wmf_png_name(void *ctxt) {
    int *counter = (int *)ctxt;

    *counter += 1;
    snprintf(_png_name_buf, 90, "%04d.png", *counter);
    return _png_name_buf;
}
#define CLEANUP if(API) { if (stream) wmf_free(API, stream); wmf_api_destroy(API); }; #define CLEANUP if(API) { if (stream) wmf_free(API, stream); wmf_api_destroy(API); };
static PyObject * static PyObject *
@ -66,9 +76,9 @@ wmf_render(PyObject *self, PyObject *args) {
unsigned int max_width = 1600; unsigned int max_width = 1600;
unsigned int max_height = 1200; unsigned int max_height = 1200;
unsigned long max_flags = 0;
static const char* Default_Description = "wmf2svg"; static const char* Default_Description = "wmf2svg";
int fname_counter = 0;
wmf_error_t err; wmf_error_t err;
@ -125,6 +135,8 @@ wmf_render(PyObject *self, PyObject *args) {
ddata->Description = (char *)Default_Description; ddata->Description = (char *)Default_Description;
ddata->bbox = bbox; ddata->bbox = bbox;
ddata->image.context = (void *)&fname_counter;
ddata->image.name = wmf_png_name;
wmf_display_size(API, &disp_width, &disp_height, 96, 96); wmf_display_size(API, &disp_width, &disp_height, 96, 96);
@ -156,9 +168,9 @@ wmf_render(PyObject *self, PyObject *args) {
ddata->height = (unsigned int) ceil ((double) wmf_height); ddata->height = (unsigned int) ceil ((double) wmf_height);
} }
ddata->flags |= WMF_SVG_INLINE_IMAGES; // Needs GD
//ddata->flags |= WMF_SVG_INLINE_IMAGES;
ddata->flags |= WMF_GD_OUTPUT_MEMORY | WMF_GD_OWN_BUFFER; //ddata->flags |= WMF_GD_OUTPUT_MEMORY | WMF_GD_OWN_BUFFER;
err = wmf_play(API, 0, &(bbox)); err = wmf_play(API, 0, &(bbox));
@ -178,11 +190,32 @@ wmf_render(PyObject *self, PyObject *args) {
return ans; return ans;
} }
#ifdef _WIN32
void set_libwmf_fontdir(const char *);
/*
 * set_font_dir(path): pass the given path string on to
 * set_libwmf_fontdir(). Returns None, or NULL if the argument could not be
 * parsed as a single string.
 */
static PyObject *
wmf_setfontdir(PyObject *self, PyObject *args) {
    char *font_dir = NULL;

    if (PyArg_ParseTuple(args, "s", &font_dir)) {
        set_libwmf_fontdir(font_dir);
        Py_RETURN_NONE;
    }

    return NULL;
}
#endif
static PyMethodDef wmf_methods[] = { static PyMethodDef wmf_methods[] = {
{"render", wmf_render, METH_VARARGS, {"render", wmf_render, METH_VARARGS,
"render(path) -> Render wmf as svg." "render(data) -> Render wmf as svg."
}, },
#ifdef _WIN32
{"set_font_dir", wmf_setfontdir, METH_VARARGS,
"set_font_dir(path) -> Set the path to the fonts dir on windows, must be called at least once before using render()"
},
#endif
{NULL} /* Sentinel */ {NULL} /* Sentinel */
}; };

View File

@ -982,9 +982,12 @@ class ZipFile:
zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH]) zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
if fname != zinfo.orig_filename: if fname != zinfo.orig_filename:
raise BadZipfile, \ print ('WARNING: Header (%r) and directory (%r) filenames do not'
'File name in directory "%s" and header "%s" differ.' % ( ' match inside ZipFile')%(fname, zinfo.orig_filename)
zinfo.orig_filename, fname) print 'Using directory filename %r'%zinfo.orig_filename
#raise BadZipfile, \
# 'File name in directory "%r" and header "%r" differ.' % (
# zinfo.orig_filename, fname)
# check for encrypted flag & handle password # check for encrypted flag & handle password
is_encrypted = zinfo.flag_bits & 0x1 is_encrypted = zinfo.flag_bits & 0x1

View File

@ -108,7 +108,6 @@ def download_builtin_recipe(urn):
br = browser() br = browser()
return br.open_novisit('http://status.calibre-ebook.com/recipe/'+urn).read() return br.open_novisit('http://status.calibre-ebook.com/recipe/'+urn).read()
def get_builtin_recipe_by_title(title, log=None, download_recipe=False): def get_builtin_recipe_by_title(title, log=None, download_recipe=False):
for x in get_builtin_recipe_collection(): for x in get_builtin_recipe_collection():
if x.get('title') == title: if x.get('title') == title:
@ -127,6 +126,24 @@ def get_builtin_recipe_by_title(title, log=None, download_recipe=False):
'Failed to download recipe, using builtin version') 'Failed to download recipe, using builtin version')
return P('recipes/%s.recipe'%urn, data=True) return P('recipes/%s.recipe'%urn, data=True)
def get_builtin_recipe_by_id(id_, log=None, download_recipe=False):
    '''
    Return the source of the builtin recipe whose ``id`` equals ``id_``.

    :param id_: The recipe id to look for in the builtin recipe collection.
    :param log: Optional log object; when None, download failures are
                printed with ``traceback.print_exc()`` instead.
    :param download_recipe: If True, first try to download the recipe via
                ``download_builtin_recipe()`` and fall back to the bundled
                copy if that fails.
    :return: The recipe data, or None if no builtin recipe has that id.
    '''
    for x in get_builtin_recipe_collection():
        if x.get('id') != id_:
            continue
        # Drop the first 8 characters of the id to get the urn
        # (presumably a 'builtin:' prefix -- confirm against the collection).
        urn = id_[8:]
        if download_recipe:
            try:
                if log is not None:
                    log('Trying to get latest version of recipe:', urn)
                return download_builtin_recipe(urn)
            except Exception:
                # Best effort only: any ordinary failure falls back to the
                # bundled recipe, but KeyboardInterrupt/SystemExit are no
                # longer swallowed.
                if log is None:
                    import traceback
                    traceback.print_exc()
                else:
                    log.exception(
                        'Failed to download recipe, using builtin version')
        return P('recipes/%s.recipe'%urn, data=True)
class SchedulerConfig(object): class SchedulerConfig(object):
def __init__(self): def __init__(self):