Merge from trunk

This commit is contained in:
Charles Haley 2011-02-01 15:18:04 +00:00
commit fdb842b036
20 changed files with 606 additions and 158 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 358 B

View File

@ -1,25 +1,25 @@
# -*- coding: utf-8
__license__ = 'GPL v3' __license__ = 'GPL v3'
__author__ = 'Luis Hernandez' __author__ = 'Luis Hernandez'
__copyright__ = 'Luis Hernandez<tolyluis@gmail.com>' __copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
description = 'Periódico gratuito en español - v0.8 - 27 Jan 2011' __version__ = 'v0.85'
__date__ = '31 January 2011'
''' '''
www.20minutos.es www.20minutos.es
''' '''
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1294946868(BasicNewsRecipe): class AdvancedUserRecipe1294946868(BasicNewsRecipe):
title = u'20 Minutos' title = u'20 Minutos new'
publisher = u'Grupo 20 Minutos' publisher = u'Grupo 20 Minutos'
__author__ = 'Luis Hernández' __author__ = 'Luis Hernandez'
description = 'Periódico gratuito en español' description = 'Free spanish newspaper'
cover_url = 'http://estaticos.20minutos.es/mmedia/especiales/corporativo/css/img/logotipos_grupo20minutos.gif' cover_url = 'http://estaticos.20minutos.es/mmedia/especiales/corporativo/css/img/logotipos_grupo20minutos.gif'
oldest_article = 5 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
remove_javascript = True remove_javascript = True
@ -29,6 +29,7 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
encoding = 'ISO-8859-1' encoding = 'ISO-8859-1'
language = 'es' language = 'es'
timefmt = '[%a, %d %b, %Y]' timefmt = '[%a, %d %b, %Y]'
remove_empty_feeds = True
keep_only_tags = [ keep_only_tags = [
dict(name='div', attrs={'id':['content','vinetas',]}) dict(name='div', attrs={'id':['content','vinetas',]})
@ -43,13 +44,21 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
remove_tags = [ remove_tags = [
dict(name='ol', attrs={'class':['navigation',]}) dict(name='ol', attrs={'class':['navigation',]})
,dict(name='span', attrs={'class':['action']}) ,dict(name='span', attrs={'class':['action']})
,dict(name='div', attrs={'class':['twitter comments-list hidden','related-news','col','photo-gallery','calendario','article-comment','postto estirar','otras_vinetas estirar','kment','user-actions']}) ,dict(name='div', attrs={'class':['twitter comments-list hidden','related-news','col','photo-gallery','photo-gallery side-art-block','calendario','article-comment','postto estirar','otras_vinetas estirar','kment','user-actions']})
,dict(name='div', attrs={'id':['twitter-destacados','eco-tabs','inner','vineta_calendario','vinetistas clearfix','otras_vinetas estirar','MIN1','main','SUP1','INT']}) ,dict(name='div', attrs={'id':['twitter-destacados','eco-tabs','inner','vineta_calendario','vinetistas clearfix','otras_vinetas estirar','MIN1','main','SUP1','INT']})
,dict(name='ul', attrs={'class':['article-user-actions','stripped-list']}) ,dict(name='ul', attrs={'class':['article-user-actions','stripped-list']})
,dict(name='ul', attrs={'id':['site-links']}) ,dict(name='ul', attrs={'id':['site-links']})
,dict(name='li', attrs={'class':['puntuacion','enviar','compartir']}) ,dict(name='li', attrs={'class':['puntuacion','enviar','compartir']})
] ]
extra_css = """
p{text-align: justify; font-size: 100%}
body{ text-align: left; font-size:100% }
h3{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
"""
preprocess_regexps = [(re.compile(r'<a href="http://estaticos.*?[0-999]px;" target="_blank">', re.DOTALL), lambda m: '')]
feeds = [ feeds = [
(u'Portada' , u'http://www.20minutos.es/rss/') (u'Portada' , u'http://www.20minutos.es/rss/')
,(u'Nacional' , u'http://www.20minutos.es/rss/nacional/') ,(u'Nacional' , u'http://www.20minutos.es/rss/nacional/')
@ -65,6 +74,6 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
,(u'Empleo' , u'http://www.20minutos.es/rss/empleo/') ,(u'Empleo' , u'http://www.20minutos.es/rss/empleo/')
,(u'Cine' , u'http://www.20minutos.es/rss/cine/') ,(u'Cine' , u'http://www.20minutos.es/rss/cine/')
,(u'Musica' , u'http://www.20minutos.es/rss/musica/') ,(u'Musica' , u'http://www.20minutos.es/rss/musica/')
,(u'Vinetas' , u'http://www.20minutos.es/rss/vinetas/') ,(u'Vinetas' , u'http://www.20minutos.es/rss/vinetas/')
,(u'Comunidad20' , u'http://www.20minutos.es/rss/zona20/') ,(u'Comunidad20' , u'http://www.20minutos.es/rss/zona20/')
] ]

View File

@ -0,0 +1,71 @@
__license__ = 'GPL v3'
__author__ = 'Luis Hernandez'
__copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
__version__ = 'v1.2'
__date__ = '31 January 2011'
'''
http://www.cincodias.com/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1294946868(BasicNewsRecipe):
    # Recipe for 'Cinco Dias' (www.cincodias.com), the free online edition
    # of the Spanish financial daily published by Grupo Prisa.
    title = u'Cinco Dias'
    publisher = u'Grupo Prisa'
    __author__ = 'Luis Hernandez'
    description = 'spanish web about money and bussiness, free edition'
    cover_url = 'http://www.prisa.com/images/logos/logo_cinco_dias.gif'

    oldest_article = 2            # skip articles older than 2 days
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False  # always fetch the full article page
    language = 'es'
    remove_empty_feeds = True
    encoding = 'ISO-8859-1'       # site serves Latin-1, not UTF-8
    timefmt = '[%a, %d %b, %Y]'

    # Keep only the article header, body and "despiece" (sidebar) blocks.
    keep_only_tags = [
        dict(name='div', attrs={'class':['cab_articulo cab_noticia','pos_3','txt_noticia','mod_despiece']})
        ,dict(name='p', attrs={'class':['cintillo']})
    ]

    remove_tags_before = dict(name='div' , attrs={'class':['publi_h']})
    remove_tags_after = dict(name='div' , attrs={'class':['tab_util util_estadisticas']})

    # Strip sharing widgets, related-news modules, market tickers and other
    # page furniture that is useless in an e-book.
    remove_tags = [
        dict(name='div', attrs={'class':['util-1','util-2','util-3','inner estirar','inner1','inner2','inner3','cont','tab_util util_estadisticas','tab_util util_enviar','mod_list_inf','mod_similares','mod_divisas','mod_sectores','mod_termometro','mod post','mod_img','mod_txt','nivel estirar','barra estirar','info_brujula btnBrujula','utilidad_brujula estirar']})
        ,dict(name='li', attrs={'class':['lnk-fcbook','lnk-retweet','lnk-meneame','desplegable','comentarios','list-options','estirar']})
        ,dict(name='ul', attrs={'class':['lista-izquierda','list-options','estirar']})
        ,dict(name='p', attrs={'class':['autor']})
    ]

    extra_css = """
        p{text-align: justify; font-size: 100%}
        body{ text-align: left; font-size:100% }
        h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
        h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
    """

    # Section RSS feeds, identified by the site's numeric feedId.
    feeds = [
        (u'Ultima Hora' , u'http://www.cincodias.com/rss/feed.html?feedId=17029')
        ,(u'Empresas' , u'http://www.cincodias.com/rss/feed.html?feedId=19')
        ,(u'Mercados' , u'http://www.cincodias.com/rss/feed.html?feedId=20')
        ,(u'Economia' , u'http://www.cincodias.com/rss/feed.html?feedId=21')
        ,(u'Tecnorama' , u'http://www.cincodias.com/rss/feed.html?feedId=17230')
        ,(u'Tecnologia' , u'http://www.cincodias.com/rss/feed.html?feedId=17106')
        ,(u'Finanzas Personales' , u'http://www.cincodias.com/rss/feed.html?feedId=22')
        ,(u'Fiscalidad' , u'http://www.cincodias.com/rss/feed.html?feedId=17107')
        ,(u'Vivienda' , u'http://www.cincodias.com/rss/feed.html?feedId=17108')
        ,(u'Tendencias' , u'http://www.cincodias.com/rss/feed.html?feedId=17109')
        ,(u'Empleo' , u'http://www.cincodias.com/rss/feed.html?feedId=17110')
        ,(u'IBEX 35' , u'http://www.cincodias.com/rss/feed.html?feedId=17125')
        ,(u'Sectores' , u'http://www.cincodias.com/rss/feed.html?feedId=17126')
        ,(u'Opinion' , u'http://www.cincodias.com/rss/feed.html?feedId=17105')
    ]

View File

@ -1,73 +1,92 @@
#!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
latimes.com www.latimes.com
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class LATimes(BasicNewsRecipe): class LATimes(BasicNewsRecipe):
title = u'The Los Angeles Times' title = 'Los Angeles Times'
__author__ = u'Darko Miletic and Sujata Raman' __author__ = 'Darko Miletic'
description = u'News from Los Angeles' description = 'The Los Angeles Times is a leading source of news on Southern California, entertainment, movies, television, music, politics, business, health, technology, travel, sports, environment, economics, autos, jobs, real estate and other topics affecting California'
oldest_article = 7 publisher = 'Tribune Company'
max_articles_per_feed = 100 category = 'news, politics, USA, Los Angeles, world'
language = 'en' oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False use_embedded_content = False
encoding = 'utf-8' language = 'en'
lang = 'en-US' remove_empty_feeds = True
publication_type = 'newspaper'
masthead_url = 'http://www.latimes.com/images/logo.png'
cover_url = 'http://www.latimes.com/includes/sectionfronts/A1.pdf'
extra_css = """
body{font-family: Georgia,"Times New Roman",Times,serif }
img{margin-bottom: 0.4em; margin-top: 0.8em; display:block}
h2{font-size: 1.1em}
.deckhead{font-size: small; text-transform: uppercase}
.small{color: gray; font-size: small}
.date,.time,.copyright{font-size: x-small; color:gray; font-style:italic;}
"""
conversion_options = { conversion_options = {
'comment' : description 'comment' : description
, 'language' : lang , 'tags' : category
} , 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : 'Yes'
}
extra_css = ''' keep_only_tags = [
h1{font-family :Georgia,"Times New Roman",Times,serif; font-size:large; } dict(name='div', attrs={'class':'story'})
h2{font-family :Georgia,"Times New Roman",Times,serif; font-size:x-small;} ,dict(attrs={'class':['entry-header','time','entry-content']})
.story{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;} ]
.entry-body{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;} remove_tags_after=dict(name='p', attrs={'class':'copyright'})
.entry-more{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;} remove_tags = [
.credit{color:#666666; font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;} dict(name=['meta','link','iframe','object','embed'])
.small{color:#666666; font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;} ,dict(attrs={'class':['toolSet','articlerail','googleAd','entry-footer-left','entry-footer-right','entry-footer-social','google-ad-story-bottom','sphereTools']})
.byline{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;} ,dict(attrs={'id':['article-promo','googleads','moduleArticleToolsContainer','gallery-subcontent']})
.date{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; font-style:italic;} ]
.time{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; font-style:italic;} remove_attributes=['lang','xmlns:fb','xmlns:og','border','xtags','i','article_body']
.copyright{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; }
.subhead{font-family :Georgia,"Times New Roman",Times,serif; font-size:x-small;}
'''
# recursions = 1
# match_regexps = [r'http://www.latimes.com/.*page=[2-9]']
keep_only_tags = [dict(name='div', attrs={'class':["story" ,"entry"] })]
remove_tags = [ dict(name='div', attrs={'class':['articlerail',"sphereTools","tools","toppaginate","entry-footer-left","entry-footer-right"]}), feeds = [
dict(name='div', attrs={'id':["moduleArticleToolsContainer",]}), (u'Top News' , u'http://feeds.latimes.com/latimes/news' )
dict(name='p', attrs={'class':["entry-footer",]}), ,(u'Local News' , u'http://feeds.latimes.com/latimes/news/local' )
dict(name='ul', attrs={'class':"article-nav clearfix"}), ,(u'National' , u'http://feeds.latimes.com/latimes/news/nationworld/nation' )
dict(name=['iframe']) ,(u'National Politics' , u'http://feeds.latimes.com/latimes/news/politics/' )
] ,(u'Business' , u'http://feeds.latimes.com/latimes/business' )
,(u'Education' , u'http://feeds.latimes.com/latimes/news/education' )
,(u'Environment' , u'http://feeds.latimes.com/latimes/news/science/environment' )
feeds = [(u'News', u'http://feeds.latimes.com/latimes/news') ,(u'Religion' , u'http://feeds.latimes.com/latimes/features/religion' )
,(u'Local','http://feeds.latimes.com/latimes/news/local') ,(u'Science' , u'http://feeds.latimes.com/latimes/news/science' )
,(u'MostEmailed','http://feeds.latimes.com/MostEmailed') ,(u'Technology' , u'http://feeds.latimes.com/latimes/technology' )
,(u'Politics','http://feeds.latimes.com/latimes/news/local/politics/cal/') ,(u'Africa' , u'http://feeds.latimes.com/latimes/africa' )
,('OrangeCounty','http://feeds.latimes.com/latimes/news/local/orange/') ,(u'Asia' , u'http://feeds.latimes.com/latimes/asia' )
,('National','http://feeds.latimes.com/latimes/news/nationworld/nation') ,(u'Europe' , u'http://feeds.latimes.com/latimes/europe' )
,('Politics','http://feeds.latimes.com/latimes/news/politics/') ,(u'Latin America' , u'http://feeds.latimes.com/latimes/latinamerica' )
,('Business','http://feeds.latimes.com/latimes/business') ,(u'Middle East' , u'http://feeds.latimes.com/latimes/middleeast' )
,('Sports','http://feeds.latimes.com/latimes/sports/') ,(u'Arts&Culture' , u'http://feeds.feedburner.com/latimes/entertainment/news/arts' )
,('Entertainment','http://feeds.latimes.com/latimes/entertainment/') ,(u'Entertainment News' , u'http://feeds.feedburner.com/latimes/entertainment/news/' )
] ,(u'Movie News' , u'http://feeds.feedburner.com/latimes/entertainment/news/movies/' )
,(u'Movie Reviews' , u'http://feeds.feedburner.com/movies/reviews/' )
,(u'Music News' , u'http://feeds.feedburner.com/latimes/entertainment/news/music/' )
,(u'Pop Album Reviews' , u'http://feeds.feedburner.com/latimes/pop-album-reviews' )
,(u'Restaurant Reviews' , u'http://feeds.feedburner.com/latimes/restaurant/reviews' )
,(u'Theatar and Dance' , u'http://feeds.feedburner.com/latimes/theaterdance' )
,(u'Autos' , u'http://feeds.latimes.com/latimes/classified/automotive/highway1/')
,(u'Books' , u'http://feeds.latimes.com/features/books' )
,(u'Food' , u'http://feeds.latimes.com/latimes/features/food/' )
,(u'Health' , u'http://feeds.latimes.com/latimes/features/health/' )
,(u'Real Estate' , u'http://feeds.latimes.com/latimes/classified/realestate/' )
,(u'Commentary' , u'http://feeds2.feedburner.com/latimes/news/opinion/commentary/' )
,(u'Sports' , u'http://feeds.latimes.com/latimes/sports/' )
]
def get_article_url(self, article): def get_article_url(self, article):
ans = article.get('feedburner_origlink').rpartition('?')[0] ans = BasicNewsRecipe.get_article_url(self, article).rpartition('?')[0]
try: try:
self.log('Looking for full story link in', ans) self.log('Looking for full story link in', ans)
@ -83,4 +102,22 @@ class LATimes(BasicNewsRecipe):
pass pass
return ans return ans
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
for item in soup.findAll('a'):
limg = item.find('img')
if item.string is not None:
str = item.string
item.replaceWith(str)
else:
if limg:
item.name ='div'
item.attrs =[]
else:
str = self.tag_to_string(item)
item.replaceWith(str)
return soup

View File

@ -139,6 +139,13 @@ class CHMReader(CHMFile):
if self.hhc_path not in files and files: if self.hhc_path not in files and files:
self.hhc_path = files[0] self.hhc_path = files[0]
if self.hhc_path == '.hhc' and self.hhc_path not in files:
from calibre import walk
for x in walk(output_dir):
if os.path.basename(x).lower() in ('index.htm', 'index.html'):
self.hhc_path = os.path.relpath(x, output_dir)
break
def _reformat(self, data, htmlpath): def _reformat(self, data, htmlpath):
try: try:
data = xml_to_unicode(data, strip_encoding_pats=True)[0] data = xml_to_unicode(data, strip_encoding_pats=True)[0]

View File

@ -175,6 +175,19 @@ class EPUBInput(InputFormatPlugin):
raise ValueError( raise ValueError(
'EPUB files with DTBook markup are not supported') 'EPUB files with DTBook markup are not supported')
for x in list(opf.iterspine()):
ref = x.get('idref', None)
if ref is None:
x.getparent().remove(x)
continue
for y in opf.itermanifest():
if y.get('id', None) == ref and y.get('media-type', None) in \
('application/vnd.adobe-page-template+xml',):
p = x.getparent()
if p is not None:
p.remove(x)
break
with open('content.opf', 'wb') as nopf: with open('content.opf', 'wb') as nopf:
nopf.write(opf.render()) nopf.write(opf.render())

View File

@ -0,0 +1,61 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
from calibre.customize import Plugin
class Source(Plugin):
    # Base class for metadata download source plugins. Concrete sources
    # (e.g. Google Books) subclass this and implement identify().

    type = _('Metadata source')
    author = 'Kovid Goyal'
    supported_platforms = ['windows', 'osx', 'linux']

    # NOTE(review): presumably signals that identify() results need no
    # further completion pass -- confirm against the plugin framework.
    result_of_identify_is_complete = True

    def get_author_tokens(self, authors):
        '''
        Take a list of authors and return a generator of tokens useful for
        an AND search query.
        '''
        # BUGFIX: the original "docstring" was two adjacent string
        # *statements*; only the first became the docstring and the second
        # was a dead no-op. Merged into a single proper docstring.
        # Strip punctuation from tokens; leave ' in there for Irish names.
        pat = re.compile(r'[-,:;+!@#$%^&*(){}.`~"\s\[\]/]')
        for au in authors:
            for tok in au.split():
                yield pat.sub('', tok)

    def split_jobs(self, jobs, num):
        'Split a list of jobs into at most num groups, as evenly as possible'
        groups = [[] for i in range(num)]
        jobs = list(jobs)
        # Deal jobs round-robin (from the end of the list) until exhausted.
        while jobs:
            for gr in groups:
                try:
                    job = jobs.pop()
                except IndexError:
                    break
                gr.append(job)
        # Drop any groups that received no jobs.
        return [g for g in groups if g]

    def identify(self, log, result_queue, abort, title=None, authors=None, identifiers={}):
        '''
        Identify a book by its title/author/isbn/etc.

        :param log: A log object, use it to output debugging information/errors
        :param result_queue: A result Queue, results should be put into it.
                             Each result is a Metadata object
        :param abort: If abort.is_set() returns True, abort further processing
                      and return as soon as possible
        :param title: The title of the book, can be None
        :param authors: A list of authors of the book, can be None
        :param identifiers: A dictionary of other identifiers, most commonly
                            {'isbn':'1234...'}
        :return: None if no errors occurred, otherwise a unicode representation
                 of the error suitable for showing to the user
        '''
        # Default implementation does nothing; subclasses must override.
        return None

View File

@ -0,0 +1,215 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import time
from urllib import urlencode
from functools import partial
from threading import Thread
from lxml import etree
from calibre.ebooks.metadata.sources import Source
from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import parse_date, utcnow
from calibre import browser, as_unicode
NAMESPACES = {
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
'atom' : 'http://www.w3.org/2005/Atom',
'dc': 'http://purl.org/dc/terms'
}
XPath = partial(etree.XPath, namespaces=NAMESPACES)
total_results = XPath('//openSearch:totalResults')
start_index = XPath('//openSearch:startIndex')
items_per_page = XPath('//openSearch:itemsPerPage')
entry = XPath('//atom:entry')
entry_id = XPath('descendant::atom:id')
creator = XPath('descendant::dc:creator')
identifier = XPath('descendant::dc:identifier')
title = XPath('descendant::dc:title')
date = XPath('descendant::dc:date')
publisher = XPath('descendant::dc:publisher')
subject = XPath('descendant::dc:subject')
description = XPath('descendant::dc:description')
language = XPath('descendant::dc:language')
def to_metadata(browser, log, entry_):
    '''
    Convert one atom <entry> element from the Google Books volumes feed into
    a calibre Metadata object, fetching the entry's detail feed for the
    extra fields (comments, publisher, ISBN, tags, pubdate).

    Returns None if the entry has no id URL or no title; returns a partial
    Metadata object if fetching the details fails.
    '''

    def get_text(extra, x):
        # Evaluate XPath x against the detail entry and return the stripped
        # text of the first match, or None.
        try:
            ans = x(extra)
            if ans:
                ans = ans[0].text
                if ans and ans.strip():
                    return ans.strip()
        except Exception:
            log.exception('Programming error:')
        return None

    id_url = entry_id(entry_)[0].text
    # Guard against title elements with empty text (would break join()).
    title_ = ': '.join([x.text for x in title(entry_) if x.text]).strip()
    authors = [x.text.strip() for x in creator(entry_) if x.text]
    if not authors:
        authors = [_('Unknown')]
    # BUGFIX: the original tested "not title" -- the module-level XPath
    # callable, which is always truthy -- so titleless entries were never
    # discarded. Test the local title_ string instead.
    if not id_url or not title_:
        # Silently discard this entry
        return None

    mi = Metadata(title_, authors)
    try:
        raw = browser.open(id_url).read()
        feed = etree.fromstring(raw)
        extra = entry(feed)[0]
    except Exception:
        log.exception('Failed to get additional details for', mi.title)
        return mi

    mi.comments = get_text(extra, description)
    #mi.language = get_text(extra, language)
    mi.publisher = get_text(extra, publisher)

    # Author sort: use the dc:creator file-as attribute when present.
    for x in creator(extra):
        for key, val in x.attrib.items():
            if key.endswith('file-as') and val and val.strip():
                mi.author_sort = val
                break

    # ISBN: collect all ISBN identifiers and keep the longest (ISBN-13
    # preferred over ISBN-10).
    isbns = []
    for x in identifier(extra):
        t = str(x.text).strip()
        if t[:5].upper() in ('ISBN:', 'LCCN:', 'OCLC:'):
            if t[:5].upper() == 'ISBN:':
                isbns.append(t[5:])
    if isbns:
        mi.isbn = sorted(isbns, key=len)[-1]

    # Tags: split slash-separated subject strings into individual tags.
    try:
        btags = [x.text for x in subject(extra) if x.text]
        tags = []
        for t in btags:
            tags.extend([y.strip() for y in t.split('/')])
        tags = list(sorted(list(set(tags))))
    except Exception:
        log.exception('Failed to parse tags:')
        tags = []
    if tags:
        # Commas would be interpreted as tag separators by calibre.
        mi.tags = [x.replace(',', ';') for x in tags]

    # pubdate: default the day to mid-month when the feed omits it.
    pubdate = get_text(extra, date)
    if pubdate:
        try:
            default = utcnow().replace(day=15)
            mi.pubdate = parse_date(pubdate, assume_utc=True, default=default)
        except Exception:
            log.exception('Failed to parse pubdate')

    return mi
class Worker(Thread):
    # Daemon thread that turns a batch of feed <entry> elements into
    # Metadata objects and puts them on the shared result queue.

    def __init__(self, log, entries, abort, result_queue):
        Thread.__init__(self)
        self.daemon = True
        self.browser = browser()
        self.log = log
        self.entries = entries
        self.abort = abort
        self.result_queue = result_queue

    def run(self):
        # Convert each entry in turn; a failure on one entry is logged and
        # does not stop the rest. Stop early once abort is signalled.
        for entry_elem in self.entries:
            try:
                mi = to_metadata(self.browser, self.log, entry_elem)
                if mi is not None:
                    self.result_queue.put(mi)
            except:
                self.log.exception(
                        'Failed to get metadata for identify entry:',
                        etree.tostring(entry_elem))
            if self.abort.is_set():
                break
class GoogleBooks(Source):
name = 'Google Books'
def create_query(self, log, title=None, authors=None, identifiers={},
start_index=1):
BASE_URL = 'http://books.google.com/books/feeds/volumes?'
isbn = identifiers.get('isbn', None)
q = ''
if isbn is not None:
q += 'isbn:'+isbn
elif title or authors:
def build_term(prefix, parts):
return ' '.join('in'+prefix + ':' + x for x in parts)
if title is not None:
q += build_term('title', title.split())
if authors:
q += ('+' if q else '')+build_term('author',
self.get_author_tokens(authors))
if isinstance(q, unicode):
q = q.encode('utf-8')
if not q:
return None
return BASE_URL+urlencode({
'q':q,
'max-results':20,
'start-index':start_index,
'min-viewability':'none',
})
def identify(self, log, result_queue, abort, title=None, authors=None, identifiers={}):
query = self.create_query(log, title=title, authors=authors,
identifiers=identifiers)
try:
raw = browser().open_novisit(query).read()
except Exception, e:
log.exception('Failed to make identify query: %r'%query)
return as_unicode(e)
try:
parser = etree.XMLParser(recover=True, no_network=True)
feed = etree.fromstring(raw, parser=parser)
entries = entry(feed)
except Exception, e:
log.exception('Failed to parse identify results')
return as_unicode(e)
groups = self.split_jobs(entries, 5) # At most 5 threads
if not groups:
return
workers = [Worker(log, entries, abort, result_queue) for entries in
groups]
if abort.is_set():
return
for worker in workers: worker.start()
has_alive_worker = True
while has_alive_worker and not abort.is_set():
has_alive_worker = False
for worker in workers:
if worker.is_alive():
has_alive_worker = True
time.sleep(0.1)
return None

View File

@ -83,6 +83,7 @@ class RTFInput(InputFormatPlugin):
os.mkdir(debug_dir) os.mkdir(debug_dir)
debug_dir = 'rtfdebug' debug_dir = 'rtfdebug'
run_lev = 4 run_lev = 4
self.log('Running RTFParser in debug mode')
except: except:
pass pass
parser = ParseRtf( parser = ParseRtf(
@ -230,22 +231,6 @@ class RTFInput(InputFormatPlugin):
with open('styles.css', 'ab') as f: with open('styles.css', 'ab') as f:
f.write(css) f.write(css)
# def preprocess(self, fname):
# self.log('\tPreprocessing to convert unicode characters')
# try:
# data = open(fname, 'rb').read()
# from calibre.ebooks.rtf.preprocess import RtfTokenizer, RtfTokenParser
# tokenizer = RtfTokenizer(data)
# tokens = RtfTokenParser(tokenizer.tokens)
# data = tokens.toRTF()
# fname = 'preprocessed.rtf'
# with open(fname, 'wb') as f:
# f.write(data)
# except:
# self.log.exception(
# 'Failed to preprocess RTF to convert unicode sequences, ignoring...')
# return fname
def convert_borders(self, doc): def convert_borders(self, doc):
border_styles = [] border_styles = []
style_map = {} style_map = {}
@ -280,8 +265,6 @@ class RTFInput(InputFormatPlugin):
self.opts = options self.opts = options
self.log = log self.log = log
self.log('Converting RTF to XML...') self.log('Converting RTF to XML...')
#Name of the preprocesssed RTF file
# fname = self.preprocess(stream.name)
try: try:
xml = self.generate_xml(stream.name) xml = self.generate_xml(stream.name)
except RtfInvalidCodeException, e: except RtfInvalidCodeException, e:
@ -335,3 +318,4 @@ class RTFInput(InputFormatPlugin):
opf.render(open('metadata.opf', 'wb')) opf.render(open('metadata.opf', 'wb'))
return os.path.abspath('metadata.opf') return os.path.abspath('metadata.opf')

View File

@ -238,6 +238,8 @@ class ParseRtf:
bug_handler = RtfInvalidCodeException, bug_handler = RtfInvalidCodeException,
) )
enc = 'cp' + encode_obj.get_codepage() enc = 'cp' + encode_obj.get_codepage()
if enc == 'cp10000':
enc = 'mac_roman'
msg = 'Exception in token processing' msg = 'Exception in token processing'
if check_encoding_obj.check_encoding(self.__file, enc): if check_encoding_obj.check_encoding(self.__file, enc):
file_name = self.__file if isinstance(self.__file, str) \ file_name = self.__file if isinstance(self.__file, str) \

View File

@ -15,8 +15,10 @@
# # # #
# # # #
######################################################################### #########################################################################
import sys, os, tempfile, re import sys, os, tempfile, re
from calibre.ebooks.rtf2xml import copy from calibre.ebooks.rtf2xml import copy
class Colors: class Colors:
""" """
Change lines with color info from color numbers to the actual color names. Change lines with color info from color numbers to the actual color names.
@ -40,8 +42,10 @@ class Colors:
self.__file = in_file self.__file = in_file
self.__copy = copy self.__copy = copy
self.__bug_handler = bug_handler self.__bug_handler = bug_handler
self.__line = 0
self.__write_to = tempfile.mktemp() self.__write_to = tempfile.mktemp()
self.__run_level = run_level self.__run_level = run_level
def __initiate_values(self): def __initiate_values(self):
""" """
Initiate all values. Initiate all values.
@ -61,6 +65,7 @@ class Colors:
self.__color_num = 1 self.__color_num = 1
self.__line_color_exp = re.compile(r'bdr-color_:(\d+)') self.__line_color_exp = re.compile(r'bdr-color_:(\d+)')
# cw<bd<bor-par-to<nu<bdr-hair__|bdr-li-wid:0.50|bdr-sp-wid:1.00|bdr-color_:2 # cw<bd<bor-par-to<nu<bdr-hair__|bdr-li-wid:0.50|bdr-sp-wid:1.00|bdr-color_:2
def __before_color_func(self, line): def __before_color_func(self, line):
""" """
Requires: Requires:
@ -76,6 +81,7 @@ class Colors:
if self.__token_info == 'mi<mk<clrtbl-beg': if self.__token_info == 'mi<mk<clrtbl-beg':
self.__state = 'in_color_table' self.__state = 'in_color_table'
self.__write_obj.write(line) self.__write_obj.write(line)
def __default_color_func(self, line): def __default_color_func(self, line):
""" """
Requires: Requires:
@ -87,6 +93,7 @@ class Colors:
""" """
hex_num = line[-3:-1] hex_num = line[-3:-1]
self.__color_string += hex_num self.__color_string += hex_num
def __blue_func(self, line): def __blue_func(self, line):
""" """
Requires: Requires:
@ -109,6 +116,7 @@ class Colors:
) )
self.__color_num += 1 self.__color_num += 1
self.__color_string = '#' self.__color_string = '#'
def __in_color_func(self, line): def __in_color_func(self, line):
""" """
Requires: Requires:
@ -127,12 +135,13 @@ class Colors:
self.__state = 'after_color_table' self.__state = 'after_color_table'
else: else:
action = self.__state_dict.get(self.__token_info) action = self.__state_dict.get(self.__token_info)
if action == None: if action is None:
sys.stderr.write('in module colors.py\n' sys.stderr.write('in module colors.py\n'
'function is self.__in_color_func\n' 'function is self.__in_color_func\n'
'no action for %s' % self.__token_info 'no action for %s' % self.__token_info
) )
action(line) action(line)
def __after_color_func(self, line): def __after_color_func(self, line):
""" """
Check the to see if it contains color info. If it does, extract the Check the to see if it contains color info. If it does, extract the
@ -180,6 +189,7 @@ class Colors:
else: else:
self.__write_obj.write(line) self.__write_obj.write(line)
# cw<bd<bor-par-to<nu<bdr-hair__|bdr-li-wid:0.50|bdr-sp-wid:1.00|bdr-color_:2 # cw<bd<bor-par-to<nu<bdr-hair__|bdr-li-wid:0.50|bdr-sp-wid:1.00|bdr-color_:2
def __sub_from_line_color(self, match_obj): def __sub_from_line_color(self, match_obj):
num = match_obj.group(1) num = match_obj.group(1)
try: try:
@ -191,25 +201,27 @@ class Colors:
else: else:
return 'bdr-color_:no-value' return 'bdr-color_:no-value'
hex_num = self.__figure_num(num) hex_num = self.__figure_num(num)
return_value = 'bdr-color_:%s' % hex_num return 'bdr-color_:%s' % hex_num
return return_value
def __figure_num(self, num): def __figure_num(self, num):
if num == 0: if num == 0:
hex_num = 'false' hex_num = 'false'
else: else:
hex_num = self.__color_dict.get(num) hex_num = self.__color_dict.get(num)
if hex_num == None: if hex_num is None:
if self.__run_level > 3:
msg = 'no value in self.__color_dict for key %s\n' % num
raise self.__bug_hanlder, msg
if hex_num == None:
hex_num = '0' hex_num = '0'
if self.__run_level > 5:
msg = 'no value in self.__color_dict' \
'for key %s at line %d\n' % (num, self.__line)
raise self.__bug_handler, msg
return hex_num return hex_num
def __do_nothing_func(self, line): def __do_nothing_func(self, line):
""" """
Bad RTF will have text in the color table Bad RTF will have text in the color table
""" """
pass pass
def convert_colors(self): def convert_colors(self):
""" """
Requires: Requires:
@ -226,20 +238,16 @@ class Colors:
info, and substitute the number with the hex number. info, and substitute the number with the hex number.
""" """
self.__initiate_values() self.__initiate_values()
read_obj = open(self.__file, 'r') with open(self.__file, 'r') as read_obj:
self.__write_obj = open(self.__write_to, 'w') with open(self.__write_to, 'w') as self.__write_obj:
line_to_read = 1 for line in read_obj:
while line_to_read: self.__line+=1
line_to_read = read_obj.readline() self.__token_info = line[:16]
line = line_to_read action = self.__state_dict.get(self.__state)
self.__token_info = line[:16] if action is None:
action = self.__state_dict.get(self.__state) sys.stderr.write('no matching state in module fonts.py\n')
if action == None: sys.stderr.write(self.__state + '\n')
sys.stderr.write('no no matching state in module fonts.py\n') action(line)
sys.stderr.write(self.__state + '\n')
action(line)
read_obj.close()
self.__write_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "color.data") copy_obj.copy_file(self.__write_to, "color.data")

View File

@ -33,13 +33,13 @@ class ConvertToTags:
self.__copy = copy self.__copy = copy
self.__dtd_path = dtd_path self.__dtd_path = dtd_path
self.__no_dtd = no_dtd self.__no_dtd = no_dtd
if encoding != 'mac_roman': self.__encoding = 'cp' + encoding
self.__encoding = 'cp' + encoding if encoding == 'mac_roman':
else:
self.__encoding = 'mac_roman' self.__encoding = 'mac_roman'
self.__indent = indent self.__indent = indent
self.__run_level = run_level self.__run_level = run_level
self.__write_to = tempfile.mktemp() self.__write_to = tempfile.mktemp()
self.__convert_utf = False
def __initiate_values(self): def __initiate_values(self):
""" """
@ -213,7 +213,8 @@ class ConvertToTags:
if not check_encoding_obj.check_encoding(self.__file, verbose=False): if not check_encoding_obj.check_encoding(self.__file, verbose=False):
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>') self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
elif not check_encoding_obj.check_encoding(self.__file, self.__encoding): elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
self.__write_obj.write('<?xml version="1.0" encoding="%s" ?>' % self.__encoding) self.__write_obj.write('<?xml version="1.0" encoding="UTF-8" ?>')
self.__convert_utf = True
else: else:
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>') self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and' sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
@ -253,15 +254,28 @@ class ConvertToTags:
an empty tag function. an empty tag function.
""" """
self.__initiate_values() self.__initiate_values()
self.__write_obj = open(self.__write_to, 'w') with open(self.__write_to, 'w') as self.__write_obj:
self.__write_dec() self.__write_dec()
with open(self.__file, 'r') as read_obj: with open(self.__file, 'r') as read_obj:
for line in read_obj: for line in read_obj:
self.__token_info = line[:16] self.__token_info = line[:16]
action = self.__state_dict.get(self.__token_info) action = self.__state_dict.get(self.__token_info)
if action is not None: if action is not None:
action(line) action(line)
self.__write_obj.close() self.__write_obj.close()
#convert all encodings to UTF8 to avoid unsupported encodings in lxml
if self.__convert_utf:
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
copy_obj.rename(self.__write_to, self.__file)
with open(self.__file, 'r') as read_obj:
with open(self.__write_to, 'w') as write_obj:
file = read_obj.read()
try:
file = file.decode(self.__encoding)
write_obj.write(file.encode('utf-8'))
except:
sys.stderr.write('Conversion to UTF-8 is not possible,'
' encoding should be very carefully checked')
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "convert_to_tags.data") copy_obj.copy_file(self.__write_to, "convert_to_tags.data")

View File

@ -75,12 +75,16 @@ class DefaultEncoding:
self._encoding() self._encoding()
self.__datafetched = True self.__datafetched = True
code_page = 'ansicpg' + self.__code_page code_page = 'ansicpg' + self.__code_page
if self.__code_page == '10000':
self.__code_page = 'mac_roman'
return self.__platform, code_page, self.__default_num return self.__platform, code_page, self.__default_num
def get_codepage(self): def get_codepage(self):
if not self.__datafetched: if not self.__datafetched:
self._encoding() self._encoding()
self.__datafetched = True self.__datafetched = True
if self.__code_page == '10000':
self.__code_page = 'mac_roman'
return self.__code_page return self.__code_page
def get_platform(self): def get_platform(self):

View File

@ -16,7 +16,9 @@
# # # #
######################################################################### #########################################################################
import sys, os, tempfile import sys, os, tempfile
from calibre.ebooks.rtf2xml import copy from calibre.ebooks.rtf2xml import copy
class Fonts: class Fonts:
""" """
Change lines with font info from font numbers to the actual font names. Change lines with font info from font numbers to the actual font names.
@ -45,6 +47,7 @@ class Fonts:
self.__default_font_num = default_font_num self.__default_font_num = default_font_num
self.__write_to = tempfile.mktemp() self.__write_to = tempfile.mktemp()
self.__run_level = run_level self.__run_level = run_level
def __initiate_values(self): def __initiate_values(self):
""" """
Initiate all values. Initiate all values.
@ -67,6 +70,7 @@ class Fonts:
self.__font_table = {} self.__font_table = {}
# individual font written # individual font written
self.__wrote_ind_font = 0 self.__wrote_ind_font = 0
def __default_func(self, line): def __default_func(self, line):
""" """
Requires: Requires:
@ -79,6 +83,7 @@ class Fonts:
if self.__token_info == 'mi<mk<fonttb-beg': if self.__token_info == 'mi<mk<fonttb-beg':
self.__state = 'font_table' self.__state = 'font_table'
self.__write_obj.write(line) self.__write_obj.write(line)
def __font_table_func(self, line): def __font_table_func(self, line):
""" """
Requires: Requires:
@ -101,6 +106,7 @@ class Fonts:
self.__font_num = self.__default_font_num self.__font_num = self.__default_font_num
self.__text_line = '' self.__text_line = ''
##self.__write_obj.write(line) ##self.__write_obj.write(line)
def __font_in_table_func(self, line): def __font_in_table_func(self, line):
""" """
Requires: Requires:
@ -138,6 +144,7 @@ class Fonts:
elif self.__token_info == 'mi<mk<fonttb-end': elif self.__token_info == 'mi<mk<fonttb-end':
self.__found_end_font_table_func() self.__found_end_font_table_func()
self.__state = 'after_font_table' self.__state = 'after_font_table'
def __found_end_font_table_func(self): def __found_end_font_table_func(self):
""" """
Required: Required:
@ -150,7 +157,8 @@ class Fonts:
if not self.__wrote_ind_font: if not self.__wrote_ind_font:
self.__write_obj.write( self.__write_obj.write(
'mi<tg<empty-att_' 'mi<tg<empty-att_'
'<font-in-table<name>Times<num>0\n' ) '<font-in-table<name>Times<num>0\n')
def __after_font_table_func(self, line): def __after_font_table_func(self, line):
""" """
Required: Required:
@ -169,7 +177,7 @@ class Fonts:
if self.__token_info == 'cw<ci<font-style': if self.__token_info == 'cw<ci<font-style':
font_num = line[20:-1] font_num = line[20:-1]
font_name = self.__font_table.get(font_num) font_name = self.__font_table.get(font_num)
if font_name == None: if font_name is None:
if self.__run_level > 3: if self.__run_level > 3:
msg = 'no value for %s in self.__font_table\n' % font_num msg = 'no value for %s in self.__font_table\n' % font_num
raise self.__bug_handler, msg raise self.__bug_handler, msg
@ -182,6 +190,7 @@ class Fonts:
) )
else: else:
self.__write_obj.write(line) self.__write_obj.write(line)
def convert_fonts(self): def convert_fonts(self):
""" """
Required: Required:
@ -197,20 +206,15 @@ class Fonts:
info. Substitute a font name for a font number. info. Substitute a font name for a font number.
""" """
self.__initiate_values() self.__initiate_values()
read_obj = open(self.__file, 'r') with open(self.__file, 'r') as read_obj:
self.__write_obj = open(self.__write_to, 'w') with open(self.__write_to, 'w') as self.__write_obj:
line_to_read = 1 for line in read_obj:
while line_to_read: self.__token_info = line[:16]
line_to_read = read_obj.readline() action = self.__state_dict.get(self.__state)
line = line_to_read if action is None:
self.__token_info = line[:16] sys.stderr.write('no matching state in module fonts.py\n' \
action = self.__state_dict.get(self.__state) + self.__state + '\n')
if action == None: action(line)
sys.stderr.write('no no matching state in module fonts.py\n')
sys.stderr.write(self.__state + '\n')
action(line)
read_obj.close()
self.__write_obj.close()
default_font_name = self.__font_table.get(self.__default_font_num) default_font_name = self.__font_table.get(self.__default_font_num)
if not default_font_name: if not default_font_name:
default_font_name = 'Not Defined' default_font_name = 'Not Defined'

View File

@ -43,7 +43,7 @@ class GetCharMap:
def get_char_map(self, map): def get_char_map(self, map):
if map == 'ansicpg0': if map == 'ansicpg0':
map = 'ansicpg1250' map = 'ansicpg1250'
if map in ('ansicpg10000', '10000'): if map == 'ansicpg10000':
map = 'mac_roman' map = 'mac_roman'
found_map = False found_map = False
map_dict = {} map_dict = {}

View File

@ -126,12 +126,6 @@ class Tokenize:
tokens = re.split(self.__splitexp, input_file) tokens = re.split(self.__splitexp, input_file)
#remove empty tokens and \n #remove empty tokens and \n
return filter(lambda x: len(x) > 0 and x != '\n', tokens) return filter(lambda x: len(x) > 0 and x != '\n', tokens)
#input_file = re.sub(self.__utf_exp, self.__from_ms_to_utf8, input_file)
# line = re.sub( self.__neg_utf_exp, self.__neg_unicode_func, line)
# this is for older RTF
#line = re.sub(self.__par_exp, '\\par ', line)
#return filter(lambda x: len(x) > 0, \
#(self.__remove_line.sub('', x) for x in tokens))
def __compile_expressions(self): def __compile_expressions(self):
SIMPLE_RPL = { SIMPLE_RPL = {
@ -160,7 +154,7 @@ class Tokenize:
} }
self.__replace_spchar = MReplace(SIMPLE_RPL) self.__replace_spchar = MReplace(SIMPLE_RPL)
#add ;? in case of char following \u #add ;? in case of char following \u
self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})") #r"\\\'(..)" self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})")
self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) ?") self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) ?")
self.__bin_exp = re.compile(r"(?:\\bin(-?\d{0,10})[\n ]+)[01\n]+") self.__bin_exp = re.compile(r"(?:\\bin(-?\d{0,10})[\n ]+)[01\n]+")
#manage upr/ud situations #manage upr/ud situations
@ -172,14 +166,21 @@ class Tokenize:
self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)") self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
#this is for old RTF #this is for old RTF
self.__par_exp = re.compile(r'\\\n+') self.__par_exp = re.compile(r'\\\n+')
# self.__par_exp = re.compile(r'\\$') #handle cw using a digit as argument and without space as delimiter
self.__cwdigit_exp = re.compile(r"(\\[a-zA-Z]+[\-0-9]+)([^0-9 \\]+)")
#self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}") #self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
#self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})") #self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
#self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)") #self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
#self.__remove_line = re.compile(r'\n+') #self.__remove_line = re.compile(r'\n+')
#self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)") ##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
def __correct_spliting(self, token):
match_obj = re.search(self.__cwdigit_exp, token)
if match_obj is None:
return token
else:
return '%s\n%s' % (match_obj.group(1), match_obj.group(2))
def tokenize(self): def tokenize(self):
"""Main class for handling other methods. Reads the file \ """Main class for handling other methods. Reads the file \
, uses method self.sub_reg to make basic substitutions,\ , uses method self.sub_reg to make basic substitutions,\
@ -187,7 +188,7 @@ class Tokenize:
#read #read
with open(self.__file, 'r') as read_obj: with open(self.__file, 'r') as read_obj:
input_file = read_obj.read() input_file = read_obj.read()
#process simple replacements and split giving us a correct list #process simple replacements and split giving us a correct list
#remove '' and \n in the process #remove '' and \n in the process
tokens = self.__sub_reg_split(input_file) tokens = self.__sub_reg_split(input_file)
@ -195,7 +196,9 @@ class Tokenize:
tokens = map(self.__unicode_process, tokens) tokens = map(self.__unicode_process, tokens)
#remove empty items created by removing \uc #remove empty items created by removing \uc
tokens = filter(lambda x: len(x) > 0, tokens) tokens = filter(lambda x: len(x) > 0, tokens)
#handles bothersome cases
tokens = map(self.__correct_spliting, tokens)
#write #write
with open(self.__write_to, 'wb') as write_obj: with open(self.__write_to, 'wb') as write_obj:
write_obj.write('\n'.join(tokens)) write_obj.write('\n'.join(tokens))
@ -203,11 +206,9 @@ class Tokenize:
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "tokenize.data") copy_obj.copy_file(self.__write_to, "tokenize.data")
# if self.__out_file:
# self.__file = self.__out_file
copy_obj.rename(self.__write_to, self.__file) copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to) os.remove(self.__write_to)
#self.__special_tokens = [ '_', '~', "'", '{', '}' ] #self.__special_tokens = [ '_', '~', "'", '{', '}' ]
# import sys # import sys
@ -223,4 +224,4 @@ class Tokenize:
# if __name__ == '__main__': # if __name__ == '__main__':
# sys.exit(main()) # sys.exit(main())

View File

@ -429,10 +429,12 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
old_extensions.add(ext) old_extensions.add(ext)
for ext in new_extensions: for ext in new_extensions:
self.db.add_format(self.row, ext, open(paths[ext], 'rb'), notify=False) self.db.add_format(self.row, ext, open(paths[ext], 'rb'), notify=False)
db_extensions = set([f.lower() for f in self.db.formats(self.row).split(',')]) dbfmts = self.db.formats(self.row)
db_extensions = set([f.lower() for f in (dbfmts.split(',') if dbfmts
else [])])
extensions = new_extensions.union(old_extensions) extensions = new_extensions.union(old_extensions)
for ext in db_extensions: for ext in db_extensions:
if ext not in extensions: if ext not in extensions and ext in self.original_formats:
self.db.remove_format(self.row, ext, notify=False) self.db.remove_format(self.row, ext, notify=False)
def show_format(self, item, *args): def show_format(self, item, *args):
@ -576,6 +578,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
self.orig_date = qt_to_dt(self.date.date()) self.orig_date = qt_to_dt(self.date.date())
exts = self.db.formats(row) exts = self.db.formats(row)
self.original_formats = []
if exts: if exts:
exts = exts.split(',') exts = exts.split(',')
for ext in exts: for ext in exts:
@ -586,6 +589,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
if size is None: if size is None:
continue continue
Format(self.formats, ext, size, timestamp=timestamp) Format(self.formats, ext, size, timestamp=timestamp)
self.original_formats.append(ext.lower())
self.initialize_combos() self.initialize_combos()

View File

@ -472,6 +472,7 @@ class FormatsManager(QWidget): # {{{
def initialize(self, db, id_): def initialize(self, db, id_):
self.changed = False self.changed = False
exts = db.formats(id_, index_is_id=True) exts = db.formats(id_, index_is_id=True)
self.original_val = set([])
if exts: if exts:
exts = exts.split(',') exts = exts.split(',')
for ext in exts: for ext in exts:
@ -482,6 +483,7 @@ class FormatsManager(QWidget): # {{{
if size is None: if size is None:
continue continue
Format(self.formats, ext, size, timestamp=timestamp) Format(self.formats, ext, size, timestamp=timestamp)
self.original_val.add(ext.lower())
def commit(self, db, id_): def commit(self, db, id_):
if not self.changed: if not self.changed:
@ -500,11 +502,12 @@ class FormatsManager(QWidget): # {{{
for ext in new_extensions: for ext in new_extensions:
db.add_format(id_, ext, open(paths[ext], 'rb'), notify=False, db.add_format(id_, ext, open(paths[ext], 'rb'), notify=False,
index_is_id=True) index_is_id=True)
db_extensions = set([f.lower() for f in db.formats(id_, dbfmts = db.formats(id_, index_is_id=True)
index_is_id=True).split(',')]) db_extensions = set([f.lower() for f in (dbfmts.split(',') if dbfmts
else [])])
extensions = new_extensions.union(old_extensions) extensions = new_extensions.union(old_extensions)
for ext in db_extensions: for ext in db_extensions:
if ext not in extensions: if ext not in extensions and ext in self.original_val:
db.remove_format(id_, ext, notify=False, index_is_id=True) db.remove_format(id_, ext, notify=False, index_is_id=True)
self.changed = False self.changed = False

View File

@ -232,6 +232,7 @@ class BIBTEX(CatalogPlugin): # {{{
help = _('The fields to output when cataloging books in the ' help = _('The fields to output when cataloging books in the '
'database. Should be a comma-separated list of fields.\n' 'database. Should be a comma-separated list of fields.\n'
'Available fields: %s.\n' 'Available fields: %s.\n'
'plus user-created custom fields.\n'
'Example: %s=title,authors,tags\n' 'Example: %s=title,authors,tags\n'
"Default: '%%default'\n" "Default: '%%default'\n"
"Applies to: BIBTEX output format")%(', '.join(FIELDS), "Applies to: BIBTEX output format")%(', '.join(FIELDS),
@ -269,7 +270,7 @@ class BIBTEX(CatalogPlugin): # {{{
dest = 'bib_cit', dest = 'bib_cit',
action = None, action = None,
help = _('The template for citation creation from database fields.\n' help = _('The template for citation creation from database fields.\n'
' Should be a template with {} enclosed fields.\n' 'Should be a template with {} enclosed fields.\n'
'Available fields: %s.\n' 'Available fields: %s.\n'
"Default: '%%default'\n" "Default: '%%default'\n"
"Applies to: BIBTEX output format")%', '.join(TEMPLATE_ALLOWED_FIELDS)), "Applies to: BIBTEX output format")%', '.join(TEMPLATE_ALLOWED_FIELDS)),
@ -344,7 +345,7 @@ class BIBTEX(CatalogPlugin): # {{{
if field == 'authors' : if field == 'authors' :
bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item)) bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item))
elif field in ['title', 'publisher', 'cover', 'uuid', elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
'author_sort', 'series'] : 'author_sort', 'series'] :
bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item))) bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))
@ -378,7 +379,7 @@ class BIBTEX(CatalogPlugin): # {{{
if calibre_files: if calibre_files:
files = [u':%s:%s' % (format, format.rpartition('.')[2].upper())\ files = [u':%s:%s' % (format, format.rpartition('.')[2].upper())\
for format in item] for format in item]
bibtex_entry.append(u'files = "%s"' % u', '.join(files)) bibtex_entry.append(u'file = "%s"' % u', '.join(files))
elif field == 'series_index' : elif field == 'series_index' :
bibtex_entry.append(u'volume = "%s"' % int(item)) bibtex_entry.append(u'volume = "%s"' % int(item))
@ -474,6 +475,8 @@ class BIBTEX(CatalogPlugin): # {{{
if opts.verbose: if opts.verbose:
opts_dict = vars(opts) opts_dict = vars(opts)
log("%s(): Generating %s" % (self.name,self.fmt)) log("%s(): Generating %s" % (self.name,self.fmt))
if opts.connected_device['is_device_connected']:
log(" connected_device: %s" % opts.connected_device['name'])
if opts_dict['search_text']: if opts_dict['search_text']:
log(" --search='%s'" % opts_dict['search_text']) log(" --search='%s'" % opts_dict['search_text'])
@ -548,6 +551,7 @@ class BIBTEX(CatalogPlugin): # {{{
as outfile: as outfile:
#File header #File header
nb_entries = len(data) nb_entries = len(data)
#check in book strict if all is ok else throw a warning into log #check in book strict if all is ok else throw a warning into log
if bib_entry == 'book' : if bib_entry == 'book' :
nb_books = len(filter(check_entry_book_valid, data)) nb_books = len(filter(check_entry_book_valid, data))
@ -555,6 +559,11 @@ class BIBTEX(CatalogPlugin): # {{{
log(" WARNING: only %d entries in %d are book compatible" % (nb_books, nb_entries)) log(" WARNING: only %d entries in %d are book compatible" % (nb_books, nb_entries))
nb_entries = nb_books nb_entries = nb_books
# If connected device, add 'On Device' values to data
if opts.connected_device['is_device_connected'] and 'ondevice' in fields:
for entry in data:
entry['ondevice'] = db.catalog_plugin_on_device_temp_mapping[entry['id']]['ondevice']
outfile.write(u'%%%Calibre catalog\n%%%{0} entries in catalog\n\n'.format(nb_entries)) outfile.write(u'%%%Calibre catalog\n%%%{0} entries in catalog\n\n'.format(nb_entries))
outfile.write(u'@preamble{"This catalog of %d entries was generated by calibre on %s"}\n\n' outfile.write(u'@preamble{"This catalog of %d entries was generated by calibre on %s"}\n\n'
% (nb_entries, nowf().strftime("%A, %d. %B %Y %H:%M").decode(preferred_encoding))) % (nb_entries, nowf().strftime("%A, %d. %B %Y %H:%M").decode(preferred_encoding)))

View File

@ -391,6 +391,8 @@ Take your pick:
* A tribute to the SONY Librie which was the first e-ink based e-book reader * A tribute to the SONY Librie which was the first e-ink based e-book reader
* My wife chose it ;-) * My wife chose it ;-)
|app| is pronounced as cal-i-ber *not* ca-libre. If you're wondering, |app| is the British/commonwealth spelling for caliber. Being Indian, that's the natural spelling for me.
Why does |app| show only some of my fonts on OS X? Why does |app| show only some of my fonts on OS X?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|app| embeds fonts in ebook files it creates. E-book files support embedding only TrueType (.ttf) fonts. Most fonts on OS X systems are in .dfont format, thus they cannot be embedded. |app| shows only TrueType fonts found on your system. You can obtain many TrueType fonts on the web. Simply download the .ttf files and add them to the Library/Fonts directory in your home directory. |app| embeds fonts in ebook files it creates. E-book files support embedding only TrueType (.ttf) fonts. Most fonts on OS X systems are in .dfont format, thus they cannot be embedded. |app| shows only TrueType fonts found on your system. You can obtain many TrueType fonts on the web. Simply download the .ttf files and add them to the Library/Fonts directory in your home directory.