Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

Merge from trunk

This commit is contained in: commit fdb842b036
BIN  resources/images/news/latimes.png  (new file, 358 B)
Binary file not shown.
@@ -1,25 +1,25 @@
-# -*- coding: utf-8
 __license__ = 'GPL v3'
 __author__ = 'Luis Hernandez'
 __copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
-description = 'Periódico gratuito en español - v0.8 - 27 Jan 2011'
+__version__ = 'v0.85'
+__date__ = '31 January 2011'
 
 '''
 www.20minutos.es
 '''
 
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class AdvancedUserRecipe1294946868(BasicNewsRecipe):
 
-    title = u'20 Minutos'
+    title = u'20 Minutos new'
     publisher = u'Grupo 20 Minutos'
 
-    __author__ = 'Luis Hernández'
-    description = 'Periódico gratuito en español'
+    __author__ = 'Luis Hernandez'
+    description = 'Free spanish newspaper'
     cover_url = 'http://estaticos.20minutos.es/mmedia/especiales/corporativo/css/img/logotipos_grupo20minutos.gif'
 
-    oldest_article = 5
+    oldest_article = 2
     max_articles_per_feed = 100
 
     remove_javascript = True
@@ -29,6 +29,7 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
     encoding = 'ISO-8859-1'
     language = 'es'
     timefmt = '[%a, %d %b, %Y]'
+    remove_empty_feeds = True
 
     keep_only_tags = [
        dict(name='div', attrs={'id':['content','vinetas',]})
@@ -43,13 +44,21 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
     remove_tags = [
        dict(name='ol', attrs={'class':['navigation',]})
        ,dict(name='span', attrs={'class':['action']})
-       ,dict(name='div', attrs={'class':['twitter comments-list hidden','related-news','col','photo-gallery','calendario','article-comment','postto estirar','otras_vinetas estirar','kment','user-actions']})
+       ,dict(name='div', attrs={'class':['twitter comments-list hidden','related-news','col','photo-gallery','photo-gallery side-art-block','calendario','article-comment','postto estirar','otras_vinetas estirar','kment','user-actions']})
        ,dict(name='div', attrs={'id':['twitter-destacados','eco-tabs','inner','vineta_calendario','vinetistas clearfix','otras_vinetas estirar','MIN1','main','SUP1','INT']})
        ,dict(name='ul', attrs={'class':['article-user-actions','stripped-list']})
        ,dict(name='ul', attrs={'id':['site-links']})
        ,dict(name='li', attrs={'class':['puntuacion','enviar','compartir']})
     ]
 
+    extra_css = """
+        p{text-align: justify; font-size: 100%}
+        body{ text-align: left; font-size:100% }
+        h3{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
+        """
 
+    preprocess_regexps = [(re.compile(r'<a href="http://estaticos.*?[0-999]px;" target="_blank">', re.DOTALL), lambda m: '')]
 
     feeds = [
        (u'Portada' , u'http://www.20minutos.es/rss/')
        ,(u'Nacional' , u'http://www.20minutos.es/rss/nacional/')
@@ -65,6 +74,6 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
        ,(u'Empleo' , u'http://www.20minutos.es/rss/empleo/')
        ,(u'Cine' , u'http://www.20minutos.es/rss/cine/')
        ,(u'Musica' , u'http://www.20minutos.es/rss/musica/')
        ,(u'Vinetas' , u'http://www.20minutos.es/rss/vinetas/')
        ,(u'Comunidad20' , u'http://www.20minutos.es/rss/zona20/')
     ]
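For readers unfamiliar with calibre recipes: a news recipe is just a BasicNewsRecipe subclass, and the pattern in the diff above is the whole extraction pipeline — keep_only_tags selects the article container, remove_tags prunes widgets inside it, and preprocess_regexps rewrites the raw HTML before parsing. A minimal sketch (the class name, feed URL and selectors here are illustrative, not from this commit):

    import re
    from calibre.web.feeds.news import BasicNewsRecipe

    class ExampleRecipe(BasicNewsRecipe):
        title = u'Example Paper'
        oldest_article = 2              # skip articles older than 2 days
        max_articles_per_feed = 100
        # keep only the main article container ...
        keep_only_tags = [dict(name='div', attrs={'id': 'content'})]
        # ... then strip sharing widgets that live inside it
        remove_tags = [dict(name='div', attrs={'class': 'user-actions'})]
        # rewrite the raw HTML before it is parsed at all
        preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
        feeds = [(u'Front page', u'http://example.com/rss/')]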
resources/recipes/cinco_dias.recipe — new file (71 lines)
@@ -0,0 +1,71 @@
__license__ = 'GPL v3'
__author__ = 'Luis Hernandez'
__copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
__version__ = 'v1.2'
__date__ = '31 January 2011'

'''
http://www.cincodias.com/
'''

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1294946868(BasicNewsRecipe):

    title = u'Cinco Dias'
    publisher = u'Grupo Prisa'

    __author__ = 'Luis Hernandez'
    description = 'Spanish website about money and business, free edition'

    cover_url = 'http://www.prisa.com/images/logos/logo_cinco_dias.gif'
    oldest_article = 2
    max_articles_per_feed = 100

    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False

    language = 'es'
    remove_empty_feeds = True
    encoding = 'ISO-8859-1'
    timefmt = '[%a, %d %b, %Y]'

    keep_only_tags = [
       dict(name='div', attrs={'class':['cab_articulo cab_noticia','pos_3','txt_noticia','mod_despiece']})
       ,dict(name='p', attrs={'class':['cintillo']})
    ]

    remove_tags_before = dict(name='div' , attrs={'class':['publi_h']})
    remove_tags_after = dict(name='div' , attrs={'class':['tab_util util_estadisticas']})

    remove_tags = [
       dict(name='div', attrs={'class':['util-1','util-2','util-3','inner estirar','inner1','inner2','inner3','cont','tab_util util_estadisticas','tab_util util_enviar','mod_list_inf','mod_similares','mod_divisas','mod_sectores','mod_termometro','mod post','mod_img','mod_txt','nivel estirar','barra estirar','info_brujula btnBrujula','utilidad_brujula estirar']})
       ,dict(name='li', attrs={'class':['lnk-fcbook','lnk-retweet','lnk-meneame','desplegable','comentarios','list-options','estirar']})
       ,dict(name='ul', attrs={'class':['lista-izquierda','list-options','estirar']})
       ,dict(name='p', attrs={'class':['autor']})
    ]

    extra_css = """
        p{text-align: justify; font-size: 100%}
        body{ text-align: left; font-size:100% }
        h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
        h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
        """

    feeds = [
       (u'Ultima Hora' , u'http://www.cincodias.com/rss/feed.html?feedId=17029')
       ,(u'Empresas' , u'http://www.cincodias.com/rss/feed.html?feedId=19')
       ,(u'Mercados' , u'http://www.cincodias.com/rss/feed.html?feedId=20')
       ,(u'Economia' , u'http://www.cincodias.com/rss/feed.html?feedId=21')
       ,(u'Tecnorama' , u'http://www.cincodias.com/rss/feed.html?feedId=17230')
       ,(u'Tecnologia' , u'http://www.cincodias.com/rss/feed.html?feedId=17106')
       ,(u'Finanzas Personales' , u'http://www.cincodias.com/rss/feed.html?feedId=22')
       ,(u'Fiscalidad' , u'http://www.cincodias.com/rss/feed.html?feedId=17107')
       ,(u'Vivienda' , u'http://www.cincodias.com/rss/feed.html?feedId=17108')
       ,(u'Tendencias' , u'http://www.cincodias.com/rss/feed.html?feedId=17109')
       ,(u'Empleo' , u'http://www.cincodias.com/rss/feed.html?feedId=17110')
       ,(u'IBEX 35' , u'http://www.cincodias.com/rss/feed.html?feedId=17125')
       ,(u'Sectores' , u'http://www.cincodias.com/rss/feed.html?feedId=17126')
       ,(u'Opinion' , u'http://www.cincodias.com/rss/feed.html?feedId=17105')
    ]
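A recipe like this can be smoke-tested from the command line before it is committed; calibre's ebook-convert accepts a .recipe file directly (something like: ebook-convert cinco_dias.recipe cinco_dias.epub --test — the --test flag, in versions that support it, fetches only a couple of articles per feed).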
@@ -1,73 +1,92 @@
-#!/usr/bin/env python
 
 __license__ = 'GPL v3'
-__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
-latimes.com
+www.latimes.com
 '''
 
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class LATimes(BasicNewsRecipe):
-    title = u'The Los Angeles Times'
-    __author__ = u'Darko Miletic and Sujata Raman'
-    description = u'News from Los Angeles'
-    oldest_article = 7
-    max_articles_per_feed = 100
-    language = 'en'
+    title = 'Los Angeles Times'
+    __author__ = 'Darko Miletic'
+    description = 'The Los Angeles Times is a leading source of news on Southern California, entertainment, movies, television, music, politics, business, health, technology, travel, sports, environment, economics, autos, jobs, real estate and other topics affecting California'
+    publisher = 'Tribune Company'
+    category = 'news, politics, USA, Los Angeles, world'
+    oldest_article = 2
+    max_articles_per_feed = 200
     no_stylesheets = True
+    encoding = 'utf8'
     use_embedded_content = False
-    encoding = 'utf-8'
-    lang = 'en-US'
+    language = 'en'
+    remove_empty_feeds = True
+    publication_type = 'newspaper'
+    masthead_url = 'http://www.latimes.com/images/logo.png'
+    cover_url = 'http://www.latimes.com/includes/sectionfronts/A1.pdf'
+    extra_css = """
+        body{font-family: Georgia,"Times New Roman",Times,serif }
+        img{margin-bottom: 0.4em; margin-top: 0.8em; display:block}
+        h2{font-size: 1.1em}
+        .deckhead{font-size: small; text-transform: uppercase}
+        .small{color: gray; font-size: small}
+        .date,.time,.copyright{font-size: x-small; color:gray; font-style:italic;}
+        """
 
     conversion_options = {
          'comment' : description
-       , 'language' : lang
-    }
+       , 'tags' : category
+       , 'publisher' : publisher
+       , 'language' : language
+       , 'linearize_tables' : 'Yes'
+    }
 
-    extra_css = '''
-    h1{font-family :Georgia,"Times New Roman",Times,serif; font-size:large; }
-    h2{font-family :Georgia,"Times New Roman",Times,serif; font-size:x-small;}
-    .story{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
-    .entry-body{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
-    .entry-more{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
-    .credit{color:#666666; font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
-    .small{color:#666666; font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
-    .byline{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
-    .date{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; font-style:italic;}
-    .time{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; font-style:italic;}
-    .copyright{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; }
-    .subhead{font-family :Georgia,"Times New Roman",Times,serif; font-size:x-small;}
-    '''
-
-#   recursions = 1
-#   match_regexps = [r'http://www.latimes.com/.*page=[2-9]']
-
-    keep_only_tags = [dict(name='div', attrs={'class':["story" ,"entry"] })]
-
-    remove_tags = [ dict(name='div', attrs={'class':['articlerail',"sphereTools","tools","toppaginate","entry-footer-left","entry-footer-right"]}),
-                    dict(name='div', attrs={'id':["moduleArticleToolsContainer",]}),
-                    dict(name='p', attrs={'class':["entry-footer",]}),
-                    dict(name='ul', attrs={'class':"article-nav clearfix"}),
-                    dict(name=['iframe'])
-                  ]
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'story'})
+       ,dict(attrs={'class':['entry-header','time','entry-content']})
+    ]
+    remove_tags_after = dict(name='p', attrs={'class':'copyright'})
+    remove_tags = [
+        dict(name=['meta','link','iframe','object','embed'])
+       ,dict(attrs={'class':['toolSet','articlerail','googleAd','entry-footer-left','entry-footer-right','entry-footer-social','google-ad-story-bottom','sphereTools']})
+       ,dict(attrs={'id':['article-promo','googleads','moduleArticleToolsContainer','gallery-subcontent']})
+    ]
+    remove_attributes = ['lang','xmlns:fb','xmlns:og','border','xtags','i','article_body']
 
-    feeds = [(u'News', u'http://feeds.latimes.com/latimes/news')
-            ,(u'Local','http://feeds.latimes.com/latimes/news/local')
-            ,(u'MostEmailed','http://feeds.latimes.com/MostEmailed')
-            ,(u'Politics','http://feeds.latimes.com/latimes/news/local/politics/cal/')
-            ,('OrangeCounty','http://feeds.latimes.com/latimes/news/local/orange/')
-            ,('National','http://feeds.latimes.com/latimes/news/nationworld/nation')
-            ,('Politics','http://feeds.latimes.com/latimes/news/politics/')
-            ,('Business','http://feeds.latimes.com/latimes/business')
-            ,('Sports','http://feeds.latimes.com/latimes/sports/')
-            ,('Entertainment','http://feeds.latimes.com/latimes/entertainment/')
-            ]
+    feeds = [
+        (u'Top News'            , u'http://feeds.latimes.com/latimes/news')
+       ,(u'Local News'          , u'http://feeds.latimes.com/latimes/news/local')
+       ,(u'National'            , u'http://feeds.latimes.com/latimes/news/nationworld/nation')
+       ,(u'National Politics'   , u'http://feeds.latimes.com/latimes/news/politics/')
+       ,(u'Business'            , u'http://feeds.latimes.com/latimes/business')
+       ,(u'Education'           , u'http://feeds.latimes.com/latimes/news/education')
+       ,(u'Environment'         , u'http://feeds.latimes.com/latimes/news/science/environment')
+       ,(u'Religion'            , u'http://feeds.latimes.com/latimes/features/religion')
+       ,(u'Science'             , u'http://feeds.latimes.com/latimes/news/science')
+       ,(u'Technology'          , u'http://feeds.latimes.com/latimes/technology')
+       ,(u'Africa'              , u'http://feeds.latimes.com/latimes/africa')
+       ,(u'Asia'                , u'http://feeds.latimes.com/latimes/asia')
+       ,(u'Europe'              , u'http://feeds.latimes.com/latimes/europe')
+       ,(u'Latin America'       , u'http://feeds.latimes.com/latimes/latinamerica')
+       ,(u'Middle East'         , u'http://feeds.latimes.com/latimes/middleeast')
+       ,(u'Arts&Culture'        , u'http://feeds.feedburner.com/latimes/entertainment/news/arts')
+       ,(u'Entertainment News'  , u'http://feeds.feedburner.com/latimes/entertainment/news/')
+       ,(u'Movie News'          , u'http://feeds.feedburner.com/latimes/entertainment/news/movies/')
+       ,(u'Movie Reviews'       , u'http://feeds.feedburner.com/movies/reviews/')
+       ,(u'Music News'          , u'http://feeds.feedburner.com/latimes/entertainment/news/music/')
+       ,(u'Pop Album Reviews'   , u'http://feeds.feedburner.com/latimes/pop-album-reviews')
+       ,(u'Restaurant Reviews'  , u'http://feeds.feedburner.com/latimes/restaurant/reviews')
+       ,(u'Theater and Dance'   , u'http://feeds.feedburner.com/latimes/theaterdance')
+       ,(u'Autos'               , u'http://feeds.latimes.com/latimes/classified/automotive/highway1/')
+       ,(u'Books'               , u'http://feeds.latimes.com/features/books')
+       ,(u'Food'                , u'http://feeds.latimes.com/latimes/features/food/')
+       ,(u'Health'              , u'http://feeds.latimes.com/latimes/features/health/')
+       ,(u'Real Estate'         , u'http://feeds.latimes.com/latimes/classified/realestate/')
+       ,(u'Commentary'          , u'http://feeds2.feedburner.com/latimes/news/opinion/commentary/')
+       ,(u'Sports'              , u'http://feeds.latimes.com/latimes/sports/')
+    ]
 
     def get_article_url(self, article):
-        ans = article.get('feedburner_origlink').rpartition('?')[0]
+        ans = BasicNewsRecipe.get_article_url(self, article).rpartition('?')[0]
 
         try:
             self.log('Looking for full story link in', ans)
@@ -83,4 +102,22 @@ class LATimes(BasicNewsRecipe):
             pass
         return ans
 
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+                item['alt'] = 'image'
+        for item in soup.findAll('a'):
+            limg = item.find('img')
+            if item.string is not None:
+                str = item.string
+                item.replaceWith(str)
+            else:
+                if limg:
+                    item.name = 'div'
+                    item.attrs = []
+                else:
+                    str = self.tag_to_string(item)
+                    item.replaceWith(str)
+        return soup
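The get_article_url change is defensive: the old line called article.get('feedburner_origlink').rpartition(...), which raises AttributeError for any feed item lacking that key, whereas BasicNewsRecipe.get_article_url falls back through the available link fields before the query string is stripped. The same idea as a standalone sketch (the dict stands in for a parsed feed entry):

    def article_url(article):
        # prefer the feedburner original link, fall back to the plain link,
        # and strip any tracking query string in either case
        url = article.get('feedburner_origlink') or article.get('link', '')
        return url.rpartition('?')[0] or url

    print(article_url({'link': 'http://www.latimes.com/news/story.html?track=rss'}))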
@@ -139,6 +139,13 @@ class CHMReader(CHMFile):
         if self.hhc_path not in files and files:
             self.hhc_path = files[0]
 
+        if self.hhc_path == '.hhc' and self.hhc_path not in files:
+            from calibre import walk
+            for x in walk(output_dir):
+                if os.path.basename(x).lower() in ('index.htm', 'index.html'):
+                    self.hhc_path = os.path.relpath(x, output_dir)
+                    break
+
     def _reformat(self, data, htmlpath):
         try:
             data = xml_to_unicode(data, strip_encoding_pats=True)[0]
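The new block covers CHM files whose declared table-of-contents path ('.hhc') does not actually exist in the archive: it walks the extracted tree and falls back to the first index.htm/index.html it finds. The same fallback written as a standalone function, with os.walk standing in for calibre's walk helper:

    import os

    def find_toc_fallback(output_dir):
        # scan the extracted CHM tree for a usable entry page
        for root, dirs, files in os.walk(output_dir):
            for name in files:
                if name.lower() in ('index.htm', 'index.html'):
                    return os.path.relpath(os.path.join(root, name), output_dir)
        return None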
@@ -175,6 +175,19 @@ class EPUBInput(InputFormatPlugin):
             raise ValueError(
                 'EPUB files with DTBook markup are not supported')
 
+        for x in list(opf.iterspine()):
+            ref = x.get('idref', None)
+            if ref is None:
+                x.getparent().remove(x)
+                continue
+            for y in opf.itermanifest():
+                if y.get('id', None) == ref and y.get('media-type', None) in \
+                        ('application/vnd.adobe-page-template+xml',):
+                    p = x.getparent()
+                    if p is not None:
+                        p.remove(x)
+                    break
+
         with open('content.opf', 'wb') as nopf:
             nopf.write(opf.render())
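What the new loop accomplishes: some EPUBs list Adobe page-template streams in the manifest and reference them from the spine, and downstream processing chokes on the non-content item, so spine entries pointing at that media type (or at no idref at all) are dropped before content.opf is rewritten. A namespace-free sketch of the same pruning against a toy OPF (real OPF files are namespaced, so this is illustrative only):

    from lxml import etree

    opf = etree.fromstring(
        '<package><manifest>'
        '<item id="tpl" media-type="application/vnd.adobe-page-template+xml"/>'
        '<item id="ch1" media-type="application/xhtml+xml"/>'
        '</manifest><spine>'
        '<itemref idref="tpl"/><itemref idref="ch1"/>'
        '</spine></package>')
    # collect manifest ids with the offending media type ...
    bad = set(i.get('id') for i in opf.iter('item')
              if i.get('media-type') == 'application/vnd.adobe-page-template+xml')
    # ... and drop every spine reference to them
    for ref in list(opf.iter('itemref')):
        if ref.get('idref') in bad:
            ref.getparent().remove(ref)
    print(etree.tostring(opf))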
src/calibre/ebooks/metadata/sources/base.py — new file (61 lines)
@@ -0,0 +1,61 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import re

from calibre.customize import Plugin

class Source(Plugin):

    type = _('Metadata source')
    author = 'Kovid Goyal'

    supported_platforms = ['windows', 'osx', 'linux']

    result_of_identify_is_complete = True

    def get_author_tokens(self, authors):
        'Take a list of authors and return a list of tokens useful for an '
        'AND search query'
        # Leave ' in there for Irish names
        pat = re.compile(r'[-,:;+!@#$%^&*(){}.`~"\s\[\]/]')
        for au in authors:
            for tok in au.split():
                yield pat.sub('', tok)

    def split_jobs(self, jobs, num):
        'Split a list of jobs into at most num groups, as evenly as possible'
        groups = [[] for i in range(num)]
        jobs = list(jobs)
        while jobs:
            for gr in groups:
                try:
                    job = jobs.pop()
                except IndexError:
                    break
                gr.append(job)
        return [g for g in groups if g]

    def identify(self, log, result_queue, abort, title=None, authors=None, identifiers={}):
        '''
        Identify a book by its title/author/isbn/etc.

        :param log: A log object, use it to output debugging information/errors
        :param result_queue: A result Queue, results should be put into it.
                             Each result is a Metadata object
        :param abort: If abort.is_set() returns True, abort further processing
                      and return as soon as possible
        :param title: The title of the book, can be None
        :param authors: A list of authors of the book, can be None
        :param identifiers: A dictionary of other identifiers, most commonly
                            {'isbn':'1234...'}
        :return: None if no errors occurred, otherwise a unicode representation
                 of the error suitable for showing to the user
        '''
        return None
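Both helpers are easy to exercise standalone. get_author_tokens strips query-hostile punctuation (deliberately keeping apostrophes for Irish names), and split_jobs deals jobs round-robin into at most num non-empty groups. A condensed re-statement of the same logic, with expected outputs shown as comments:

    import re

    def author_tokens(authors):
        pat = re.compile(r'[-,:;+!@#$%^&*(){}.`~"\s\[\]/]')
        for au in authors:
            for tok in au.split():
                yield pat.sub('', tok)

    def split_jobs(jobs, num):
        groups = [[] for _ in range(num)]
        jobs = list(jobs)
        while jobs:
            for gr in groups:
                try:
                    gr.append(jobs.pop())   # deal from the end, one per group
                except IndexError:
                    break
        return [g for g in groups if g]

    print(list(author_tokens(["O'Brien, Flann"])))   # ["O'Brien", 'Flann']
    print(split_jobs(range(5), 3))                   # [[4, 1], [3, 0], [2]]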
src/calibre/ebooks/metadata/sources/google.py — new file (215 lines)
@@ -0,0 +1,215 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import time
from urllib import urlencode
from functools import partial
from threading import Thread

from lxml import etree

from calibre.ebooks.metadata.sources import Source
from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import parse_date, utcnow
from calibre import browser, as_unicode

NAMESPACES = {
    'openSearch': 'http://a9.com/-/spec/opensearchrss/1.0/',
    'atom': 'http://www.w3.org/2005/Atom',
    'dc': 'http://purl.org/dc/terms'
}
XPath = partial(etree.XPath, namespaces=NAMESPACES)

total_results  = XPath('//openSearch:totalResults')
start_index    = XPath('//openSearch:startIndex')
items_per_page = XPath('//openSearch:itemsPerPage')
entry          = XPath('//atom:entry')
entry_id       = XPath('descendant::atom:id')
creator        = XPath('descendant::dc:creator')
identifier     = XPath('descendant::dc:identifier')
title          = XPath('descendant::dc:title')
date           = XPath('descendant::dc:date')
publisher      = XPath('descendant::dc:publisher')
subject        = XPath('descendant::dc:subject')
description    = XPath('descendant::dc:description')
language       = XPath('descendant::dc:language')


def to_metadata(browser, log, entry_):

    def get_text(extra, x):
        try:
            ans = x(extra)
            if ans:
                ans = ans[0].text
                if ans and ans.strip():
                    return ans.strip()
        except:
            log.exception('Programming error:')
        return None

    id_url = entry_id(entry_)[0].text
    title_ = ': '.join([x.text for x in title(entry_)]).strip()
    authors = [x.text.strip() for x in creator(entry_) if x.text]
    if not authors:
        authors = [_('Unknown')]
    if not id_url or not title:
        # Silently discard this entry
        return None

    mi = Metadata(title_, authors)
    try:
        raw = browser.open(id_url).read()
        feed = etree.fromstring(raw)
        extra = entry(feed)[0]
    except:
        log.exception('Failed to get additional details for', mi.title)
        return mi

    mi.comments = get_text(extra, description)
    #mi.language = get_text(extra, language)
    mi.publisher = get_text(extra, publisher)

    # Author sort
    for x in creator(extra):
        for key, val in x.attrib.items():
            if key.endswith('file-as') and val and val.strip():
                mi.author_sort = val
                break
    # ISBN
    isbns = []
    for x in identifier(extra):
        t = str(x.text).strip()
        if t[:5].upper() in ('ISBN:', 'LCCN:', 'OCLC:'):
            if t[:5].upper() == 'ISBN:':
                isbns.append(t[5:])
    if isbns:
        mi.isbn = sorted(isbns, key=len)[-1]

    # Tags
    try:
        btags = [x.text for x in subject(extra) if x.text]
        tags = []
        for t in btags:
            tags.extend([y.strip() for y in t.split('/')])
        tags = list(sorted(list(set(tags))))
    except:
        log.exception('Failed to parse tags:')
        tags = []
    if tags:
        mi.tags = [x.replace(',', ';') for x in tags]

    # pubdate
    pubdate = get_text(extra, date)
    if pubdate:
        try:
            default = utcnow().replace(day=15)
            mi.pubdate = parse_date(pubdate, assume_utc=True, default=default)
        except:
            log.exception('Failed to parse pubdate')

    return mi


class Worker(Thread):

    def __init__(self, log, entries, abort, result_queue):
        self.browser, self.log, self.entries = browser(), log, entries
        self.abort, self.result_queue = abort, result_queue
        Thread.__init__(self)
        self.daemon = True

    def run(self):
        for i in self.entries:
            try:
                ans = to_metadata(self.browser, self.log, i)
                if ans is not None:
                    self.result_queue.put(ans)
            except:
                self.log.exception(
                    'Failed to get metadata for identify entry:',
                    etree.tostring(i))
            if self.abort.is_set():
                break


class GoogleBooks(Source):

    name = 'Google Books'

    def create_query(self, log, title=None, authors=None, identifiers={},
            start_index=1):
        BASE_URL = 'http://books.google.com/books/feeds/volumes?'
        isbn = identifiers.get('isbn', None)
        q = ''
        if isbn is not None:
            q += 'isbn:'+isbn
        elif title or authors:
            def build_term(prefix, parts):
                return ' '.join('in'+prefix + ':' + x for x in parts)
            if title is not None:
                q += build_term('title', title.split())
            if authors:
                q += ('+' if q else '')+build_term('author',
                        self.get_author_tokens(authors))

        if isinstance(q, unicode):
            q = q.encode('utf-8')
        if not q:
            return None
        return BASE_URL+urlencode({
            'q': q,
            'max-results': 20,
            'start-index': start_index,
            'min-viewability': 'none',
        })

    def identify(self, log, result_queue, abort, title=None, authors=None, identifiers={}):
        query = self.create_query(log, title=title, authors=authors,
                identifiers=identifiers)
        try:
            raw = browser().open_novisit(query).read()
        except Exception, e:
            log.exception('Failed to make identify query: %r'%query)
            return as_unicode(e)

        try:
            parser = etree.XMLParser(recover=True, no_network=True)
            feed = etree.fromstring(raw, parser=parser)
            entries = entry(feed)
        except Exception, e:
            log.exception('Failed to parse identify results')
            return as_unicode(e)

        groups = self.split_jobs(entries, 5) # At most 5 threads
        if not groups:
            return
        workers = [Worker(log, entries, abort, result_queue) for entries in
                groups]

        if abort.is_set():
            return

        for worker in workers: worker.start()

        has_alive_worker = True
        while has_alive_worker and not abort.is_set():
            has_alive_worker = False
            for worker in workers:
                if worker.is_alive():
                    has_alive_worker = True
            time.sleep(0.1)

        return None
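The identify() entry point is meant to be driven with a result queue and an abort event. A hedged sketch of a caller, in the Python 2 style of the code above (GoogleBooks(None) assumes the Plugin base tolerates a None argument, default_log is calibre's stock logger, and the 2011-era Google Books feed this hits is long gone, so treat it as illustration only):

    from Queue import Queue
    from threading import Event
    from calibre.utils.logging import default_log

    results, abort = Queue(), Event()
    source = GoogleBooks(None)   # assumption: the Plugin base accepts None here
    err = source.identify(default_log, results, abort,
            title='Great Expectations', authors=['Charles Dickens'])
    while not results.empty():
        mi = results.get()
        print mi.title, mi.authors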
@@ -83,6 +83,7 @@ class RTFInput(InputFormatPlugin):
                 os.mkdir(debug_dir)
                 debug_dir = 'rtfdebug'
                 run_lev = 4
+                self.log('Running RTFParser in debug mode')
             except:
                 pass
         parser = ParseRtf(
@@ -230,22 +231,6 @@ class RTFInput(InputFormatPlugin):
         with open('styles.css', 'ab') as f:
             f.write(css)
 
-    # def preprocess(self, fname):
-    #     self.log('\tPreprocessing to convert unicode characters')
-    #     try:
-    #         data = open(fname, 'rb').read()
-    #         from calibre.ebooks.rtf.preprocess import RtfTokenizer, RtfTokenParser
-    #         tokenizer = RtfTokenizer(data)
-    #         tokens = RtfTokenParser(tokenizer.tokens)
-    #         data = tokens.toRTF()
-    #         fname = 'preprocessed.rtf'
-    #         with open(fname, 'wb') as f:
-    #             f.write(data)
-    #     except:
-    #         self.log.exception(
-    #             'Failed to preprocess RTF to convert unicode sequences, ignoring...')
-    #     return fname
-
     def convert_borders(self, doc):
         border_styles = []
         style_map = {}
@@ -280,8 +265,6 @@ class RTFInput(InputFormatPlugin):
         self.opts = options
         self.log = log
         self.log('Converting RTF to XML...')
-        #Name of the preprocesssed RTF file
-        # fname = self.preprocess(stream.name)
         try:
             xml = self.generate_xml(stream.name)
         except RtfInvalidCodeException, e:
@@ -335,3 +318,4 @@ class RTFInput(InputFormatPlugin):
         opf.render(open('metadata.opf', 'wb'))
         return os.path.abspath('metadata.opf')
 
+
@@ -238,6 +238,8 @@ class ParseRtf:
                 bug_handler = RtfInvalidCodeException,
             )
             enc = 'cp' + encode_obj.get_codepage()
+            if enc == 'cp10000':
+                enc = 'mac_roman'
             msg = 'Exception in token processing'
             if check_encoding_obj.check_encoding(self.__file, enc):
                 file_name = self.__file if isinstance(self.__file, str) \
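The two new lines exist because 'cp10000' (the classic Mac OS codepage some RTF files declare) is not a codec name Python recognises, while 'mac_roman' is. A quick check:

    import codecs

    codecs.lookup('mac_roman')       # resolves fine
    try:
        codecs.lookup('cp10000')     # not in Python's codec registry
    except LookupError:
        print 'cp10000 must be mapped to mac_roman first'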
@@ -15,8 +15,10 @@
 #                                                                       #
 #                                                                       #
 #########################################################################
 import sys, os, tempfile, re
+
 from calibre.ebooks.rtf2xml import copy
+
 class Colors:
     """
     Change lines with color info from color numbers to the actual color names.
@@ -40,8 +42,10 @@ class Colors:
         self.__file = in_file
         self.__copy = copy
         self.__bug_handler = bug_handler
+        self.__line = 0
         self.__write_to = tempfile.mktemp()
         self.__run_level = run_level
+
     def __initiate_values(self):
         """
         Initiate all values.
@@ -61,6 +65,7 @@ class Colors:
         self.__color_num = 1
         self.__line_color_exp = re.compile(r'bdr-color_:(\d+)')
         # cw<bd<bor-par-to<nu<bdr-hair__|bdr-li-wid:0.50|bdr-sp-wid:1.00|bdr-color_:2
+
     def __before_color_func(self, line):
         """
         Requires:
@@ -76,6 +81,7 @@ class Colors:
         if self.__token_info == 'mi<mk<clrtbl-beg':
             self.__state = 'in_color_table'
         self.__write_obj.write(line)
+
     def __default_color_func(self, line):
         """
         Requires:
@@ -87,6 +93,7 @@ class Colors:
         """
         hex_num = line[-3:-1]
         self.__color_string += hex_num
+
     def __blue_func(self, line):
         """
         Requires:
@@ -109,6 +116,7 @@ class Colors:
             )
         self.__color_num += 1
         self.__color_string = '#'
+
     def __in_color_func(self, line):
         """
         Requires:
@@ -127,12 +135,13 @@ class Colors:
             self.__state = 'after_color_table'
         else:
             action = self.__state_dict.get(self.__token_info)
-            if action == None:
+            if action is None:
                 sys.stderr.write('in module colors.py\n'
                     'function is self.__in_color_func\n'
                     'no action for %s' % self.__token_info
                 )
             action(line)
+
     def __after_color_func(self, line):
         """
         Check the line to see if it contains color info. If it does, extract the
@@ -180,6 +189,7 @@ class Colors:
         else:
             self.__write_obj.write(line)
         # cw<bd<bor-par-to<nu<bdr-hair__|bdr-li-wid:0.50|bdr-sp-wid:1.00|bdr-color_:2
+
     def __sub_from_line_color(self, match_obj):
         num = match_obj.group(1)
         try:
@@ -191,25 +201,27 @@ class Colors:
         else:
             return 'bdr-color_:no-value'
         hex_num = self.__figure_num(num)
-        return_value = 'bdr-color_:%s' % hex_num
-        return return_value
+        return 'bdr-color_:%s' % hex_num
+
     def __figure_num(self, num):
         if num == 0:
             hex_num = 'false'
         else:
             hex_num = self.__color_dict.get(num)
-        if hex_num == None:
-            if self.__run_level > 3:
-                msg = 'no value in self.__color_dict for key %s\n' % num
-                raise self.__bug_hanlder, msg
-            if hex_num == None:
-                hex_num = '0'
+        if hex_num is None:
+            hex_num = '0'
+            if self.__run_level > 5:
+                msg = 'no value in self.__color_dict' \
+                    'for key %s at line %d\n' % (num, self.__line)
+                raise self.__bug_handler, msg
         return hex_num
+
     def __do_nothing_func(self, line):
         """
         Bad RTF will have text in the color table
         """
         pass
+
     def convert_colors(self):
         """
         Requires:
@@ -226,20 +238,16 @@ class Colors:
         info, and substitute the number with the hex number.
         """
         self.__initiate_values()
-        read_obj = open(self.__file, 'r')
-        self.__write_obj = open(self.__write_to, 'w')
-        line_to_read = 1
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            self.__token_info = line[:16]
-            action = self.__state_dict.get(self.__state)
-            if action == None:
-                sys.stderr.write('no no matching state in module fonts.py\n')
-                sys.stderr.write(self.__state + '\n')
-            action(line)
-        read_obj.close()
-        self.__write_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            with open(self.__write_to, 'w') as self.__write_obj:
+                for line in read_obj:
+                    self.__line+=1
+                    self.__token_info = line[:16]
+                    action = self.__state_dict.get(self.__state)
+                    if action is None:
+                        sys.stderr.write('no matching state in module fonts.py\n')
+                        sys.stderr.write(self.__state + '\n')
+                    action(line)
         copy_obj = copy.Copy(bug_handler = self.__bug_handler)
         if self.__copy:
             copy_obj.copy_file(self.__write_to, "color.data")
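The convert_colors rewrite replaces the manual readline() loop with direct file iteration and threads a line counter through, so the run-level error can report where the bad color index occurred. The underlying pattern is a tiny table-driven state machine; its skeleton, separated from the RTF details (here actions signal a state switch via their return value, where the class above flips an instance attribute instead):

    import sys

    def run_state_machine(lines, state_dict, state):
        # each action handles one line for the current state; an action may
        # return a new state name as table markers go by
        for lineno, line in enumerate(lines, 1):
            action = state_dict.get(state)
            if action is None:
                sys.stderr.write('no matching state %r at line %d\n' % (state, lineno))
                break
            state = action(line) or state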
@@ -33,13 +33,13 @@ class ConvertToTags:
         self.__copy = copy
         self.__dtd_path = dtd_path
         self.__no_dtd = no_dtd
-        if encoding != 'mac_roman':
-            self.__encoding = 'cp' + encoding
-        else:
+        self.__encoding = 'cp' + encoding
+        if encoding == 'mac_roman':
             self.__encoding = 'mac_roman'
         self.__indent = indent
         self.__run_level = run_level
         self.__write_to = tempfile.mktemp()
+        self.__convert_utf = False
 
     def __initiate_values(self):
         """
@@ -213,7 +213,8 @@ class ConvertToTags:
         if not check_encoding_obj.check_encoding(self.__file, verbose=False):
             self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
         elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
-            self.__write_obj.write('<?xml version="1.0" encoding="%s" ?>' % self.__encoding)
+            self.__write_obj.write('<?xml version="1.0" encoding="UTF-8" ?>')
+            self.__convert_utf = True
         else:
             self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
             sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
@@ -253,15 +254,28 @@ class ConvertToTags:
         an empty tag function.
         """
         self.__initiate_values()
-        self.__write_obj = open(self.__write_to, 'w')
+        with open(self.__write_to, 'w') as self.__write_obj:
             self.__write_dec()
             with open(self.__file, 'r') as read_obj:
                 for line in read_obj:
                     self.__token_info = line[:16]
                     action = self.__state_dict.get(self.__token_info)
                     if action is not None:
                         action(line)
         self.__write_obj.close()
+        #convert all encodings to UTF8 to avoid unsupported encodings in lxml
+        if self.__convert_utf:
+            copy_obj = copy.Copy(bug_handler = self.__bug_handler)
+            copy_obj.rename(self.__write_to, self.__file)
+            with open(self.__file, 'r') as read_obj:
+                with open(self.__write_to, 'w') as write_obj:
+                    file = read_obj.read()
+                    try:
+                        file = file.decode(self.__encoding)
+                        write_obj.write(file.encode('utf-8'))
+                    except:
+                        sys.stderr.write('Conversion to UTF-8 is not possible,'
+                            ' encoding should be very carefully checked')
         copy_obj = copy.Copy(bug_handler = self.__bug_handler)
         if self.__copy:
             copy_obj.copy_file(self.__write_to, "convert_to_tags.data")
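The added branch works around lxml's refusal to honour arbitrary cpXXXX encodings in the XML declaration: the intermediate file is declared as UTF-8 and then actually transcoded to it. The transcoding core, as a standalone sketch:

    def transcode_to_utf8(path, src_encoding):
        # read the legacy-codepage bytes, decode, write back as UTF-8
        with open(path, 'rb') as f:
            data = f.read()
        with open(path, 'wb') as f:
            f.write(data.decode(src_encoding).encode('utf-8'))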
@@ -75,12 +75,16 @@ class DefaultEncoding:
             self._encoding()
             self.__datafetched = True
         code_page = 'ansicpg' + self.__code_page
+        if self.__code_page == '10000':
+            self.__code_page = 'mac_roman'
         return self.__platform, code_page, self.__default_num
 
     def get_codepage(self):
         if not self.__datafetched:
             self._encoding()
             self.__datafetched = True
+        if self.__code_page == '10000':
+            self.__code_page = 'mac_roman'
         return self.__code_page
 
     def get_platform(self):
@@ -16,7 +16,9 @@
 #                                                                       #
 #########################################################################
 import sys, os, tempfile
+
 from calibre.ebooks.rtf2xml import copy
+
 class Fonts:
     """
     Change lines with font info from font numbers to the actual font names.
@@ -45,6 +47,7 @@ class Fonts:
         self.__default_font_num = default_font_num
         self.__write_to = tempfile.mktemp()
         self.__run_level = run_level
+
     def __initiate_values(self):
         """
         Initiate all values.
@@ -67,6 +70,7 @@ class Fonts:
         self.__font_table = {}
         # individual font written
         self.__wrote_ind_font = 0
+
     def __default_func(self, line):
         """
         Requires:
@@ -79,6 +83,7 @@ class Fonts:
         if self.__token_info == 'mi<mk<fonttb-beg':
             self.__state = 'font_table'
         self.__write_obj.write(line)
+
     def __font_table_func(self, line):
         """
         Requires:
@@ -101,6 +106,7 @@ class Fonts:
         self.__font_num = self.__default_font_num
         self.__text_line = ''
         ##self.__write_obj.write(line)
+
     def __font_in_table_func(self, line):
         """
         Requires:
@@ -138,6 +144,7 @@ class Fonts:
         elif self.__token_info == 'mi<mk<fonttb-end':
             self.__found_end_font_table_func()
             self.__state = 'after_font_table'
+
     def __found_end_font_table_func(self):
         """
         Required:
@@ -150,7 +157,8 @@ class Fonts:
         if not self.__wrote_ind_font:
             self.__write_obj.write(
                 'mi<tg<empty-att_'
-                '<font-in-table<name>Times<num>0\n' )
+                '<font-in-table<name>Times<num>0\n')
+
     def __after_font_table_func(self, line):
         """
         Required:
@@ -169,7 +177,7 @@ class Fonts:
         if self.__token_info == 'cw<ci<font-style':
             font_num = line[20:-1]
             font_name = self.__font_table.get(font_num)
-            if font_name == None:
+            if font_name is None:
                 if self.__run_level > 3:
                     msg = 'no value for %s in self.__font_table\n' % font_num
                     raise self.__bug_handler, msg
@@ -182,6 +190,7 @@ class Fonts:
             )
         else:
             self.__write_obj.write(line)
+
     def convert_fonts(self):
         """
         Required:
@@ -197,20 +206,15 @@ class Fonts:
         info. Substitute a font name for a font number.
         """
         self.__initiate_values()
-        read_obj = open(self.__file, 'r')
-        self.__write_obj = open(self.__write_to, 'w')
-        line_to_read = 1
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            self.__token_info = line[:16]
-            action = self.__state_dict.get(self.__state)
-            if action == None:
-                sys.stderr.write('no no matching state in module fonts.py\n')
-                sys.stderr.write(self.__state + '\n')
-            action(line)
-        read_obj.close()
-        self.__write_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            with open(self.__write_to, 'w') as self.__write_obj:
+                for line in read_obj:
+                    self.__token_info = line[:16]
+                    action = self.__state_dict.get(self.__state)
+                    if action is None:
+                        sys.stderr.write('no matching state in module fonts.py\n' \
+                            + self.__state + '\n')
+                    action(line)
         default_font_name = self.__font_table.get(self.__default_font_num)
         if not default_font_name:
             default_font_name = 'Not Defined'
@@ -43,7 +43,7 @@ class GetCharMap:
     def get_char_map(self, map):
         if map == 'ansicpg0':
             map = 'ansicpg1250'
-        if map in ('ansicpg10000', '10000'):
+        if map == 'ansicpg10000':
             map = 'mac_roman'
         found_map = False
         map_dict = {}
@@ -126,12 +126,6 @@ class Tokenize:
         tokens = re.split(self.__splitexp, input_file)
         #remove empty tokens and \n
         return filter(lambda x: len(x) > 0 and x != '\n', tokens)
-        #input_file = re.sub(self.__utf_exp, self.__from_ms_to_utf8, input_file)
-        # line = re.sub( self.__neg_utf_exp, self.__neg_unicode_func, line)
-        # this is for older RTF
-        #line = re.sub(self.__par_exp, '\\par ', line)
-        #return filter(lambda x: len(x) > 0, \
-            #(self.__remove_line.sub('', x) for x in tokens))
 
     def __compile_expressions(self):
         SIMPLE_RPL = {
@@ -160,7 +154,7 @@ class Tokenize:
         }
         self.__replace_spchar = MReplace(SIMPLE_RPL)
         #add ;? in case of char following \u
-        self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})") #r"\\\'(..)"
+        self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})")
         self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) ?")
         self.__bin_exp = re.compile(r"(?:\\bin(-?\d{0,10})[\n ]+)[01\n]+")
         #manage upr/ud situations
@@ -172,14 +166,21 @@ class Tokenize:
         self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
         #this is for old RTF
         self.__par_exp = re.compile(r'\\\n+')
-        # self.__par_exp = re.compile(r'\\$')
+        #handle cw using a digit as argument and without space as delimiter
+        self.__cwdigit_exp = re.compile(r"(\\[a-zA-Z]+[\-0-9]+)([^0-9 \\]+)")
         #self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
         #self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
         #self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
         #self.__remove_line = re.compile(r'\n+')
-        #self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
         ##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
 
+    def __correct_spliting(self, token):
+        match_obj = re.search(self.__cwdigit_exp, token)
+        if match_obj is None:
+            return token
+        else:
+            return '%s\n%s' % (match_obj.group(1), match_obj.group(2))
+
     def tokenize(self):
         """Main class for handling other methods. Reads the file \
         , uses method self.sub_reg to make basic substitutions,\
@@ -195,6 +196,8 @@ class Tokenize:
         tokens = map(self.__unicode_process, tokens)
         #remove empty items created by removing \uc
         tokens = filter(lambda x: len(x) > 0, tokens)
+        #handles bothersome cases
+        tokens = map(self.__correct_spliting, tokens)
 
         #write
         with open(self.__write_to, 'wb') as write_obj:
@@ -203,8 +206,6 @@ class Tokenize:
         copy_obj = copy.Copy(bug_handler = self.__bug_handler)
         if self.__copy:
             copy_obj.copy_file(self.__write_to, "tokenize.data")
-        # if self.__out_file:
-        #     self.__file = self.__out_file
         copy_obj.rename(self.__write_to, self.__file)
         os.remove(self.__write_to)
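The new __correct_spliting pass (the spelling is the committed identifier) fixes control words whose numeric argument runs straight into following text with no space delimiter, e.g. '\fs24Hello'. What the regex captures, in isolation:

    import re

    cwdigit = re.compile(r"(\\[a-zA-Z]+[\-0-9]+)([^0-9 \\]+)")
    m = cwdigit.search(r'\fs24Hello')
    if m is not None:
        # the control word and the glued-on text, split onto separate tokens
        print '%s\n%s' % (m.group(1), m.group(2))   # '\fs24' then 'Hello'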
@@ -429,10 +429,12 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
                 old_extensions.add(ext)
         for ext in new_extensions:
             self.db.add_format(self.row, ext, open(paths[ext], 'rb'), notify=False)
-        db_extensions = set([f.lower() for f in self.db.formats(self.row).split(',')])
+        dbfmts = self.db.formats(self.row)
+        db_extensions = set([f.lower() for f in (dbfmts.split(',') if dbfmts
+            else [])])
         extensions = new_extensions.union(old_extensions)
         for ext in db_extensions:
-            if ext not in extensions:
+            if ext not in extensions and ext in self.original_formats:
                 self.db.remove_format(self.row, ext, notify=False)
 
     def show_format(self, item, *args):
@@ -576,6 +578,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
         self.orig_date = qt_to_dt(self.date.date())
 
         exts = self.db.formats(row)
+        self.original_formats = []
         if exts:
             exts = exts.split(',')
             for ext in exts:
@@ -586,6 +589,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
                 if size is None:
                     continue
                 Format(self.formats, ext, size, timestamp=timestamp)
+                self.original_formats.append(ext.lower())
 
 
         self.initialize_combos()
@@ -472,6 +472,7 @@ class FormatsManager(QWidget): # {{{
     def initialize(self, db, id_):
         self.changed = False
         exts = db.formats(id_, index_is_id=True)
+        self.original_val = set([])
         if exts:
             exts = exts.split(',')
             for ext in exts:
@@ -482,6 +483,7 @@ class FormatsManager(QWidget): # {{{
             if size is None:
                 continue
             Format(self.formats, ext, size, timestamp=timestamp)
+            self.original_val.add(ext.lower())

     def commit(self, db, id_):
         if not self.changed:
@@ -500,11 +502,12 @@ class FormatsManager(QWidget): # {{{
         for ext in new_extensions:
             db.add_format(id_, ext, open(paths[ext], 'rb'), notify=False,
                     index_is_id=True)
-        db_extensions = set([f.lower() for f in db.formats(id_,
-            index_is_id=True).split(',')])
+        dbfmts = db.formats(id_, index_is_id=True)
+        db_extensions = set([f.lower() for f in (dbfmts.split(',') if dbfmts
+            else [])])
         extensions = new_extensions.union(old_extensions)
         for ext in db_extensions:
-            if ext not in extensions:
+            if ext not in extensions and ext in self.original_val:
                 db.remove_format(id_, ext, notify=False, index_is_id=True)

         self.changed = False
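This mirrors the dialog change above: both format editors get the same None guard and the same removal rule. The rule itself, isolated (function and argument names here are hypothetical):

    def removable(ext, kept_exts, original_exts):
        # Remove a format only if the user dropped it AND it was present
        # when editing began; concurrently added formats are preserved.
        return ext not in kept_exts and ext in original_exts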
@@ -232,6 +232,7 @@ class BIBTEX(CatalogPlugin): # {{{
                     help = _('The fields to output when cataloging books in the '
                         'database. Should be a comma-separated list of fields.\n'
                         'Available fields: %s.\n'
+                        'plus user-created custom fields.\n'
                         'Example: %s=title,authors,tags\n'
                         "Default: '%%default'\n"
                         "Applies to: BIBTEX output format")%(', '.join(FIELDS),
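A note on the escaping in these help strings: the template is %-formatted once, immediately, with the field lists, so `%%default` survives that pass as `%default` for optparse to fill in later. For instance:

    tmpl = ("Available fields: %s.\n"
            "Default: '%%default'\n")
    print(tmpl % 'title, authors, tags')
    # Available fields: title, authors, tags.
    # Default: '%default'   <- optparse later swaps in the real default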
@@ -269,7 +270,7 @@ class BIBTEX(CatalogPlugin): # {{{
                     dest = 'bib_cit',
                     action = None,
                     help = _('The template for citation creation from database fields.\n'
-                        ' Should be a template with {} enclosed fields.\n'
+                        'Should be a template with {} enclosed fields.\n'
                         'Available fields: %s.\n'
                         "Default: '%%default'\n"
                         "Applies to: BIBTEX output format")%', '.join(TEMPLATE_ALLOWED_FIELDS)),
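The citation template takes `{}`-enclosed field names; a rough illustration of the expansion, assuming str.format-style substitution (the template, values, and the --bib-cit flag name inferred from dest='bib_cit' are all assumptions):

    template = '{authors}{id}'  # e.g. a --bib-cit value
    entry = {'authors': 'Austen', 'id': '42'}
    print(template.format(**entry))  # 'Austen42', usable as the BibTeX key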
@@ -344,7 +345,7 @@ class BIBTEX(CatalogPlugin): # {{{
             if field == 'authors' :
                 bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item))

-            elif field in ['title', 'publisher', 'cover', 'uuid',
+            elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
                 'author_sort', 'series'] :
                 bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))

@@ -378,7 +379,7 @@ class BIBTEX(CatalogPlugin): # {{{
             if calibre_files:
                 files = [u':%s:%s' % (format, format.rpartition('.')[2].upper())\
                     for format in item]
-                bibtex_entry.append(u'files = "%s"' % u', '.join(files))
+                bibtex_entry.append(u'file = "%s"' % u', '.join(files))

             elif field == 'series_index' :
                 bibtex_entry.append(u'volume = "%s"' % int(item))
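Renaming `files` to `file` matters because `file` is the field name reference managers such as JabRef read for linked documents, in the `:path:TYPE` convention the list comprehension already builds. Running that comprehension over invented paths shows the emitted line:

    item = ['/lib/Austen/Emma.epub', '/lib/Austen/Emma.pdf']  # made-up paths
    files = [u':%s:%s' % (fmt, fmt.rpartition('.')[2].upper()) for fmt in item]
    print(u'file = "%s"' % u', '.join(files))
    # file = ":/lib/Austen/Emma.epub:EPUB, :/lib/Austen/Emma.pdf:PDF"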
@@ -474,6 +475,8 @@ class BIBTEX(CatalogPlugin): # {{{
         if opts.verbose:
             opts_dict = vars(opts)
             log("%s(): Generating %s" % (self.name,self.fmt))
+            if opts.connected_device['is_device_connected']:
+                log(" connected_device: %s" % opts.connected_device['name'])
             if opts_dict['search_text']:
                 log(" --search='%s'" % opts_dict['search_text'])

@@ -548,6 +551,7 @@ class BIBTEX(CatalogPlugin): # {{{
                 as outfile:
             #File header
             nb_entries = len(data)

             #check in book strict if all is ok else throw a warning into log
             if bib_entry == 'book' :
                 nb_books = len(filter(check_entry_book_valid, data))
@@ -555,6 +559,11 @@ class BIBTEX(CatalogPlugin): # {{{
                     log(" WARNING: only %d entries in %d are book compatible" % (nb_books, nb_entries))
                     nb_entries = nb_books

+            # If connected device, add 'On Device' values to data
+            if opts.connected_device['is_device_connected'] and 'ondevice' in fields:
+                for entry in data:
+                    entry['ondevice'] = db.catalog_plugin_on_device_temp_mapping[entry['id']]['ondevice']
+
             outfile.write(u'%%%Calibre catalog\n%%%{0} entries in catalog\n\n'.format(nb_entries))
             outfile.write(u'@preamble{"This catalog of %d entries was generated by calibre on %s"}\n\n'
                 % (nb_entries, nowf().strftime("%A, %d. %B %Y %H:%M").decode(preferred_encoding)))
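Together with the `'ondevice'` addition to the recognized field list earlier, this makes the BibTeX catalog device-aware: when a reader is connected, every record is annotated with its on-device location before the entries are written. A stripped-down model of that flow (the data shapes mirror the `catalog_plugin_on_device_temp_mapping` lookup above; all values are invented):

    data = [{'id': 1, 'title': 'Emma'}, {'id': 2, 'title': 'Persuasion'}]
    on_device_map = {1: {'ondevice': 'Main'}, 2: {'ondevice': ''}}
    device_connected, fields = True, ['title', 'ondevice']

    if device_connected and 'ondevice' in fields:
        for entry in data:
            entry['ondevice'] = on_device_map[entry['id']]['ondevice']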
@@ -391,6 +391,8 @@ Take your pick:
     * A tribute to the SONY Librie which was the first e-ink based e-book reader
     * My wife chose it ;-)

+|app| is pronounced as cal-i-ber *not* ca-libre. If you're wondering, |app| is the British/commonwealth spelling for caliber. Being Indian, that's the natural spelling for me.
+
 Why does |app| show only some of my fonts on OS X?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 |app| embeds fonts in ebook files it creates. E-book files support embedding only TrueType (.ttf) fonts. Most fonts on OS X systems are in .dfont format, thus they cannot be embedded. |app| shows only TrueType fonts found on your system. You can obtain many TrueType fonts on the web. Simply download the .ttf files and add them to the Library/Fonts directory in your home directory.