merge from trunk

This commit is contained in:
ldolse 2011-02-01 02:03:13 +08:00
commit 483075e784
86 changed files with 988 additions and 554 deletions

View File

@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
title = '180.com.uy' title = '180.com.uy'
__author__ = 'Gustavo Azambuja' __author__ = 'Gustavo Azambuja'
description = 'Noticias de Uruguay' description = 'Noticias de Uruguay'
language = 'es' language = 'es_UY'
timefmt = '[%a, %d %b, %Y]' timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False use_embedded_content = False
recursion = 5 recursion = 5

View File

@ -20,7 +20,7 @@ class SieteDias(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'utf-8' encoding = 'utf-8'
language = 'es' language = 'es_AR'
lang = 'es-AR' lang = 'es-AR'
direction = 'ltr' direction = 'ltr'

View File

@ -58,4 +58,4 @@ class Ambito(BasicNewsRecipe):
del item['style'] del item['style']
return soup return soup
language = 'es' language = 'es_AR'

View File

@ -12,7 +12,7 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):
masthead_url = 'http://www.animalpolitico.com/wp-content/themes/animal_mu/images/logo.png' masthead_url = 'http://www.animalpolitico.com/wp-content/themes/animal_mu/images/logo.png'
oldest_article = 1 oldest_article = 1
max_articles_per_feed = 100 max_articles_per_feed = 100
language = 'es' language = 'es_MX'
#feeds = [(u'Animal Politico', u'http://www.animalpolitico.com/feed/')] #feeds = [(u'Animal Politico', u'http://www.animalpolitico.com/feed/')]

View File

@ -17,7 +17,7 @@ class Axxon_news(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = False no_stylesheets = False
use_embedded_content = False use_embedded_content = False
language = 'es' language = 'es_AR'
encoding = 'utf-8' encoding = 'utf-8'
publication_type = 'magazine' publication_type = 'magazine'
INDEX = 'http://axxon.com.ar/rev/' INDEX = 'http://axxon.com.ar/rev/'

View File

@ -18,7 +18,7 @@ class Axxon_news(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = False no_stylesheets = False
use_embedded_content = False use_embedded_content = False
language = 'es' language = 'es_AR'
lang = 'es-AR' lang = 'es-AR'

View File

@ -0,0 +1,53 @@
__license__ = 'GPL v3'
__author__ = 'Luis Hernandez'
__copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
__version__ = 'v1.0'
__date__ = '29 January 2011'
'''
http://www.bbc.co.uk/mundo/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1294946868(BasicNewsRecipe):
    '''
    Recipe that downloads the BBC Mundo RSS feeds listed in ``feeds``.
    '''

    # --- Publication metadata -------------------------------------------
    title = u'BBC Mundo'
    publisher = u'BBC'
    __author__ = 'Luis Hernandez'
    description = 'BBC World for spanish readers'
    language = 'es'
    cover_url = 'http://1.bp.blogspot.com/_NHiOjk_uZwU/TEYy7IJAdAI/AAAAAAAABP8/coAE-pJ7_5E/s1600/bbcmundo_h.png'
    timefmt = '[%a, %d %b, %Y]'

    # --- Download settings ----------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    remove_empty_feeds = True
    use_embedded_content = False
    encoding = 'UTF-8'

    # --- Page clean-up --------------------------------------------------
    remove_javascript = True
    no_stylesheets = True

    # Markers delimiting the part of each page that is kept.
    remove_tags_before = {'name': 'div', 'attrs': {'class': ['g-group']}}
    remove_tags_after = {'name': 'div', 'attrs': {'class': [' g-w8']}}

    # Elements stripped from the downloaded pages.
    remove_tags = [
        {
            'name': 'ul',
            'attrs': {'class': ['document-tools blq-clearfix', 'blq-clearfix']},
        },
        {
            'name': 'div',
            'attrs': {'class': [
                'box bx-quote-bubble',
                'socialmedia-links',
                'list li-carousel',
                'list li-plain rolling-news',
                'list li-plain',
                'box bx-livestats',
                'li-tab content',
                'list li-relatedlinks',
                'list li-relatedinternetlinks',
            ]},
        },
    ]

    # (section title, feed URL) pairs.
    feeds = [
        (u'Portada', u'http://www.bbc.co.uk/mundo/index.xml'),
        (u'Ultimas Noticias', u'http://www.bbc.co.uk/mundo/ultimas_noticias/index.xml'),
        (u'Internacional', u'http://www.bbc.co.uk/mundo/temas/internacional/index.xml'),
        (u'Economia', u'http://www.bbc.co.uk/mundo/temas/economia/index.xml'),
        (u'America Latina', u'http://www.bbc.co.uk/mundo/temas/america_latina/index.xml'),
        (u'Ciencia', u'http://www.bbc.co.uk/mundo/temas/ciencia/index.xml'),
        (u'Salud', u'http://www.bbc.co.uk/mundo/temas/salud/index.xml'),
        (u'Tecnologia', u'http://www.bbc.co.uk/mundo/temas/tecnologia/index.xml'),
        (u'Cultura', u'http://www.bbc.co.uk/mundo/temas/cultura/index.xml'),
    ]

View File

@ -12,7 +12,7 @@ class General(BasicNewsRecipe):
title = 'bitacora.com.uy' title = 'bitacora.com.uy'
__author__ = 'Gustavo Azambuja' __author__ = 'Gustavo Azambuja'
description = 'Noticias de Uruguay' description = 'Noticias de Uruguay'
language = 'es' language = 'es_UY'
timefmt = '[%a, %d %b, %Y]' timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False use_embedded_content = False
recursion = 5 recursion = 5

View File

@ -20,7 +20,7 @@ class BsAsEconomico(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'utf-8' encoding = 'utf-8'
language = 'es' language = 'es_AR'
lang = 'es-AR' lang = 'es-AR'
direction = 'ltr' direction = 'ltr'

View File

@ -18,7 +18,7 @@ class Clarin(BasicNewsRecipe):
use_embedded_content = False use_embedded_content = False
no_stylesheets = True no_stylesheets = True
encoding = 'utf8' encoding = 'utf8'
language = 'es' language = 'es_AR'
publication_type = 'newspaper' publication_type = 'newspaper'
INDEX = 'http://www.clarin.com' INDEX = 'http://www.clarin.com'
masthead_url = 'http://www.clarin.com/static/CLAClarin/images/logo-clarin-print.jpg' masthead_url = 'http://www.clarin.com/static/CLAClarin/images/logo-clarin-print.jpg'

View File

@ -14,7 +14,7 @@ class CriticaDigital(BasicNewsRecipe):
description = 'Noticias de Argentina' description = 'Noticias de Argentina'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
language = 'es' language = 'es_AR'
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False

View File

@ -11,7 +11,7 @@ class CubaDebate(BasicNewsRecipe):
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Contra el Terorismo Mediatico' description = 'Contra el Terorismo Mediatico'
oldest_article = 15 oldest_article = 15
language = 'es' language = 'es_CU'
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False

View File

@ -16,7 +16,7 @@ class DeutscheWelle_es(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
use_embedded_content = False use_embedded_content = False
no_stylesheets = True no_stylesheets = True
language = 'es' language = 'de_ES'
publication_type = 'newsportal' publication_type = 'newsportal'
remove_empty_feeds = True remove_empty_feeds = True
masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif' masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'

View File

@ -20,7 +20,7 @@ class Diagonales(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'utf-8' encoding = 'utf-8'
language = 'es' language = 'es_AR'
lang = 'es-AR' lang = 'es-AR'
direction = 'ltr' direction = 'ltr'

View File

@ -20,7 +20,7 @@ class ElMercurio(BasicNewsRecipe):
masthead_url = 'http://www.emol.com/especiales/logo_emol/logo_emol.gif' masthead_url = 'http://www.emol.com/especiales/logo_emol/logo_emol.gif'
remove_javascript = True remove_javascript = True
use_embedded_content = False use_embedded_content = False
language = 'es' language = 'es_CL'
conversion_options = { conversion_options = {

View File

@ -13,7 +13,7 @@ class ObservaDigital(BasicNewsRecipe):
title = 'Observa Digital' title = 'Observa Digital'
__author__ = 'yrvn' __author__ = 'yrvn'
description = 'Noticias de Uruguay' description = 'Noticias de Uruguay'
language = 'es' language = 'es_UY'
timefmt = '[%a, %d %b, %Y]' timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False use_embedded_content = False
recursion = 5 recursion = 5

View File

@ -14,7 +14,7 @@ class General(BasicNewsRecipe):
description = 'Noticias de Uruguay y el resto del mundo' description = 'Noticias de Uruguay y el resto del mundo'
publisher = 'EL PAIS S.A.' publisher = 'EL PAIS S.A.'
category = 'news, politics, Uruguay' category = 'news, politics, Uruguay'
language = 'es' language = 'es_UY'
timefmt = '[%a, %d %b, %Y]' timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False use_embedded_content = False
recursion = 2 recursion = 2

View File

@ -20,7 +20,7 @@ class ElUniversal(BasicNewsRecipe):
remove_javascript = True remove_javascript = True
remove_empty_feeds = True remove_empty_feeds = True
publication_type = 'newspaper' publication_type = 'newspaper'
language = 'es' language = 'es_MX'
extra_css = ''' extra_css = '''
body{font-family:Arial,Helvetica,sans-serif} body{font-family:Arial,Helvetica,sans-serif}

View File

@ -20,7 +20,7 @@ class ElArgentino(BasicNewsRecipe):
use_embedded_content = False use_embedded_content = False
encoding = 'utf8' encoding = 'utf8'
cover_url = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png' cover_url = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png'
language = 'es' language = 'es_AR'
html2lrf_options = [ html2lrf_options = [

View File

@ -18,7 +18,7 @@ class ElComercio(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
encoding = 'utf-8' encoding = 'utf-8'
use_embedded_content = True use_embedded_content = True
language = 'es' language = 'es_EC'
masthead_url = 'http://ww1.elcomercio.com/nv_images/headers/EC/logo_new_08.gif' masthead_url = 'http://ww1.elcomercio.com/nv_images/headers/EC/logo_new_08.gif'
extra_css = ' body{font-family: Arial,Verdana,sans-serif} img{margin-bottom: 1em} ' extra_css = ' body{font-family: Arial,Verdana,sans-serif} img{margin-bottom: 1em} '

View File

@ -13,7 +13,7 @@ class ElCronista(BasicNewsRecipe):
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Noticias de Argentina' description = 'Noticias de Argentina'
oldest_article = 2 oldest_article = 2
language = 'es' language = 'es_AR'
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True

View File

@ -21,7 +21,7 @@ class ElTiempoHn(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
remove_javascript = True remove_javascript = True
encoding = 'utf-8' encoding = 'utf-8'
language = 'es' language = 'es_HN'
lang = 'es-HN' lang = 'es-HN'
direction = 'ltr' direction = 'ltr'

View File

@ -18,7 +18,7 @@ class ElUniversal(BasicNewsRecipe):
encoding = 'cp1252' encoding = 'cp1252'
publisher = 'El Universal' publisher = 'El Universal'
category = 'news, Caracas, Venezuela, world' category = 'news, Caracas, Venezuela, world'
language = 'es' language = 'es_VE'
cover_url = strftime('http://static.eluniversal.com/%Y/%m/%d/portada.jpg') cover_url = strftime('http://static.eluniversal.com/%Y/%m/%d/portada.jpg')
conversion_options = { conversion_options = {

View File

@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class ElUniversalImpresaRecipe(BasicNewsRecipe): class ElUniversalImpresaRecipe(BasicNewsRecipe):
__license__ = 'GPL v3' __license__ = 'GPL v3'
__author__ = 'kwetal' __author__ = 'kwetal'
language = 'es' language = 'es_MX'
version = 1 version = 1
title = u'El Universal (Edici\u00F3n Impresa)' title = u'El Universal (Edici\u00F3n Impresa)'

View File

@ -17,7 +17,7 @@ class ElUniverso_Ecuador(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
encoding = 'utf8' encoding = 'utf8'
use_embedded_content = False use_embedded_content = False
language = 'es' language = 'es_EC'
remove_empty_feeds = True remove_empty_feeds = True
publication_type = 'newspaper' publication_type = 'newspaper'
masthead_url = 'http://servicios2.eluniverso.com/versiones/v1/img/Hd/lg_ElUniverso.gif' masthead_url = 'http://servicios2.eluniverso.com/versiones/v1/img/Hd/lg_ElUniverso.gif'

View File

@ -18,3 +18,6 @@ class EndgadgetJapan(BasicNewsRecipe):
language = 'ja' language = 'ja'
encoding = 'utf-8' encoding = 'utf-8'
feeds = [(u'engadget', u'http://japanese.engadget.com/rss.xml')] feeds = [(u'engadget', u'http://japanese.engadget.com/rss.xml')]
remove_tags_before = dict(name="div", attrs={'id':"content_wrap"})
remove_tags_after = dict(name='h3', attrs={'id':'addcomments'})

View File

@ -0,0 +1,54 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Explosm(BasicNewsRecipe):
    '''
    Recipe for the Explosm feed.

    Feed entries that do not link to a comic page are dropped in
    :meth:`parse_feeds`, and each downloaded page is reduced to the comic
    image selected by ``keep_only_tags``.
    '''

    title = u'Explosm Rotated'
    __author__ = 'Andromeda Rabbit'
    description = 'Explosm'
    language = 'en'
    use_embedded_content = False
    no_stylesheets = True
    oldest_article = 24
    remove_javascript = True
    remove_empty_feeds = True
    max_articles_per_feed = 10

    feeds = [
        (u'Explosm Feed', u'http://feeds.feedburner.com/Explosm')
    ]

    #match_regexps = [r'http://www.explosm.net/comics/.*']

    # Keep only the comic image; everything listed in remove_tags is stripped.
    keep_only_tags = [dict(name='img', attrs={'alt':'Cyanide and Happiness, a daily webcomic'})]
    remove_tags = [dict(name='div'), dict(name='span'), dict(name='table'), dict(name='br'), dict(name='nobr'), dict(name='a'), dict(name='b')]

    extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}'''

    def get_cover_url(self):
        '''Return the fixed cover image URL used for this recipe.'''
        return 'http://cdn.shopify.com/s/files/1/0059/1872/products/cyanidetitle_large.jpg?1295846286'

    def parse_feeds(self):
        '''
        Parse the feeds and keep only the articles that link to a comic.

        Returns the feed list produced by ``BasicNewsRecipe.parse_feeds``,
        with every article whose URL does not match the explosm.net comics
        pattern removed from its feed.
        '''
        feeds = BasicNewsRecipe.parse_feeds(self)
        comic_url = re.compile(r'http://www.explosm.net/comics')
        for curfeed in feeds:
            # Slice assignment filters in a single pass while keeping the
            # same list object, so other references to it stay valid.
            curfeed.articles[:] = [
                article for article in curfeed.articles
                if comic_url.search(article.url) is not None
            ]
        return feeds

    def skip_ad_pages(self, soup):
        '''
        Skip ad pages served before the actual article.

        Returns ``soup`` when the comic image is not found in the page and
        ``None`` when it is present.
        '''
        # NOTE(review): the base-class contract for this hook's return value
        # is not visible here -- confirm that returning the soup object is
        # what BasicNewsRecipe expects for an ad page.
        skip_tag = soup.find(name='img', attrs={'alt':'Cyanide and Happiness, a daily webcomic'})
        if skip_tag is None:
            return soup
        return None

View File

@ -12,7 +12,7 @@ class General(BasicNewsRecipe):
title = 'freeway.com.uy' title = 'freeway.com.uy'
__author__ = 'Gustavo Azambuja' __author__ = 'Gustavo Azambuja'
description = 'Revista Freeway, Montevideo, Uruguay' description = 'Revista Freeway, Montevideo, Uruguay'
language = 'es' language = 'es_UY'
timefmt = '[%a, %d %b, %Y]' timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False use_embedded_content = False
recursion = 1 recursion = 1

View File

@ -20,7 +20,7 @@ class Granma(BasicNewsRecipe):
use_embedded_content = False use_embedded_content = False
encoding = 'cp1252' encoding = 'cp1252'
cover_url = 'http://www.granma.cubaweb.cu/imagenes/granweb229d.jpg' cover_url = 'http://www.granma.cubaweb.cu/imagenes/granweb229d.jpg'
language = 'es' language = 'es_CU'
remove_javascript = True remove_javascript = True

View File

@ -18,7 +18,7 @@ class iEco(BasicNewsRecipe):
encoding = 'utf-8' encoding = 'utf-8'
publisher = 'Grupo Clarin' publisher = 'Grupo Clarin'
category = 'news, economia, mercados, bolsa de valores, finanzas, empresas, negocios, empleos, emprendedores, marketinguniversidades, tecnologia, agronegocios, noticias, informacion' category = 'news, economia, mercados, bolsa de valores, finanzas, empresas, negocios, empleos, emprendedores, marketinguniversidades, tecnologia, agronegocios, noticias, informacion'
language = 'es' language = 'es_AR'
cover_url = 'http://www.ieco.clarin.com/static2/images/Tapa-PDF.gif' cover_url = 'http://www.ieco.clarin.com/static2/images/Tapa-PDF.gif'
extra_css = ' #bd{font-family: sans-serif} ' extra_css = ' #bd{font-family: sans-serif} '

View File

@ -16,7 +16,7 @@ class Infobae(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
language = 'es' language = 'es_AR'
encoding = 'cp1252' encoding = 'cp1252'
masthead_url = 'http://www.infobae.com/imgs/header/header.gif' masthead_url = 'http://www.infobae.com/imgs/header/header.gif'
remove_javascript = True remove_javascript = True

View File

@ -20,7 +20,7 @@ class Juventudrebelde(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'cp1252' encoding = 'cp1252'
language = 'es' language = 'es_CU'
cover_url = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg') cover_url = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg')
remove_javascript = True remove_javascript = True

View File

@ -50,4 +50,4 @@ class LaCuarta(BasicNewsRecipe):
feeds = [(u'Noticias', u'http://lacuarta.cl/app/rss?sc=TEFDVUFSVEE=')] feeds = [(u'Noticias', u'http://lacuarta.cl/app/rss?sc=TEFDVUFSVEE=')]
language = 'es' language = 'es_CL'

View File

@ -12,7 +12,7 @@ class General(BasicNewsRecipe):
title = 'La Diaria' title = 'La Diaria'
__author__ = 'Gustavo Azambuja' __author__ = 'Gustavo Azambuja'
description = 'Noticias de Uruguay' description = 'Noticias de Uruguay'
language = 'es' language = 'es_UY'
timefmt = '[%a, %d %b, %Y]' timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False use_embedded_content = False
recursion = 5 recursion = 5

View File

@ -19,7 +19,7 @@ class LaJornada_mx(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
encoding = 'utf8' encoding = 'utf8'
use_embedded_content = False use_embedded_content = False
language = 'es' language = 'es_MX'
remove_empty_feeds = True remove_empty_feeds = True
cover_url = strftime("http://www.jornada.unam.mx/%Y/%m/%d/portada.pdf") cover_url = strftime("http://www.jornada.unam.mx/%Y/%m/%d/portada.pdf")
masthead_url = 'http://www.jornada.unam.mx/v7.0/imagenes/la-jornada-trans.png' masthead_url = 'http://www.jornada.unam.mx/v7.0/imagenes/la-jornada-trans.png'

View File

@ -18,7 +18,7 @@ class LaRazon_Bol(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
encoding = 'cp1252' encoding = 'cp1252'
use_embedded_content = False use_embedded_content = False
language = 'es' language = 'es_BO'
publication_type = 'newspaper' publication_type = 'newspaper'
delay = 1 delay = 1
remove_empty_feeds = True remove_empty_feeds = True

View File

@ -19,7 +19,7 @@ class LaSegunda(BasicNewsRecipe):
encoding = 'cp1252' encoding = 'cp1252'
masthead_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif' masthead_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
remove_empty_feeds = True remove_empty_feeds = True
language = 'es' language = 'es_CL'
extra_css = ' .titulonegritastop{font-size: xx-large; font-weight: bold} ' extra_css = ' .titulonegritastop{font-size: xx-large; font-weight: bold} '
conversion_options = { conversion_options = {

View File

@ -19,7 +19,7 @@ class LaMujerDeMiVida(BasicNewsRecipe):
encoding = 'cp1252' encoding = 'cp1252'
publisher = 'La Mujer de mi Vida' publisher = 'La Mujer de mi Vida'
category = 'literatura, critica, arte, ensayos' category = 'literatura, critica, arte, ensayos'
language = 'es' language = 'es_AR'
INDEX = 'http://www.lamujerdemivida.com.ar/' INDEX = 'http://www.lamujerdemivida.com.ar/'
html2lrf_options = [ html2lrf_options = [

View File

@ -16,7 +16,7 @@ class Lanacion(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
use_embedded_content = False use_embedded_content = False
no_stylesheets = True no_stylesheets = True
language = 'es' language = 'es_AR'
publication_type = 'newspaper' publication_type = 'newspaper'
remove_empty_feeds = True remove_empty_feeds = True
masthead_url = 'http://www.lanacion.com.ar/imgs/layout/logos/ln341x47.gif' masthead_url = 'http://www.lanacion.com.ar/imgs/layout/logos/ln341x47.gif'

View File

@ -51,4 +51,4 @@ class LaNacionChile(BasicNewsRecipe):
del item['style'] del item['style']
return soup return soup
language = 'es' language = 'es_CL'

View File

@ -21,7 +21,7 @@ class LaPrensa(BasicNewsRecipe):
encoding = 'cp1252' encoding = 'cp1252'
# cover_url = 'http://www.laprensa.com.ar/imgs/logo.gif' # cover_url = 'http://www.laprensa.com.ar/imgs/logo.gif'
remove_javascript = True remove_javascript = True
language = 'es' language = 'es_AR'
lang = 'es' lang = 'es'
html2lrf_options = [ html2lrf_options = [

View File

@ -21,7 +21,7 @@ class LaPrensaHn(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
remove_javascript = True remove_javascript = True
encoding = 'utf-8' encoding = 'utf-8'
language = 'es' language = 'es_HN'
lang = 'es-HN' lang = 'es-HN'
direction = 'ltr' direction = 'ltr'

View File

@ -22,7 +22,7 @@ class LaPrensa_ni(BasicNewsRecipe):
use_embedded_content = False use_embedded_content = False
encoding = 'cp1252' encoding = 'cp1252'
remove_javascript = True remove_javascript = True
language = 'es' language = 'es_NI'
months_es = ['enero','febrero','marzo','abril','mayo','junio','julio','agosto','septiembre','octubre','noviembre','diciembre'] months_es = ['enero','febrero','marzo','abril','mayo','junio','julio','agosto','septiembre','octubre','noviembre','diciembre']
current_month = months_es[datetime.date.today().month - 1] current_month = months_es[datetime.date.today().month - 1]

View File

@ -21,7 +21,7 @@ class LaTribuna(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
remove_javascript = True remove_javascript = True
encoding = 'utf-8' encoding = 'utf-8'
language = 'es' language = 'es_HN'
lang = 'es-HN' lang = 'es-HN'
direction = 'ltr' direction = 'ltr'

View File

@ -9,6 +9,8 @@ __description__ = 'Canadian Paper '
http://www.ledevoir.com/ http://www.ledevoir.com/
''' '''
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class ledevoir(BasicNewsRecipe): class ledevoir(BasicNewsRecipe):
@ -32,6 +34,8 @@ class ledevoir(BasicNewsRecipe):
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
preprocess_regexps = [(re.compile(r'(title|alt)=".*?>.*?"', re.DOTALL), lambda m: '')]
keep_only_tags = [ keep_only_tags = [
dict(name='div', attrs={'id':'article'}), dict(name='div', attrs={'id':'article'}),
dict(name='ul', attrs={'id':'ariane'}) dict(name='ul', attrs={'id':'ariane'})

View File

@ -18,7 +18,7 @@ class LosTiempos_Bol(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
encoding = 'cp1252' encoding = 'cp1252'
use_embedded_content = False use_embedded_content = False
language = 'es' language = 'es_BO'
publication_type = 'newspaper' publication_type = 'newspaper'
delay = 1 delay = 1
remove_empty_feeds = True remove_empty_feeds = True

View File

@ -12,7 +12,7 @@ import datetime
class Milenio(BasicNewsRecipe): class Milenio(BasicNewsRecipe):
title = u'Milenio-diario' title = u'Milenio-diario'
__author__ = 'Bmsleight' __author__ = 'Bmsleight'
language = 'es' language = 'es_MX'
description = 'Milenio-diario' description = 'Milenio-diario'
oldest_article = 10 oldest_article = 10
max_articles_per_feed = 100 max_articles_per_feed = 100

View File

@ -20,7 +20,7 @@ class MiradasAlSur(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'utf-8' encoding = 'utf-8'
language = 'es' language = 'es_AR'
lang = 'es-AR' lang = 'es-AR'
direction = 'ltr' direction = 'ltr'

View File

@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
title = 'Montevideo COMM' title = 'Montevideo COMM'
__author__ = 'Gustavo Azambuja' __author__ = 'Gustavo Azambuja'
description = 'Noticias de Uruguay' description = 'Noticias de Uruguay'
language = 'es' language = 'es_UY'
timefmt = '[%a, %d %b, %Y]' timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False use_embedded_content = False
recursion = 5 recursion = 5

View File

@ -20,7 +20,7 @@ class Newsweek_Argentina(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'utf-8' encoding = 'utf-8'
language = 'es' language = 'es_AR'
lang = 'es-AR' lang = 'es-AR'
direction = 'ltr' direction = 'ltr'

View File

@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
title = 'Observa Digital' title = 'Observa Digital'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>' __author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
description = 'Noticias desde Uruguay' description = 'Noticias desde Uruguay'
language = 'es' language = 'es_UY'
timefmt = '[%a, %d %b, %Y]' timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False use_embedded_content = False
recursion = 5 recursion = 5

View File

@ -19,7 +19,7 @@ class Pagina12(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
encoding = 'cp1252' encoding = 'cp1252'
use_embedded_content = False use_embedded_content = False
language = 'es' language = 'es_AR'
remove_empty_feeds = True remove_empty_feeds = True
publication_type = 'newspaper' publication_type = 'newspaper'
masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif' masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'

View File

@ -17,7 +17,7 @@ class Perfil(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
encoding = 'cp1252' encoding = 'cp1252'
use_embedded_content = False use_embedded_content = False
language = 'es' language = 'es_AR'
remove_empty_feeds = True remove_empty_feeds = True
masthead_url = 'http://www.perfil.com/export/sites/diarioperfil/arte/10/logo_perfilcom_mm.gif' masthead_url = 'http://www.perfil.com/export/sites/diarioperfil/arte/10/logo_perfilcom_mm.gif'
extra_css = """ extra_css = """

View File

@ -13,7 +13,7 @@ class Reptantes(BasicNewsRecipe):
description = u"cada vez que te haces acupuntura, tu muñeco vudú sufre en algún lado" description = u"cada vez que te haces acupuntura, tu muñeco vudú sufre en algún lado"
oldest_article = 130 oldest_article = 130
max_articles_per_feed = 100 max_articles_per_feed = 100
language = 'es' language = 'es_AR'
encoding = 'utf-8' encoding = 'utf-8'
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False

View File

@ -12,7 +12,7 @@ class Noticias(BasicNewsRecipe):
title = 'Revista Bla' title = 'Revista Bla'
__author__ = 'Gustavo Azambuja' __author__ = 'Gustavo Azambuja'
description = 'Moda | Uruguay' description = 'Moda | Uruguay'
language = 'es' language = 'es_UY'
timefmt = '[%a, %d %b, %Y]' timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False use_embedded_content = False
recursion = 5 recursion = 5

View File

@ -20,7 +20,7 @@ class Veintitres(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'utf-8' encoding = 'utf-8'
language = 'es' language = 'es_AR'
lang = 'es-AR' lang = 'es-AR'
direction = 'ltr' direction = 'ltr'

View File

@ -1,6 +1,6 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
vijesti.me vijesti.me
@ -18,12 +18,16 @@ class Vijesti(BasicNewsRecipe):
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 150 max_articles_per_feed = 150
no_stylesheets = True no_stylesheets = True
encoding = 'cp1250' encoding = 'utf8'
use_embedded_content = False use_embedded_content = False
language = 'sr' language = 'sr'
publication_type = 'newspaper' publication_type = 'newspaper'
masthead_url = 'http://www.vijesti.me/img/logo.gif' extra_css = """
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
body{font-family: Georgia,"Times New Roman",Times,serif1,serif}
.articledescription,.article,.chapter{font-family: sans1, sans-serif}
"""
conversion_options = { conversion_options = {
'comment' : description 'comment' : description
@ -34,11 +38,11 @@ class Vijesti(BasicNewsRecipe):
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [dict(name='div', attrs={'id':'mainnews'})] keep_only_tags = [dict(name='div', attrs={'id':['article_intro_text','article_text']})]
remove_tags = [dict(name=['object','link','embed','form'])] remove_tags = [dict(name=['object','link','embed','form'])]
feeds = [(u'Sve vijesti', u'http://www.vijesti.me/rss.php' )] feeds = [(u'Sve vijesti', u'http://www.vijesti.me/rss/' )]
def preprocess_html(self, soup): def preprocess_html(self, soup):
return self.adeify_images(soup) return self.adeify_images(soup)

View File

@ -360,6 +360,9 @@ class LinuxFreeze(Command):
def main(): def main():
try: try:
sys.argv[0] = sys.calibre_basename sys.argv[0] = sys.calibre_basename
dfv = os.environ.get('CALIBRE_DEVELOP_FROM', None)
if dfv and os.path.exists(dfv):
sys.path.insert(0, os.path.abspath(dfv))
set_default_encoding() set_default_encoding()
set_helper() set_helper()
set_qt_plugin_path() set_qt_plugin_path()

View File

@ -21,6 +21,7 @@ from calibre.devices.usbms.driver import USBMS
class EB600(USBMS): class EB600(USBMS):
name = 'Netronix EB600 Device Interface' name = 'Netronix EB600 Device Interface'
gui_name = 'Netronix EB600'
description = _('Communicate with the EB600 eBook reader.') description = _('Communicate with the EB600 eBook reader.')
author = 'Kovid Goyal' author = 'Kovid Goyal'
supported_platforms = ['windows', 'osx', 'linux'] supported_platforms = ['windows', 'osx', 'linux']

View File

@ -53,7 +53,7 @@ def find_pages(dir, sort_on_mtime=False, verbose=False):
prints('\t'+'\n\t'.join([os.path.basename(p) for p in pages])) prints('\t'+'\n\t'.join([os.path.basename(p) for p in pages]))
return pages return pages
class PageProcessor(list): class PageProcessor(list): # {{{
''' '''
Contains the actual image rendering logic. See :method:`render` and Contains the actual image rendering logic. See :method:`render` and
:method:`process_pages`. :method:`process_pages`.
@ -111,6 +111,13 @@ class PageProcessor(list):
SCRWIDTH, SCRHEIGHT = self.opts.output_profile.comic_screen_size SCRWIDTH, SCRHEIGHT = self.opts.output_profile.comic_screen_size
try:
if self.opts.comic_image_size:
SCRWIDTH, SCRHEIGHT = map(int, [x.strip() for x in
self.opts.comic_image_size.split('x')])
except:
pass # Ignore
if self.opts.keep_aspect_ratio: if self.opts.keep_aspect_ratio:
# Preserve the aspect ratio by adding border # Preserve the aspect ratio by adding border
aspect = float(sizex) / float(sizey) aspect = float(sizex) / float(sizey)
@ -170,6 +177,7 @@ class PageProcessor(list):
dest = dest[:-1] dest = dest[:-1]
os.rename(dest+'8', dest) os.rename(dest+'8', dest)
self.append(dest) self.append(dest)
# }}}
def render_pages(tasks, dest, opts, notification=lambda x, y: x): def render_pages(tasks, dest, opts, notification=lambda x, y: x):
''' '''
@ -291,7 +299,11 @@ class ComicInput(InputFormatPlugin):
OptionRecommendation(name='no_process', recommended_value=False, OptionRecommendation(name='no_process', recommended_value=False,
help=_("Apply no processing to the image")), help=_("Apply no processing to the image")),
OptionRecommendation(name='dont_grayscale', recommended_value=False, OptionRecommendation(name='dont_grayscale', recommended_value=False,
help=_('Do not convert the image to grayscale (black and white)')) help=_('Do not convert the image to grayscale (black and white)')),
OptionRecommendation(name='comic_image_size', recommended_value=None,
help=_('Specify the image size as widthxheight pixels. Normally,'
' an image size is automatically calculated from the output '
'profile, this option overrides it.')),
]) ])
recommendations = set([ recommendations = set([

View File

@ -39,12 +39,15 @@ class LITInput(InputFormatPlugin):
pre = body[0] pre = body[0]
from calibre.ebooks.txt.processor import convert_basic, preserve_spaces, \ from calibre.ebooks.txt.processor import convert_basic, preserve_spaces, \
separate_paragraphs_single_line separate_paragraphs_single_line
from calibre.ebooks.chardet import xml_to_unicode
from lxml import etree from lxml import etree
import copy import copy
html = separate_paragraphs_single_line(pre.text) html = separate_paragraphs_single_line(pre.text)
html = preserve_spaces(html) html = preserve_spaces(html)
html = convert_basic(html).replace('<html>', html = convert_basic(html).replace('<html>',
'<html xmlns="%s">'%XHTML_NS) '<html xmlns="%s">'%XHTML_NS)
html = xml_to_unicode(html, strip_encoding_pats=True,
resolve_entities=True)[0]
root = etree.fromstring(html) root = etree.fromstring(html)
body = XPath('//h:body')(root) body = XPath('//h:body')(root)
pre.tag = XHTML('div') pre.tag = XHTML('div')

View File

@ -488,7 +488,7 @@ class MobiReader(object):
def remove_random_bytes(self, html): def remove_random_bytes(self, html):
return re.sub('\x14|\x15|\x19|\x1c|\x1d|\xef|\x12|\x13|\xec|\x08', return re.sub('\x14|\x15|\x19|\x1c|\x1d|\xef|\x12|\x13|\xec|\x08|\x01|\x02|\x03|\x04|\x05|\x06|\x07',
'', html) '', html)
def ensure_unit(self, raw, unit='px'): def ensure_unit(self, raw, unit='px'):

View File

@ -83,6 +83,7 @@ class RTFInput(InputFormatPlugin):
os.mkdir(debug_dir) os.mkdir(debug_dir)
debug_dir = 'rtfdebug' debug_dir = 'rtfdebug'
run_lev = 4 run_lev = 4
self.log('Running RTFParser in debug mode')
except: except:
pass pass
parser = ParseRtf( parser = ParseRtf(
@ -230,22 +231,6 @@ class RTFInput(InputFormatPlugin):
with open('styles.css', 'ab') as f: with open('styles.css', 'ab') as f:
f.write(css) f.write(css)
# def preprocess(self, fname):
# self.log('\tPreprocessing to convert unicode characters')
# try:
# data = open(fname, 'rb').read()
# from calibre.ebooks.rtf.preprocess import RtfTokenizer, RtfTokenParser
# tokenizer = RtfTokenizer(data)
# tokens = RtfTokenParser(tokenizer.tokens)
# data = tokens.toRTF()
# fname = 'preprocessed.rtf'
# with open(fname, 'wb') as f:
# f.write(data)
# except:
# self.log.exception(
# 'Failed to preprocess RTF to convert unicode sequences, ignoring...')
# return fname
def convert_borders(self, doc): def convert_borders(self, doc):
border_styles = [] border_styles = []
style_map = {} style_map = {}
@ -280,8 +265,6 @@ class RTFInput(InputFormatPlugin):
self.opts = options self.opts = options
self.log = log self.log = log
self.log('Converting RTF to XML...') self.log('Converting RTF to XML...')
#Name of the preprocesssed RTF file
# fname = self.preprocess(stream.name)
try: try:
xml = self.generate_xml(stream.name) xml = self.generate_xml(stream.name)
except RtfInvalidCodeException, e: except RtfInvalidCodeException, e:
@ -335,3 +318,4 @@ class RTFInput(InputFormatPlugin):
opf.render(open('metadata.opf', 'wb')) opf.render(open('metadata.opf', 'wb'))
return os.path.abspath('metadata.opf') return os.path.abspath('metadata.opf')

View File

@ -24,14 +24,15 @@ from calibre.utils.magick.draw import save_cover_data_to, identify_data
TAGS = { TAGS = {
'b': '\\b', 'b': '\\b',
'del': '\\deleted', 'del': '\\deleted',
'h1': '\\b \\par \\pard \\hyphpar', 'h1': '\\s1 \\afs32',
'h2': '\\b \\par \\pard \\hyphpar', 'h2': '\\s2 \\afs28',
'h3': '\\b \\par \\pard \\hyphpar', 'h3': '\\s3 \\afs28',
'h4': '\\b \\par \\pard \\hyphpar', 'h4': '\\s4 \\afs23',
'h5': '\\b \\par \\pard \\hyphpar', 'h5': '\\s5 \\afs23',
'h6': '\\b \\par \\pard \\hyphpar', 'h6': '\\s6 \\afs21',
'li': '\\par \\pard \\hyphpar \t', 'i': '\\i',
'p': '\\par \\pard \\hyphpar \t', 'li': '\t',
'p': '\t',
'sub': '\\sub', 'sub': '\\sub',
'sup': '\\super', 'sup': '\\super',
'u': '\\ul', 'u': '\\ul',
@ -39,15 +40,9 @@ TAGS = {
SINGLE_TAGS = { SINGLE_TAGS = {
'br': '\n{\\line }\n', 'br': '\n{\\line }\n',
'div': '\n{\\line }\n',
}
SINGLE_TAGS_END = {
'div': '\n{\\line }\n',
} }
STYLES = [ STYLES = [
('display', {'block': '\\par \\pard \\hyphpar'}),
('font-weight', {'bold': '\\b', 'bolder': '\\b'}), ('font-weight', {'bold': '\\b', 'bolder': '\\b'}),
('font-style', {'italic': '\\i'}), ('font-style', {'italic': '\\i'}),
('text-align', {'center': '\\qc', 'left': '\\ql', 'right': '\\qr'}), ('text-align', {'center': '\\qc', 'left': '\\ql', 'right': '\\qr'}),
@ -55,6 +50,7 @@ STYLES = [
] ]
BLOCK_TAGS = [ BLOCK_TAGS = [
'div',
'p', 'p',
'h1', 'h1',
'h2', 'h2',
@ -117,9 +113,11 @@ class RTFMLizer(object):
self.log.debug('Converting %s to RTF markup...' % item.href) self.log.debug('Converting %s to RTF markup...' % item.href)
content = unicode(etree.tostring(item.data, encoding=unicode)) content = unicode(etree.tostring(item.data, encoding=unicode))
content = self.remove_newlines(content) content = self.remove_newlines(content)
content = self.remove_tabs(content)
content = etree.fromstring(content) content = etree.fromstring(content)
stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile) stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile)
output += self.dump_text(content.find(XHTML('body')), stylizer) output += self.dump_text(content.find(XHTML('body')), stylizer)
output += '{\\page }'
output += self.footer() output += self.footer()
output = self.insert_images(output) output = self.insert_images(output)
output = self.clean_text(output) output = self.clean_text(output)
@ -134,8 +132,23 @@ class RTFMLizer(object):
return text return text
def remove_tabs(self, text):
    '''
    Return ``text`` with every tab character replaced by a single space.

    A debug message is written to ``self.log`` before the replacement is
    made. The input string itself is not modified; the converted copy is
    returned.
    '''
    # The message previously started with '\R', which is not a valid escape
    # sequence and therefore logged a literal backslash. A leading tab is
    # used instead so the line is indented like other sub-step log messages.
    self.log.debug('\tReplace tabs with space for processing...')
    return text.replace('\t', ' ')
def header(self): def header(self):
return u'{\\rtf1{\\info{\\title %s}{\\author %s}}\\ansi\\ansicpg1252\\deff0\\deflang1033' % (self.oeb_book.metadata.title[0].value, authors_to_string([x.value for x in self.oeb_book.metadata.creator])) header = u'{\\rtf1{\\info{\\title %s}{\\author %s}}\\ansi\\ansicpg1252\\deff0\\deflang1033\n' % (self.oeb_book.metadata.title[0].value, authors_to_string([x.value for x in self.oeb_book.metadata.creator]))
return header + \
'{\\fonttbl{\\f0\\froman\\fprq2\\fcharset128 Times New Roman;}{\\f1\\froman\\fprq2\\fcharset128 Times New Roman;}{\\f2\\fswiss\\fprq2\\fcharset128 Arial;}{\\f3\\fnil\\fprq2\\fcharset128 Arial;}{\\f4\\fnil\\fprq2\\fcharset128 MS Mincho;}{\\f5\\fnil\\fprq2\\fcharset128 Tahoma;}{\\f6\\fnil\\fprq0\\fcharset128 Tahoma;}}\n' \
'{\\stylesheet{\\ql \\li0\\ri0\\nowidctlpar\\wrapdefault\\faauto\\rin0\\lin0\\itap0 \\rtlch\\fcs1 \\af25\\afs24\\alang1033 \\ltrch\\fcs0 \\fs24\\lang1033\\langfe255\\cgrid\\langnp1033\\langfenp255 \\snext0 Normal;}\n' \
'{\\s1\\ql \\li0\\ri0\\sb240\\sa120\\keepn\\nowidctlpar\\wrapdefault\\faauto\\outlinelevel0\\rin0\\lin0\\itap0 \\rtlch\\fcs1 \\ab\\af0\\afs32\\alang1033 \\ltrch\\fcs0 \\b\\fs32\\lang1033\\langfe255\\loch\\f1\\hich\\af1\\dbch\\af26\\cgrid\\langnp1033\\langfenp255 \\sbasedon15 \\snext16 \\slink21 heading 1;}\n' \
'{\\s2\\ql \\li0\\ri0\\sb240\\sa120\\keepn\\nowidctlpar\\wrapdefault\\faauto\\outlinelevel1\\rin0\\lin0\\itap0 \\rtlch\\fcs1 \\ab\\ai\\af0\\afs28\\alang1033 \\ltrch\\fcs0 \\b\\i\\fs28\\lang1033\\langfe255\\loch\\f1\\hich\\af1\\dbch\\af26\\cgrid\\langnp1033\\langfenp255 \\sbasedon15 \\snext16 \\slink22 heading 2;}\n' \
'{\\s3\\ql \\li0\\ri0\\sb240\\sa120\\keepn\\nowidctlpar\\wrapdefault\\faauto\\outlinelevel2\\rin0\\lin0\\itap0 \\rtlch\\fcs1 \\ab\\af0\\afs28\\alang1033 \\ltrch\\fcs0 \\b\\fs28\\lang1033\\langfe255\\loch\\f1\\hich\\af1\\dbch\\af26\\cgrid\\langnp1033\\langfenp255 \\sbasedon15 \\snext16 \\slink23 heading 3;}\n' \
'{\\s4\\ql \\li0\\ri0\\sb240\\sa120\\keepn\\nowidctlpar\\wrapdefault\\faauto\\outlinelevel3\\rin0\\lin0\\itap0 \\rtlch\\fcs1 \\ab\\ai\\af0\\afs23\\alang1033 \\ltrch\\fcs0\\b\\i\\fs23\\lang1033\\langfe255\\loch\\f1\\hich\\af1\\dbch\\af26\\cgrid\\langnp1033\\langfenp255 \\sbasedon15 \\snext16 \\slink24 heading 4;}\n' \
'{\\s5\\ql \\li0\\ri0\\sb240\\sa120\\keepn\\nowidctlpar\\wrapdefault\\faauto\\outlinelevel4\\rin0\\lin0\\itap0 \\rtlch\\fcs1 \\ab\\af0\\afs23\\alang1033 \\ltrch\\fcs0 \\b\\fs23\\lang1033\\langfe255\\loch\\f1\\hich\\af1\\dbch\\af26\\cgrid\\langnp1033\\langfenp255 \\sbasedon15 \\snext16 \\slink25 heading 5;}\n' \
'{\\s6\\ql \\li0\\ri0\\sb240\\sa120\\keepn\\nowidctlpar\\wrapdefault\\faauto\\outlinelevel5\\rin0\\lin0\\itap0 \\rtlch\\fcs1 \\ab\\af0\\afs21\\alang1033 \\ltrch\\fcs0 \\b\\fs21\\lang1033\\langfe255\\loch\\f1\\hich\\af1\\dbch\\af26\\cgrid\\langnp1033\\langfenp255 \\sbasedon15 \\snext16 \\slink26 heading 6;}}\n'
def footer(self): def footer(self):
return ' }' return ' }'
@ -170,19 +183,16 @@ class RTFMLizer(object):
return (hex_string, width, height) return (hex_string, width, height)
def clean_text(self, text): def clean_text(self, text):
# Remove excess spaces at beginning and end of lines
text = re.sub('(?m)^[ ]+', '', text)
text = re.sub('(?m)[ ]+$', '', text)
# Remove excessive newlines # Remove excessive newlines
#text = re.sub('%s{1,1}' % os.linesep, '%s%s' % (os.linesep, os.linesep), text)
text = re.sub('%s{3,}' % os.linesep, '%s%s' % (os.linesep, os.linesep), text) text = re.sub('%s{3,}' % os.linesep, '%s%s' % (os.linesep, os.linesep), text)
# Remove excessive spaces # Remove excessive spaces
text = re.sub('[ ]{2,}', ' ', text) text = re.sub('[ ]{2,}', ' ', text)
text = re.sub('\t{2,}', '\t', text)
text = re.sub('\t ', '\t', text)
# Remove excessive line breaks
text = re.sub(r'(\{\\line \}\s*){3,}', r'{\\line }{\\line }', text) text = re.sub(r'(\{\\line \}\s*){3,}', r'{\\line }{\\line }', text)
#text = re.compile(r'(\{\\line \}\s*)+(?P<brackets>}*)\s*\{\\par').sub(lambda mo: r'%s{\\par' % mo.group('brackets'), text)
# Remove non-breaking spaces # Remove non-breaking spaces
text = text.replace(u'\xa0', ' ') text = text.replace(u'\xa0', ' ')
@ -245,7 +255,7 @@ class RTFMLizer(object):
tag_stack.append(style_tag) tag_stack.append(style_tag)
# Proccess tags that contain text. # Proccess tags that contain text.
if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '': if hasattr(elem, 'text') and elem.text:
text += txt2rtf(elem.text) text += txt2rtf(elem.text)
for item in elem: for item in elem:
@ -254,13 +264,12 @@ class RTFMLizer(object):
for i in range(0, tag_count): for i in range(0, tag_count):
end_tag = tag_stack.pop() end_tag = tag_stack.pop()
if end_tag != 'block': if end_tag != 'block':
if tag in BLOCK_TAGS:
text += u'\\par\\pard\\plain\\hyphpar}'
else:
text += u'}' text += u'}'
single_tag_end = SINGLE_TAGS_END.get(tag, None) if hasattr(elem, 'tail') and elem.tail:
if single_tag_end:
text += single_tag_end
if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '':
if 'block' in tag_stack: if 'block' in tag_stack:
text += '%s' % txt2rtf(elem.tail) text += '%s' % txt2rtf(elem.tail)
else: else:

View File

@ -238,6 +238,8 @@ class ParseRtf:
bug_handler = RtfInvalidCodeException, bug_handler = RtfInvalidCodeException,
) )
enc = 'cp' + encode_obj.get_codepage() enc = 'cp' + encode_obj.get_codepage()
if enc == 'cp10000':
enc = 'mac_roman'
msg = 'Exception in token processing' msg = 'Exception in token processing'
if check_encoding_obj.check_encoding(self.__file, enc): if check_encoding_obj.check_encoding(self.__file, enc):
file_name = self.__file if isinstance(self.__file, str) \ file_name = self.__file if isinstance(self.__file, str) \

View File

@ -16,7 +16,9 @@
# # # #
######################################################################### #########################################################################
import sys, os, tempfile, re import sys, os, tempfile, re
from calibre.ebooks.rtf2xml import copy from calibre.ebooks.rtf2xml import copy
class Colors: class Colors:
""" """
Change lines with color info from color numbers to the actual color names. Change lines with color info from color numbers to the actual color names.
@ -40,8 +42,10 @@ class Colors:
self.__file = in_file self.__file = in_file
self.__copy = copy self.__copy = copy
self.__bug_handler = bug_handler self.__bug_handler = bug_handler
self.__line = 0
self.__write_to = tempfile.mktemp() self.__write_to = tempfile.mktemp()
self.__run_level = run_level self.__run_level = run_level
def __initiate_values(self): def __initiate_values(self):
""" """
Initiate all values. Initiate all values.
@ -61,6 +65,7 @@ class Colors:
self.__color_num = 1 self.__color_num = 1
self.__line_color_exp = re.compile(r'bdr-color_:(\d+)') self.__line_color_exp = re.compile(r'bdr-color_:(\d+)')
# cw<bd<bor-par-to<nu<bdr-hair__|bdr-li-wid:0.50|bdr-sp-wid:1.00|bdr-color_:2 # cw<bd<bor-par-to<nu<bdr-hair__|bdr-li-wid:0.50|bdr-sp-wid:1.00|bdr-color_:2
def __before_color_func(self, line): def __before_color_func(self, line):
""" """
Requires: Requires:
@ -76,6 +81,7 @@ class Colors:
if self.__token_info == 'mi<mk<clrtbl-beg': if self.__token_info == 'mi<mk<clrtbl-beg':
self.__state = 'in_color_table' self.__state = 'in_color_table'
self.__write_obj.write(line) self.__write_obj.write(line)
def __default_color_func(self, line): def __default_color_func(self, line):
""" """
Requires: Requires:
@ -87,6 +93,7 @@ class Colors:
""" """
hex_num = line[-3:-1] hex_num = line[-3:-1]
self.__color_string += hex_num self.__color_string += hex_num
def __blue_func(self, line): def __blue_func(self, line):
""" """
Requires: Requires:
@ -109,6 +116,7 @@ class Colors:
) )
self.__color_num += 1 self.__color_num += 1
self.__color_string = '#' self.__color_string = '#'
def __in_color_func(self, line): def __in_color_func(self, line):
""" """
Requires: Requires:
@ -127,12 +135,13 @@ class Colors:
self.__state = 'after_color_table' self.__state = 'after_color_table'
else: else:
action = self.__state_dict.get(self.__token_info) action = self.__state_dict.get(self.__token_info)
if action == None: if action is None:
sys.stderr.write('in module colors.py\n' sys.stderr.write('in module colors.py\n'
'function is self.__in_color_func\n' 'function is self.__in_color_func\n'
'no action for %s' % self.__token_info 'no action for %s' % self.__token_info
) )
action(line) action(line)
def __after_color_func(self, line): def __after_color_func(self, line):
""" """
Check the to see if it contains color info. If it does, extract the Check the to see if it contains color info. If it does, extract the
@ -180,6 +189,7 @@ class Colors:
else: else:
self.__write_obj.write(line) self.__write_obj.write(line)
# cw<bd<bor-par-to<nu<bdr-hair__|bdr-li-wid:0.50|bdr-sp-wid:1.00|bdr-color_:2 # cw<bd<bor-par-to<nu<bdr-hair__|bdr-li-wid:0.50|bdr-sp-wid:1.00|bdr-color_:2
def __sub_from_line_color(self, match_obj): def __sub_from_line_color(self, match_obj):
num = match_obj.group(1) num = match_obj.group(1)
try: try:
@ -191,25 +201,27 @@ class Colors:
else: else:
return 'bdr-color_:no-value' return 'bdr-color_:no-value'
hex_num = self.__figure_num(num) hex_num = self.__figure_num(num)
return_value = 'bdr-color_:%s' % hex_num return 'bdr-color_:%s' % hex_num
return return_value
def __figure_num(self, num): def __figure_num(self, num):
if num == 0: if num == 0:
hex_num = 'false' hex_num = 'false'
else: else:
hex_num = self.__color_dict.get(num) hex_num = self.__color_dict.get(num)
if hex_num == None: if hex_num is None:
if self.__run_level > 3:
msg = 'no value in self.__color_dict for key %s\n' % num
raise self.__bug_hanlder, msg
if hex_num == None:
hex_num = '0' hex_num = '0'
if self.__run_level > 5:
msg = 'no value in self.__color_dict' \
'for key %s at line %d\n' % (num, self.__line)
raise self.__bug_handler, msg
return hex_num return hex_num
def __do_nothing_func(self, line): def __do_nothing_func(self, line):
""" """
Bad RTF will have text in the color table Bad RTF will have text in the color table
""" """
pass pass
def convert_colors(self): def convert_colors(self):
""" """
Requires: Requires:
@ -226,20 +238,16 @@ class Colors:
info, and substitute the number with the hex number. info, and substitute the number with the hex number.
""" """
self.__initiate_values() self.__initiate_values()
read_obj = open(self.__file, 'r') with open(self.__file, 'r') as read_obj:
self.__write_obj = open(self.__write_to, 'w') with open(self.__write_to, 'w') as self.__write_obj:
line_to_read = 1 for line in read_obj:
while line_to_read: self.__line+=1
line_to_read = read_obj.readline()
line = line_to_read
self.__token_info = line[:16] self.__token_info = line[:16]
action = self.__state_dict.get(self.__state) action = self.__state_dict.get(self.__state)
if action == None: if action is None:
sys.stderr.write('no no matching state in module fonts.py\n') sys.stderr.write('no matching state in module fonts.py\n')
sys.stderr.write(self.__state + '\n') sys.stderr.write(self.__state + '\n')
action(line) action(line)
read_obj.close()
self.__write_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "color.data") copy_obj.copy_file(self.__write_to, "color.data")

View File

@ -33,13 +33,13 @@ class ConvertToTags:
self.__copy = copy self.__copy = copy
self.__dtd_path = dtd_path self.__dtd_path = dtd_path
self.__no_dtd = no_dtd self.__no_dtd = no_dtd
if encoding != 'mac_roman':
self.__encoding = 'cp' + encoding self.__encoding = 'cp' + encoding
else: if encoding == 'mac_roman':
self.__encoding = 'mac_roman' self.__encoding = 'mac_roman'
self.__indent = indent self.__indent = indent
self.__run_level = run_level self.__run_level = run_level
self.__write_to = tempfile.mktemp() self.__write_to = tempfile.mktemp()
self.__convert_utf = False
def __initiate_values(self): def __initiate_values(self):
""" """
@ -213,7 +213,8 @@ class ConvertToTags:
if not check_encoding_obj.check_encoding(self.__file, verbose=False): if not check_encoding_obj.check_encoding(self.__file, verbose=False):
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>') self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
elif not check_encoding_obj.check_encoding(self.__file, self.__encoding): elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
self.__write_obj.write('<?xml version="1.0" encoding="%s" ?>' % self.__encoding) self.__write_obj.write('<?xml version="1.0" encoding="UTF-8" ?>')
self.__convert_utf = True
else: else:
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>') self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and' sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
@ -253,7 +254,7 @@ class ConvertToTags:
an empty tag function. an empty tag function.
""" """
self.__initiate_values() self.__initiate_values()
self.__write_obj = open(self.__write_to, 'w') with open(self.__write_to, 'w') as self.__write_obj:
self.__write_dec() self.__write_dec()
with open(self.__file, 'r') as read_obj: with open(self.__file, 'r') as read_obj:
for line in read_obj: for line in read_obj:
@ -262,6 +263,19 @@ class ConvertToTags:
if action is not None: if action is not None:
action(line) action(line)
self.__write_obj.close() self.__write_obj.close()
#convert all encodings to UTF8 to avoid unsupported encodings in lxml
if self.__convert_utf:
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
copy_obj.rename(self.__write_to, self.__file)
with open(self.__file, 'r') as read_obj:
with open(self.__write_to, 'w') as write_obj:
file = read_obj.read()
try:
file = file.decode(self.__encoding)
write_obj.write(file.encode('utf-8'))
except:
sys.stderr.write('Conversion to UTF-8 is not possible,'
' encoding should be very carefully checked')
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "convert_to_tags.data") copy_obj.copy_file(self.__write_to, "convert_to_tags.data")

View File

@ -75,12 +75,16 @@ class DefaultEncoding:
self._encoding() self._encoding()
self.__datafetched = True self.__datafetched = True
code_page = 'ansicpg' + self.__code_page code_page = 'ansicpg' + self.__code_page
if self.__code_page == '10000':
self.__code_page = 'mac_roman'
return self.__platform, code_page, self.__default_num return self.__platform, code_page, self.__default_num
def get_codepage(self): def get_codepage(self):
if not self.__datafetched: if not self.__datafetched:
self._encoding() self._encoding()
self.__datafetched = True self.__datafetched = True
if self.__code_page == '10000':
self.__code_page = 'mac_roman'
return self.__code_page return self.__code_page
def get_platform(self): def get_platform(self):

View File

@ -16,7 +16,9 @@
# # # #
######################################################################### #########################################################################
import sys, os, tempfile import sys, os, tempfile
from calibre.ebooks.rtf2xml import copy from calibre.ebooks.rtf2xml import copy
class Fonts: class Fonts:
""" """
Change lines with font info from font numbers to the actual font names. Change lines with font info from font numbers to the actual font names.
@ -45,6 +47,7 @@ class Fonts:
self.__default_font_num = default_font_num self.__default_font_num = default_font_num
self.__write_to = tempfile.mktemp() self.__write_to = tempfile.mktemp()
self.__run_level = run_level self.__run_level = run_level
def __initiate_values(self): def __initiate_values(self):
""" """
Initiate all values. Initiate all values.
@ -67,6 +70,7 @@ class Fonts:
self.__font_table = {} self.__font_table = {}
# individual font written # individual font written
self.__wrote_ind_font = 0 self.__wrote_ind_font = 0
def __default_func(self, line): def __default_func(self, line):
""" """
Requires: Requires:
@ -79,6 +83,7 @@ class Fonts:
if self.__token_info == 'mi<mk<fonttb-beg': if self.__token_info == 'mi<mk<fonttb-beg':
self.__state = 'font_table' self.__state = 'font_table'
self.__write_obj.write(line) self.__write_obj.write(line)
def __font_table_func(self, line): def __font_table_func(self, line):
""" """
Requires: Requires:
@ -101,6 +106,7 @@ class Fonts:
self.__font_num = self.__default_font_num self.__font_num = self.__default_font_num
self.__text_line = '' self.__text_line = ''
##self.__write_obj.write(line) ##self.__write_obj.write(line)
def __font_in_table_func(self, line): def __font_in_table_func(self, line):
""" """
Requires: Requires:
@ -138,6 +144,7 @@ class Fonts:
elif self.__token_info == 'mi<mk<fonttb-end': elif self.__token_info == 'mi<mk<fonttb-end':
self.__found_end_font_table_func() self.__found_end_font_table_func()
self.__state = 'after_font_table' self.__state = 'after_font_table'
def __found_end_font_table_func(self): def __found_end_font_table_func(self):
""" """
Required: Required:
@ -151,6 +158,7 @@ class Fonts:
self.__write_obj.write( self.__write_obj.write(
'mi<tg<empty-att_' 'mi<tg<empty-att_'
'<font-in-table<name>Times<num>0\n') '<font-in-table<name>Times<num>0\n')
def __after_font_table_func(self, line): def __after_font_table_func(self, line):
""" """
Required: Required:
@ -169,7 +177,7 @@ class Fonts:
if self.__token_info == 'cw<ci<font-style': if self.__token_info == 'cw<ci<font-style':
font_num = line[20:-1] font_num = line[20:-1]
font_name = self.__font_table.get(font_num) font_name = self.__font_table.get(font_num)
if font_name == None: if font_name is None:
if self.__run_level > 3: if self.__run_level > 3:
msg = 'no value for %s in self.__font_table\n' % font_num msg = 'no value for %s in self.__font_table\n' % font_num
raise self.__bug_handler, msg raise self.__bug_handler, msg
@ -182,6 +190,7 @@ class Fonts:
) )
else: else:
self.__write_obj.write(line) self.__write_obj.write(line)
def convert_fonts(self): def convert_fonts(self):
""" """
Required: Required:
@ -197,20 +206,15 @@ class Fonts:
info. Substitute a font name for a font number. info. Substitute a font name for a font number.
""" """
self.__initiate_values() self.__initiate_values()
read_obj = open(self.__file, 'r') with open(self.__file, 'r') as read_obj:
self.__write_obj = open(self.__write_to, 'w') with open(self.__write_to, 'w') as self.__write_obj:
line_to_read = 1 for line in read_obj:
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
self.__token_info = line[:16] self.__token_info = line[:16]
action = self.__state_dict.get(self.__state) action = self.__state_dict.get(self.__state)
if action == None: if action is None:
sys.stderr.write('no no matching state in module fonts.py\n') sys.stderr.write('no matching state in module fonts.py\n' \
sys.stderr.write(self.__state + '\n') + self.__state + '\n')
action(line) action(line)
read_obj.close()
self.__write_obj.close()
default_font_name = self.__font_table.get(self.__default_font_num) default_font_name = self.__font_table.get(self.__default_font_num)
if not default_font_name: if not default_font_name:
default_font_name = 'Not Defined' default_font_name = 'Not Defined'

View File

@ -43,7 +43,7 @@ class GetCharMap:
def get_char_map(self, map): def get_char_map(self, map):
if map == 'ansicpg0': if map == 'ansicpg0':
map = 'ansicpg1250' map = 'ansicpg1250'
if map in ('ansicpg10000', '10000'): if map == 'ansicpg10000':
map = 'mac_roman' map = 'mac_roman'
found_map = False found_map = False
map_dict = {} map_dict = {}

View File

@ -126,12 +126,6 @@ class Tokenize:
tokens = re.split(self.__splitexp, input_file) tokens = re.split(self.__splitexp, input_file)
#remove empty tokens and \n #remove empty tokens and \n
return filter(lambda x: len(x) > 0 and x != '\n', tokens) return filter(lambda x: len(x) > 0 and x != '\n', tokens)
#input_file = re.sub(self.__utf_exp, self.__from_ms_to_utf8, input_file)
# line = re.sub( self.__neg_utf_exp, self.__neg_unicode_func, line)
# this is for older RTF
#line = re.sub(self.__par_exp, '\\par ', line)
#return filter(lambda x: len(x) > 0, \
#(self.__remove_line.sub('', x) for x in tokens))
def __compile_expressions(self): def __compile_expressions(self):
SIMPLE_RPL = { SIMPLE_RPL = {
@ -160,7 +154,7 @@ class Tokenize:
} }
self.__replace_spchar = MReplace(SIMPLE_RPL) self.__replace_spchar = MReplace(SIMPLE_RPL)
#add ;? in case of char following \u #add ;? in case of char following \u
self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})") #r"\\\'(..)" self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})")
self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) ?") self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) ?")
self.__bin_exp = re.compile(r"(?:\\bin(-?\d{0,10})[\n ]+)[01\n]+") self.__bin_exp = re.compile(r"(?:\\bin(-?\d{0,10})[\n ]+)[01\n]+")
#manage upr/ud situations #manage upr/ud situations
@ -172,14 +166,21 @@ class Tokenize:
self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)") self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
#this is for old RTF #this is for old RTF
self.__par_exp = re.compile(r'\\\n+') self.__par_exp = re.compile(r'\\\n+')
# self.__par_exp = re.compile(r'\\$') #handle cw using a digit as argument and without space as delimiter
self.__cwdigit_exp = re.compile(r"(\\[a-zA-Z]+[\-0-9]+)([^0-9 \\]+)")
#self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}") #self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
#self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})") #self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
#self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)") #self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
#self.__remove_line = re.compile(r'\n+') #self.__remove_line = re.compile(r'\n+')
#self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)") ##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
def __correct_spliting(self, token):
    '''
    Separate a control word with a numeric argument from text glued to it.

    ``self.__cwdigit_exp`` looks for a control word ending in digits (or
    ``-``) that is immediately followed by a character that is neither a
    digit, a space nor a backslash. When it matches, a newline is inserted
    right after the control word so that the control word and the text that
    follows it end up on separate lines.

    Returns the token unchanged when the pattern does not match.
    '''
    match_obj = self.__cwdigit_exp.search(token)
    if match_obj is None:
        return token
    # Split at the end of the control word and keep *all* of the remaining
    # characters. Rebuilding the token from group(1) and group(2) alone
    # dropped whatever followed group(2) (which stops at the first digit or
    # space), e.g. the trailing "1" of "\f1Chapter1".
    split_at = match_obj.end(1)
    return '%s\n%s' % (token[:split_at], token[split_at:])
def tokenize(self): def tokenize(self):
"""Main class for handling other methods. Reads the file \ """Main class for handling other methods. Reads the file \
, uses method self.sub_reg to make basic substitutions,\ , uses method self.sub_reg to make basic substitutions,\
@ -195,6 +196,8 @@ class Tokenize:
tokens = map(self.__unicode_process, tokens) tokens = map(self.__unicode_process, tokens)
#remove empty items created by removing \uc #remove empty items created by removing \uc
tokens = filter(lambda x: len(x) > 0, tokens) tokens = filter(lambda x: len(x) > 0, tokens)
#handles bothersome cases
tokens = map(self.__correct_spliting, tokens)
#write #write
with open(self.__write_to, 'wb') as write_obj: with open(self.__write_to, 'wb') as write_obj:
@ -203,8 +206,6 @@ class Tokenize:
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "tokenize.data") copy_obj.copy_file(self.__write_to, "tokenize.data")
# if self.__out_file:
# self.__file = self.__out_file
copy_obj.rename(self.__write_to, self.__file) copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to) os.remove(self.__write_to)

View File

@ -22,7 +22,7 @@ class PluginWidget(Widget, Ui_Form):
['colors', 'dont_normalize', 'keep_aspect_ratio', 'right2left', ['colors', 'dont_normalize', 'keep_aspect_ratio', 'right2left',
'despeckle', 'no_sort', 'no_process', 'landscape', 'despeckle', 'no_sort', 'no_process', 'landscape',
'dont_sharpen', 'disable_trim', 'wide', 'output_format', 'dont_sharpen', 'disable_trim', 'wide', 'output_format',
'dont_grayscale'] 'dont_grayscale', 'comic_image_size']
) )
self.db, self.book_id = db, book_id self.db, self.book_id = db, book_id
for x in get_option('output_format').option.choices: for x in get_option('output_format').option.choices:

View File

@ -7,7 +7,7 @@
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>599</width> <width>599</width>
<height>345</height> <height>398</height>
</rect> </rect>
</property> </property>
<property name="windowTitle"> <property name="windowTitle">
@ -37,70 +37,70 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="3" column="0"> <item row="4" column="0">
<widget class="QCheckBox" name="opt_dont_normalize"> <widget class="QCheckBox" name="opt_dont_normalize">
<property name="text"> <property name="text">
<string>Disable &amp;normalize</string> <string>Disable &amp;normalize</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="4" column="0"> <item row="5" column="0">
<widget class="QCheckBox" name="opt_keep_aspect_ratio"> <widget class="QCheckBox" name="opt_keep_aspect_ratio">
<property name="text"> <property name="text">
<string>Keep &amp;aspect ratio</string> <string>Keep &amp;aspect ratio</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="5" column="0"> <item row="6" column="0">
<widget class="QCheckBox" name="opt_dont_sharpen"> <widget class="QCheckBox" name="opt_dont_sharpen">
<property name="text"> <property name="text">
<string>Disable &amp;Sharpening</string> <string>Disable &amp;Sharpening</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="6" column="0"> <item row="7" column="0">
<widget class="QCheckBox" name="opt_disable_trim"> <widget class="QCheckBox" name="opt_disable_trim">
<property name="text"> <property name="text">
<string>Disable &amp;Trimming</string> <string>Disable &amp;Trimming</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="7" column="0"> <item row="8" column="0">
<widget class="QCheckBox" name="opt_wide"> <widget class="QCheckBox" name="opt_wide">
<property name="text"> <property name="text">
<string>&amp;Wide</string> <string>&amp;Wide</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="8" column="0"> <item row="9" column="0">
<widget class="QCheckBox" name="opt_landscape"> <widget class="QCheckBox" name="opt_landscape">
<property name="text"> <property name="text">
<string>&amp;Landscape</string> <string>&amp;Landscape</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="9" column="0"> <item row="10" column="0">
<widget class="QCheckBox" name="opt_right2left"> <widget class="QCheckBox" name="opt_right2left">
<property name="text"> <property name="text">
<string>&amp;Right to left</string> <string>&amp;Right to left</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="10" column="0"> <item row="11" column="0">
<widget class="QCheckBox" name="opt_no_sort"> <widget class="QCheckBox" name="opt_no_sort">
<property name="text"> <property name="text">
<string>Don't so&amp;rt</string> <string>Don't so&amp;rt</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="11" column="0"> <item row="12" column="0">
<widget class="QCheckBox" name="opt_despeckle"> <widget class="QCheckBox" name="opt_despeckle">
<property name="text"> <property name="text">
<string>De&amp;speckle</string> <string>De&amp;speckle</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="13" column="0"> <item row="14" column="0">
<spacer name="verticalSpacer"> <spacer name="verticalSpacer">
<property name="orientation"> <property name="orientation">
<enum>Qt::Vertical</enum> <enum>Qt::Vertical</enum>
@ -120,7 +120,7 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="12" column="0"> <item row="13" column="0">
<widget class="QLabel" name="label"> <widget class="QLabel" name="label">
<property name="text"> <property name="text">
<string>&amp;Output format:</string> <string>&amp;Output format:</string>
@ -130,7 +130,7 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="12" column="1"> <item row="13" column="1">
<widget class="QComboBox" name="opt_output_format"/> <widget class="QComboBox" name="opt_output_format"/>
</item> </item>
<item row="1" column="0"> <item row="1" column="0">
@ -140,6 +140,19 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="3" column="0">
<widget class="QLabel" name="label_2">
<property name="text">
<string>Override image &amp;size:</string>
</property>
<property name="buddy">
<cstring>opt_comic_image_size</cstring>
</property>
</widget>
</item>
<item row="3" column="1">
<widget class="QLineEdit" name="opt_comic_image_size"/>
</item>
</layout> </layout>
</widget> </widget>
<resources/> <resources/>

View File

@ -687,7 +687,7 @@ class DeviceMixin(object): # {{{
except: except:
pass pass
if not self.device_error_dialog.isVisible(): if not self.device_error_dialog.isVisible():
self.device_error_dialog.setDetailedText(job.details) self.device_error_dialog.set_details(job.details)
self.device_error_dialog.show() self.device_error_dialog.show()
# Device connected {{{ # Device connected {{{
@ -838,9 +838,9 @@ class DeviceMixin(object): # {{{
format_count[f] = 1 format_count[f] = 1
# Build one (format, label, selectable) 3-tuple per format in the device's
# format_map; the resulting list is handed to ChooseFormatDeviceDialog below.
for f in self.device_manager.device.settings().format_map:
    if f in format_count:
        # Translate the template first, then interpolate the counts, so
        # the lookup key is the constant string. The whole 3-tuple must be
        # closed before append()'s own closing parenthesis: list.append()
        # accepts exactly one argument.
        formats.append((
            f,
            _('%i of %i Books') % (format_count[f], len(rows)),
            f in aval_out_formats,
        ))
    elif f in aval_out_formats:
        # No selected book has this format; it is still offered here
        # because it is listed in aval_out_formats.
        formats.append((f, _('0 of %i Books') % len(rows), True))
d = ChooseFormatDeviceDialog(self, _('Choose format to send to device'), formats) d = ChooseFormatDeviceDialog(self, _('Choose format to send to device'), formats)
if d.exec_() != QDialog.Accepted: if d.exec_() != QDialog.Accepted:
return return

View File

@ -7,7 +7,7 @@ import os, shutil
from PyQt4.Qt import QDialog, QVBoxLayout, QHBoxLayout, QTreeWidget, QLabel, \ from PyQt4.Qt import QDialog, QVBoxLayout, QHBoxLayout, QTreeWidget, QLabel, \
QPushButton, QDialogButtonBox, QApplication, QTreeWidgetItem, \ QPushButton, QDialogButtonBox, QApplication, QTreeWidgetItem, \
QLineEdit, Qt, QProgressBar, QSize, QTimer QLineEdit, Qt, QProgressBar, QSize, QTimer, QIcon, QTextEdit
from calibre.gui2.dialogs.confirm_delete import confirm from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.library.check_library import CheckLibrary, CHECKS from calibre.library.check_library import CheckLibrary, CHECKS
@ -16,7 +16,7 @@ from calibre import prints, as_unicode
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
from calibre.library.sqlite import DBThread, OperationalError from calibre.library.sqlite import DBThread, OperationalError
class DBCheck(QDialog): class DBCheck(QDialog): # {{{
def __init__(self, parent, db): def __init__(self, parent, db):
QDialog.__init__(self, parent) QDialog.__init__(self, parent)
@ -74,6 +74,7 @@ class DBCheck(QDialog):
self.reject() self.reject()
def start_load(self): def start_load(self):
try:
self.conn.close() self.conn.close()
self.pb.setMaximum(self.count) self.pb.setMaximum(self.count)
self.pb.setValue(0) self.pb.setValue(0)
@ -89,6 +90,11 @@ class DBCheck(QDialog):
self.conn.commit() self.conn.commit()
QTimer.singleShot(0, self.do_one_load) QTimer.singleShot(0, self.do_one_load)
except Exception, e:
import traceback
self.error = (as_unicode(e), traceback.format_exc())
self.reject()
def do_one_load(self): def do_one_load(self):
if self.rejected: if self.rejected:
@ -128,7 +134,7 @@ class DBCheck(QDialog):
def reject(self): def reject(self):
self.rejected = True self.rejected = True
QDialog.reject(self) QDialog.reject(self)
# }}}
class Item(QTreeWidgetItem): class Item(QTreeWidgetItem):
pass pass
@ -140,9 +146,70 @@ class CheckLibraryDialog(QDialog):
self.db = db self.db = db
self.setWindowTitle(_('Check Library -- Problems Found')) self.setWindowTitle(_('Check Library -- Problems Found'))
self.setWindowIcon(QIcon(I('debug.png')))
self._layout = QVBoxLayout(self) self._tl = QHBoxLayout()
self.setLayout(self._layout) self._layout = QVBoxLayout()
self.setLayout(self._tl)
self._tl.addLayout(self._layout)
self.helpw = QTextEdit(self)
self._tl.addWidget(self.helpw)
self.helpw.setReadOnly(True)
self.helpw.setText(_('''\
<h1>Help</h1>
<p>calibre stores the list of your books and their metadata in a
database. The actual book files and covers are stored as normal
files in the calibre library folder. The database contains a list of the files
and covers belonging to each book entry. This tool checks that the
actual files in the library folder on your computer match the
information in the database.</p>
<p>The result of each type of check is shown to the left. The various
checks are:
</p>
<ul>
<li><b>Invalid titles</b>: These are files and folders appearing
in the library where books titles should, but that do not have the
correct form to be a book title.</li>
<li><b>Extra titles</b>: These are extra files in your calibre
library that appear to be correctly-formed titles, but have no corresponding
entries in the database</li>
<li><b>Invalid authors</b>: These are files appearing
in the library where only author folders should be.</li>
<li><b>Extra authors</b>: These are folders in the
calibre library that appear to be authors but that do not have entries
in the database</li>
<li><b>Missing book formats</b>: These are book formats that are in
the database but have no corresponding format file in the book's folder.
<li><b>Extra book formats</b>: These are book format files found in
the book's folder but not in the database.
<li><b>Unknown files in books</b>: These are extra files in the
folder of each book that do not correspond to a known format or cover
file.</li>
<li><b>Missing cover files</b>: These represent books that are marked
in the database as having covers but the actual cover files are
missing.</li>
<li><b>Cover files not in database</b>: These are books that have
cover files but are marked as not having covers in the database.</li>
<li><b>Folder raising exception</b>: These represent folders in the
calibre library that could not be processed/understood by this
tool.</li>
</ul>
<p>There are two kinds of automatic fixes possible: <i>Delete
marked</i> and <i>Fix marked</i>.</p>
<p><i>Delete marked</i> is used to remove extra files/folders/covers that
have no entries in the database. Check the box next to the item you want
to delete. Use with caution.</p>
<p><i>Fix marked</i> is applicable only to covers (the two lines marked
'fixable'). In the case of missing cover files, checking the fixable
box and pushing this button will remove the cover mark from the
database for all the files in that category. In the case of extra
cover files, checking the fixable box and pushing this button will
add the cover mark to the database for all the files in that
category.</p>
'''))
self.log = QTreeWidget(self) self.log = QTreeWidget(self)
self.log.itemChanged.connect(self.item_changed) self.log.itemChanged.connect(self.item_changed)
@ -193,7 +260,7 @@ class CheckLibraryDialog(QDialog):
self._layout.addLayout(h) self._layout.addLayout(h)
self._layout.addWidget(self.bbox) self._layout.addWidget(self.bbox)
self.resize(750, 500) self.resize(950, 500)
self.bbox.setEnabled(True) self.bbox.setEnabled(True)
def do_exec(self): def do_exec(self):
@ -341,5 +408,6 @@ class CheckLibraryDialog(QDialog):
if __name__ == '__main__': if __name__ == '__main__':
app = QApplication([]) app = QApplication([])
d = CheckLibraryDialog() from calibre.library import db
d = CheckLibraryDialog(None, db())
d.exec_() d.exec_()

View File

@ -45,7 +45,6 @@ class MessageBox(QDialog, Ui_Dialog):
self.ctc_button.clicked.connect(self.copy_to_clipboard) self.ctc_button.clicked.connect(self.copy_to_clipboard)
if det_msg:
self.show_det_msg = _('Show &details') self.show_det_msg = _('Show &details')
self.hide_det_msg = _('Hide &details') self.hide_det_msg = _('Hide &details')
self.det_msg_toggle = self.bb.addButton(self.show_det_msg, self.bb.ActionRole) self.det_msg_toggle = self.bb.addButton(self.show_det_msg, self.bb.ActionRole)
@ -53,7 +52,6 @@ class MessageBox(QDialog, Ui_Dialog):
self.det_msg_toggle.setToolTip( self.det_msg_toggle.setToolTip(
_('Show detailed information about this error')) _('Show detailed information about this error'))
self.copy_action = QAction(self) self.copy_action = QAction(self)
self.addAction(self.copy_action) self.addAction(self.copy_action)
self.copy_action.setShortcuts(QKeySequence.Copy) self.copy_action.setShortcuts(QKeySequence.Copy)
@ -66,10 +64,14 @@ class MessageBox(QDialog, Ui_Dialog):
else: else:
self.bb.button(self.bb.Ok).setDefault(True) self.bb.button(self.bb.Ok).setDefault(True)
if not det_msg:
self.det_msg_toggle.setVisible(False)
self.do_resize() self.do_resize()
def toggle_det_msg(self, *args): def toggle_det_msg(self, *args):
vis = self.det_msg.isVisible() vis = unicode(self.det_msg_toggle.text()) == self.hide_det_msg
self.det_msg_toggle.setText(self.show_det_msg if vis else self.det_msg_toggle.setText(self.show_det_msg if vis else
self.hide_det_msg) self.hide_det_msg)
self.det_msg.setVisible(not vis) self.det_msg.setVisible(not vis)
@ -100,6 +102,15 @@ class MessageBox(QDialog, Ui_Dialog):
self.bb.button(self.bb.Ok).setFocus(Qt.OtherFocusReason) self.bb.button(self.bb.Ok).setFocus(Qt.OtherFocusReason)
return ret return ret
def set_details(self, msg):
    """Replace the detailed message shown by this dialog.

    A false value for *msg* (``None``, empty string) is treated as "no
    details": the details pane is cleared and the toggle button is hidden.
    In every case the pane itself is hidden again, the toggle button label
    is reset to ``self.show_det_msg``, and the dialog is resized.
    """
    text = msg or ''
    has_details = bool(text)

    self.det_msg.setPlainText(text)

    # Reset the toggle to its "show" label and only offer it when there
    # is something to show.
    toggle = self.det_msg_toggle
    toggle.setText(self.show_det_msg)
    toggle.setVisible(has_details)

    # New details always start hidden.
    self.det_msg.setVisible(False)
    self.do_resize()
if __name__ == '__main__': if __name__ == '__main__':
app = QApplication([]) app = QApplication([])
from calibre.gui2 import question_dialog from calibre.gui2 import question_dialog

View File

@ -21,7 +21,7 @@ class DBRestore(QDialog):
self.l = QVBoxLayout() self.l = QVBoxLayout()
self.setLayout(self.l) self.setLayout(self.l)
self.l1 = QLabel('<b>'+_('Restoring database from backups, do not' self.l1 = QLabel('<b>'+_('Restoring database from backups, do not'
' interrupt, this will happen in two stages')+'...') ' interrupt, this will happen in three stages')+'...')
self.setWindowTitle(_('Restoring database')) self.setWindowTitle(_('Restoring database'))
self.l.addWidget(self.l1) self.l.addWidget(self.l1)
self.pb = QProgressBar(self) self.pb = QProgressBar(self)
@ -104,7 +104,7 @@ def restore_database(db, parent=None):
else: else:
if r.errors_occurred: if r.errors_occurred:
warning_dialog(parent, _('Success'), warning_dialog(parent, _('Success'),
_('Restoring the database succeeded with some warnings', _('Restoring the database succeeded with some warnings'
' click Show details to see the details.'), ' click Show details to see the details.'),
det_msg=r.report, show=True) det_msg=r.report, show=True)
else: else:

View File

@ -266,7 +266,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
def add_plugin(self): def add_plugin(self):
path = choose_files(self, 'add a plugin dialog', _('Add plugin'), path = choose_files(self, 'add a plugin dialog', _('Add plugin'),
filters=[(_('Plugins'), ['zip'])], all_files=False, filters=[(_('Plugins') + ' (*.zip)', ['zip'])], all_files=False,
select_only_single_file=True) select_only_single_file=True)
if not path: if not path:
return return

View File

@ -275,7 +275,7 @@ def generate_catalog(parent, dbspec, ids, device_manager, db):
if device_manager.is_device_connected: if device_manager.is_device_connected:
device = device_manager.device device = device_manager.device
connected_device['name'] = device.gui_name connected_device['name'] = device.get_gui_name()
try: try:
storage = [] storage = []
if device._main_prefix: if device._main_prefix:

View File

@ -232,6 +232,7 @@ class BIBTEX(CatalogPlugin): # {{{
help = _('The fields to output when cataloging books in the ' help = _('The fields to output when cataloging books in the '
'database. Should be a comma-separated list of fields.\n' 'database. Should be a comma-separated list of fields.\n'
'Available fields: %s.\n' 'Available fields: %s.\n'
'plus user-created custom fields.\n'
'Example: %s=title,authors,tags\n' 'Example: %s=title,authors,tags\n'
"Default: '%%default'\n" "Default: '%%default'\n"
"Applies to: BIBTEX output format")%(', '.join(FIELDS), "Applies to: BIBTEX output format")%(', '.join(FIELDS),
@ -344,7 +345,7 @@ class BIBTEX(CatalogPlugin): # {{{
if field == 'authors' : if field == 'authors' :
bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item)) bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item))
elif field in ['title', 'publisher', 'cover', 'uuid', elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
'author_sort', 'series'] : 'author_sort', 'series'] :
bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item))) bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))
@ -378,7 +379,7 @@ class BIBTEX(CatalogPlugin): # {{{
if calibre_files: if calibre_files:
files = [u':%s:%s' % (format, format.rpartition('.')[2].upper())\ files = [u':%s:%s' % (format, format.rpartition('.')[2].upper())\
for format in item] for format in item]
bibtex_entry.append(u'files = "%s"' % u', '.join(files)) bibtex_entry.append(u'file = "%s"' % u', '.join(files))
elif field == 'series_index' : elif field == 'series_index' :
bibtex_entry.append(u'volume = "%s"' % int(item)) bibtex_entry.append(u'volume = "%s"' % int(item))
@ -474,6 +475,8 @@ class BIBTEX(CatalogPlugin): # {{{
if opts.verbose: if opts.verbose:
opts_dict = vars(opts) opts_dict = vars(opts)
log("%s(): Generating %s" % (self.name,self.fmt)) log("%s(): Generating %s" % (self.name,self.fmt))
if opts.connected_device['is_device_connected']:
log(" connected_device: %s" % opts.connected_device['name'])
if opts_dict['search_text']: if opts_dict['search_text']:
log(" --search='%s'" % opts_dict['search_text']) log(" --search='%s'" % opts_dict['search_text'])
@ -548,6 +551,7 @@ class BIBTEX(CatalogPlugin): # {{{
as outfile: as outfile:
#File header #File header
nb_entries = len(data) nb_entries = len(data)
#check in book strict if all is ok else throw a warning into log #check in book strict if all is ok else throw a warning into log
if bib_entry == 'book' : if bib_entry == 'book' :
nb_books = len(filter(check_entry_book_valid, data)) nb_books = len(filter(check_entry_book_valid, data))
@ -555,6 +559,11 @@ class BIBTEX(CatalogPlugin): # {{{
log(" WARNING: only %d entries in %d are book compatible" % (nb_books, nb_entries)) log(" WARNING: only %d entries in %d are book compatible" % (nb_books, nb_entries))
nb_entries = nb_books nb_entries = nb_books
# If connected device, add 'On Device' values to data
if opts.connected_device['is_device_connected'] and 'ondevice' in fields:
for entry in data:
entry['ondevice'] = db.catalog_plugin_on_device_temp_mapping[entry['id']]['ondevice']
outfile.write(u'%%%Calibre catalog\n%%%{0} entries in catalog\n\n'.format(nb_entries)) outfile.write(u'%%%Calibre catalog\n%%%{0} entries in catalog\n\n'.format(nb_entries))
outfile.write(u'@preamble{"This catalog of %d entries was generated by calibre on %s"}\n\n' outfile.write(u'@preamble{"This catalog of %d entries was generated by calibre on %s"}\n\n'
% (nb_entries, nowf().strftime("%A, %d. %B %Y %H:%M").decode(preferred_encoding))) % (nb_entries, nowf().strftime("%A, %d. %B %Y %H:%M").decode(preferred_encoding)))

View File

@ -30,8 +30,8 @@ CHECKS = [('invalid_titles', _('Invalid titles'), True, False),
('missing_formats', _('Missing book formats'), False, False), ('missing_formats', _('Missing book formats'), False, False),
('extra_formats', _('Extra book formats'), True, False), ('extra_formats', _('Extra book formats'), True, False),
('extra_files', _('Unknown files in books'), True, False), ('extra_files', _('Unknown files in books'), True, False),
('missing_covers', _('Missing covers in books'), False, True), ('missing_covers', _('Missing covers files'), False, True),
('extra_covers', _('Extra covers in books'), True, True), ('extra_covers', _('Cover files not in database'), True, True),
('failed_folders', _('Folders raising exception'), False, False) ('failed_folders', _('Folders raising exception'), False, False)
] ]

View File

@ -1549,7 +1549,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
elif mi.cover is not None: elif mi.cover is not None:
if os.access(mi.cover, os.R_OK): if os.access(mi.cover, os.R_OK):
with lopen(mi.cover, 'rb') as f: with lopen(mi.cover, 'rb') as f:
doit(self.set_cover, id, f, commit=False) raw = f.read()
if raw:
doit(self.set_cover, id, raw, commit=False)
if mi.tags: if mi.tags:
doit(self.set_tags, id, mi.tags, notify=False, commit=False) doit(self.set_tags, id, mi.tags, notify=False, commit=False)
if mi.comments: if mi.comments:

View File

@ -141,7 +141,7 @@ class Restore(Thread):
sizes = [os.path.getsize(os.path.join(dirpath, x)) for x in formats] sizes = [os.path.getsize(os.path.join(dirpath, x)) for x in formats]
names = [os.path.splitext(x)[0] for x in formats] names = [os.path.splitext(x)[0] for x in formats]
opf = os.path.join(dirpath, 'metadata.opf') opf = os.path.join(dirpath, 'metadata.opf')
mi = OPF(opf).to_book_metadata() mi = OPF(opf, basedir=dirpath).to_book_metadata()
timestamp = os.path.getmtime(opf) timestamp = os.path.getmtime(opf)
path = os.path.relpath(dirpath, self.src_library_path).replace(os.sep, path = os.path.relpath(dirpath, self.src_library_path).replace(os.sep,
'/') '/')

View File

@ -295,7 +295,9 @@ e-ink screen :)
Note that in the case of the Kindle, there is a way to manipulate collections via USB, Note that in the case of the Kindle, there is a way to manipulate collections via USB,
but it requires that the Kindle be rebooted *every time* it is disconnected from the computer, for the but it requires that the Kindle be rebooted *every time* it is disconnected from the computer, for the
changes to the collections to be recognized. As such, it is unlikely that changes to the collections to be recognized. As such, it is unlikely that
any |app| developers will ever feel motivated enough to support it. any |app| developers will ever feel motivated enough to support it. There is, however, a |app| plugin
that allows you to create collections on your Kindle from the |app| metadata. It is available
`here <http://www.mobileread.com/forums/showthread.php?t=118635>`_.
Library Management Library Management
------------------ ------------------

File diff suppressed because it is too large Load Diff

View File

@ -112,6 +112,16 @@ _extra_lang_codes = {
'en_IE' : _('English (Ireland)'), 'en_IE' : _('English (Ireland)'),
'en_CN' : _('English (China)'), 'en_CN' : _('English (China)'),
'es_PY' : _('Spanish (Paraguay)'), 'es_PY' : _('Spanish (Paraguay)'),
'es_UY' : _('Spanish (Uruguay)'),
'es_AR' : _('Spanish (Argentina)'),
'es_MX' : _('Spanish (Mexico)'),
'es_CU' : _('Spanish (Cuba)'),
'es_CL' : _('Spanish (Chile)'),
'es_EC' : _('Spanish (Ecuador)'),
'es_HN' : _('Spanish (Honduras)'),
'es_VE' : _('Spanish (Venezuela)'),
'es_BO' : _('Spanish (Bolivia)'),
'es_NI' : _('Spanish (Nicaragua)'),
'de_AT' : _('German (AT)'), 'de_AT' : _('German (AT)'),
'fr_BE' : _('French (BE)'), 'fr_BE' : _('French (BE)'),
'nl' : _('Dutch (NL)'), 'nl' : _('Dutch (NL)'),