Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

commit 950fcbee09
Sync to trunk.
resources/recipes/cicero.recipe (new file, 35 lines)
@@ -0,0 +1,35 @@
from calibre.web.feeds.news import BasicNewsRecipe

class Cicero(BasicNewsRecipe):
    timefmt = ' [%Y-%m-%d]'
    title = u'Cicero'
    __author__ = 'mad@sharktooth.de'
    description = u'Magazin f\xfcr politische Kultur'
    oldest_article = 7
    language = 'de'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    publisher = 'Ringier Publishing'
    category = 'news, politics, Germany'
    encoding = 'iso-8859-1'
    publication_type = 'magazine'
    masthead_url = 'http://www.cicero.de/img2/cicero_logo_rss.gif'

    feeds = [
        (u'Das gesamte Portfolio', u'http://www.cicero.de/rss/rss.php?ress_id='),
        #(u'Alle Heft-Inhalte', u'http://www.cicero.de/rss/rss.php?ress_id=heft'),
        #(u'Alle Online-Inhalte', u'http://www.cicero.de/rss/rss.php?ress_id=online'),
        #(u'Berliner Republik', u'http://www.cicero.de/rss/rss.php?ress_id=4'),
        #(u'Weltb\xfchne', u'http://www.cicero.de/rss/rss.php?ress_id=1'),
        #(u'Salon', u'http://www.cicero.de/rss/rss.php?ress_id=7'),
        #(u'Kapital', u'http://www.cicero.de/rss/rss.php?ress_id=6'),
        #(u'Netzst\xfccke', u'http://www.cicero.de/rss/rss.php?ress_id=9'),
        #(u'Leinwand', u'http://www.cicero.de/rss/rss.php?ress_id=12'),
        #(u'Bibliothek', u'http://www.cicero.de/rss/rss.php?ress_id=15'),
        (u'Kolumne - Alle Kolumnen', u'http://www.cicero.de/rss/rss2.php?ress_id='),
        #(u'Kolumne - Schreiber, Berlin', u'http://www.cicero.de/rss/rss2.php?ress_id=35'),
        #(u'Kolumne - TV Kritik', u'http://www.cicero.de/rss/rss2.php?ress_id=34')
    ]

    def print_version(self, url):
        return 'http://www.cicero.de/page_print.php?' + url.rpartition('?')[2]
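
For reference, a standalone sketch of the URL rewrite done by print_version above (the sample article URL is hypothetical):

    url = 'http://www.cicero.de/artikel.php?item=123'
    # rpartition('?')[2] keeps only the text after the last '?', i.e. the query string
    print('http://www.cicero.de/page_print.php?' + url.rpartition('?')[2])
    # -> http://www.cicero.de/page_print.php?item=123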
resources/recipes/el_correo.recipe (new file, 122 lines)
@@ -0,0 +1,122 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '08 January 2011, desUBIKado'
__author__ = 'desUBIKado'
__description__ = 'Daily newspaper from Biscay'
__version__ = 'v0.08'
__date__ = '08, January 2011'
'''
http://www.elcorreo.com/
'''

import time
import re
from calibre.web.feeds.news import BasicNewsRecipe

class heraldo(BasicNewsRecipe):
    __author__ = 'desUBIKado'
    description = 'Daily newspaper from Biscay'
    title = u'El Correo'
    publisher = 'Vocento'
    category = 'News, politics, culture, economy, general interest'
    oldest_article = 2
    delay = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    encoding = 'iso-8859-1'
    remove_empty_feeds = True
    remove_javascript = False

    feeds = [
        (u'Portada', u'http://www.elcorreo.com/vizcaya/portada.xml'),
        (u'Local', u'http://www.elcorreo.com/vizcaya/rss/feeds/vizcaya.xml'),
        (u'Internacional', u'http://www.elcorreo.com/vizcaya/rss/feeds/internacional.xml'),
        (u'Econom\xeda', u'http://www.elcorreo.com/vizcaya/rss/feeds/economia.xml'),
        (u'Pol\xedtica', u'http://www.elcorreo.com/vizcaya/rss/feeds/politica.xml'),
        (u'Opini\xf3n', u'http://www.elcorreo.com/vizcaya/rss/feeds/opinion.xml'),
        (u'Deportes', u'http://www.elcorreo.com/vizcaya/rss/feeds/deportes.xml'),
        (u'Sociedad', u'http://www.elcorreo.com/vizcaya/rss/feeds/sociedad.xml'),
        (u'Cultura', u'http://www.elcorreo.com/vizcaya/rss/feeds/cultura.xml'),
        (u'Televisi\xf3n', u'http://www.elcorreo.com/vizcaya/rss/feeds/television.xml'),
        (u'Gente', u'http://www.elcorreo.com/vizcaya/rss/feeds/gente.xml')
    ]

    keep_only_tags = [
        dict(name='div', attrs={'class':['grouphead','date','art_head','story-texto','text','colC_articulo','contenido_comentarios']}),
        dict(name='div' , attrs={'id':['articulo','story-texto','story-entradilla']})
    ]

    remove_tags = [
        dict(name='div', attrs={'class':['art_barra','detalles-opinion','formdenunciar','modulo calculadoras','nubetags','pie']}),
        dict(name='div', attrs={'class':['mod_lomas','bloque_lomas','blm_header','link-app3','link-app4','botones_listado']}),
        dict(name='div', attrs={'class':['navegacion_galeria','modulocanalpromocion','separa','separacion','compartir','tags_relacionados']}),
        dict(name='div', attrs={'class':['moduloBuscadorDeportes','modulo-gente','moddestacadopeq','OpcArt','articulopiniones']}),
        dict(name='div', attrs={'class':['modulo-especial','publiEspecial']}),
        dict(name='div', attrs={'id':['articulopina']}),
        dict(name='br', attrs={'class':'clear'}),
        dict(name='form', attrs={'name':'frm_conversor2'})
    ]

    remove_tags_before = dict(name='div' , attrs={'class':'articulo '})
    remove_tags_after = dict(name='div' , attrs={'class':'comentarios'})

    def get_cover_url(self):
        cover = None
        st = time.localtime()
        year = str(st.tm_year)
        month = "%.2d" % st.tm_mon
        day = "%.2d" % st.tm_mday
        # http://img.kiosko.net/2011/01/02/es/elcorreo.750.jpg
        # http://info.elcorreo.com/pdf/06012011-viz.pdf
        cover = 'http://info.elcorreo.com/pdf/' + day + month + year + '-viz.pdf'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(cover)
        except:
            self.log("\nPortada no disponible")
            cover = 'http://www.elcorreo.com/vizcaya/noticias/201002/02/Media/logo-elcorreo-nuevo.png'
        return cover

    extra_css = '''
        h1, .headline {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
        h2, .subhead {font-family:Arial,Helvetica,sans-serif; font-style:italic; font-weight:normal;font-size:18px;}
        h3, .overhead {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:16px;}
        h4 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:16px;}
        h5 {font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:16px;}
        h6 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:16px;}
        .date,.byline, .photo {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:14px;}
        img{margin-bottom: 0.4em}
    '''

    preprocess_regexps = [
        # To present the image of the embedded video
        (re.compile(r'var RUTA_IMAGEN', re.DOTALL|re.IGNORECASE), lambda match: '</script><img src'),
        (re.compile(r'.jpg";', re.DOTALL|re.IGNORECASE), lambda match: '.jpg">'),
        (re.compile(r'var SITIO = "elcorreo";', re.DOTALL|re.IGNORECASE), lambda match: '<SCRIPT TYPE="text/JavaScript"'),

        # To separate paragraphs with a blank line
        (re.compile(r'<div class="p"', re.DOTALL|re.IGNORECASE), lambda match: '<p></p><div class="p"'),

        # To put a blank line between the subtitle and the date and time of the news
        (re.compile(r'<div class="date">', re.DOTALL|re.IGNORECASE), lambda match: '<br><div class="date">'),

        # To put a blank line between the intro of the embedded videos and the previous text
        (re.compile(r'<div class="video"', re.DOTALL|re.IGNORECASE), lambda match: '<br><div class="video"'),

        # To view photos from the first when these are presented as a gallery
        (re.compile(r'src="/img/shim.gif"', re.DOTALL|re.IGNORECASE), lambda match: ''),
        (re.compile(r'rel=', re.DOTALL|re.IGNORECASE), lambda match: 'src='),

        # To remove the link of the title
        (re.compile(r'<h1 class="headline">\n<a href="', re.DOTALL|re.IGNORECASE), lambda match: '<h1 class="'),
        (re.compile(r'</a>\n</h1>', re.DOTALL|re.IGNORECASE), lambda match: '</h1>')
    ]
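
Each (pattern, callback) pair in preprocess_regexps is applied to the downloaded HTML before it is parsed. A self-contained sketch of the paragraph-separation rule above (the sample HTML is hypothetical):

    import re

    html = '<div class="p">uno</div><div class="p">dos</div>'
    pattern = re.compile(r'<div class="p"', re.DOTALL|re.IGNORECASE)
    # insert an empty paragraph before each wrapped paragraph div
    html = pattern.sub(lambda match: '<p></p><div class="p"', html)
    print(html)
    # -> <p></p><div class="p">uno</div><p></p><div class="p">dos</div>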
@@ -3,13 +3,14 @@ __license__ = 'GPL v3'
 __copyright__ = '04 December 2010, desUBIKado'
 __author__ = 'desUBIKado'
 __description__ = 'Daily newspaper from Aragon'
-__version__ = 'v0.03'
-__date__ = '11, December 2010'
+__version__ = 'v0.04'
+__date__ = '6, January 2011'
 '''
 http://www.heraldo.es/
 '''

 import time
+import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class heraldo(BasicNewsRecipe):
@@ -20,12 +21,13 @@ class heraldo(BasicNewsRecipe):
     category = 'News, politics, culture, economy, general interest'
     language = 'es'
     timefmt = '[%a, %d %b, %Y]'
-    oldest_article = 1
+    oldest_article = 2
+    delay = 1
     max_articles_per_feed = 100
     use_embedded_content = False
     remove_javascript = True
     no_stylesheets = True
-    recursion = 10

     feeds = [
         (u'Portadas', u'http://www.heraldo.es/index.php/mod.portadas/mem.rss')
@@ -37,7 +39,8 @@ class heraldo(BasicNewsRecipe):

     remove_tags = [dict(name='a', attrs={'class':['com flo-r','enl-if','enl-df']}),
                    dict(name='div', attrs={'class':['brb-b-s con marg-btt','cnt-rel con']}),
-                   dict(name='form', attrs={'class':'form'})]
+                   dict(name='form', attrs={'class':'form'}),
+                   dict(name='ul', attrs={'id':['cont-tags','pag-1']})]

     remove_tags_before = dict(name='div' , attrs={'id':'dts'})
     remove_tags_after = dict(name='div' , attrs={'id':'com'})
@@ -59,7 +62,16 @@ class heraldo(BasicNewsRecipe):
         return cover

     extra_css = '''
-        h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:xx-large;}
+        .con strong{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:16px;}
+        .con h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
+        .con span{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:12px;}
+        .ent {font-family:Arial,Helvetica,sans-serif; font-weight:normal; font-style:italic; font-size:18px;}
+        img{margin-bottom: 0.4em}
     '''
+
+    preprocess_regexps = [
+        # To separate the comments with a blank line
+        (re.compile(r'<div id="com"', re.DOTALL|re.IGNORECASE), lambda match: '<br><div id="com"')
+    ]
@@ -117,7 +117,6 @@ if iswindows:
     poppler_inc_dirs = consolidate('POPPLER_INC_DIR',
             r'%s\poppler;%s'%(sw_inc_dir, sw_inc_dir))

-    popplerqt4_inc_dirs = poppler_inc_dirs + [poppler_inc_dirs[1]+r'\qt4']
     poppler_lib_dirs = consolidate('POPPLER_LIB_DIR', sw_lib_dir)
     popplerqt4_lib_dirs = poppler_lib_dirs
     poppler_libs = ['poppler']
@@ -131,7 +130,6 @@ elif isosx:
     fc_lib = '/sw/lib'
     poppler_inc_dirs = consolidate('POPPLER_INC_DIR',
             '/sw/build/poppler-0.14.5/poppler:/sw/build/poppler-0.14.5')
-    popplerqt4_inc_dirs = poppler_inc_dirs + [poppler_inc_dirs[0]+'/qt4']
     poppler_lib_dirs = consolidate('POPPLER_LIB_DIR',
             '/sw/lib')
     poppler_libs = ['poppler']
@@ -150,9 +148,6 @@ else:
     # Include directories
     poppler_inc_dirs = pkgconfig_include_dirs('poppler',
             'POPPLER_INC_DIR', '/usr/include/poppler')
-    popplerqt4_inc_dirs = pkgconfig_include_dirs('poppler-qt4', '', '')
-    if not popplerqt4_inc_dirs:
-        popplerqt4_inc_dirs = poppler_inc_dirs + [poppler_inc_dirs[0]+'/qt4']
     png_inc_dirs = pkgconfig_include_dirs('libpng', 'PNG_INC_DIR',
         '/usr/include')
     magick_inc_dirs = pkgconfig_include_dirs('MagickWand', 'MAGICK_INC', '/usr/include/ImageMagick')
@@ -187,20 +182,17 @@ if not poppler_inc_dirs or not os.path.exists(
     poppler_error = \
         ('Poppler not found on your system. Various PDF related',
         ' functionality will not work. Use the POPPLER_INC_DIR and',
-        ' POPPLER_LIB_DIR environment variables.')
-
-popplerqt4_error = None
-if not popplerqt4_inc_dirs or not os.path.exists(
-        os.path.join(popplerqt4_inc_dirs[-1], 'poppler-qt4.h')):
-    popplerqt4_error = \
-        ('Poppler Qt4 bindings not found on your system.')
+        ' POPPLER_LIB_DIR environment variables. calibre requires '
+        ' the poppler XPDF headers. If your distro does not '
+        ' include them you will have to re-compile poppler '
+        ' by hand with --enable-xpdf-headers')

 magick_error = None
 if not magick_inc_dirs or not os.path.exists(os.path.join(magick_inc_dirs[0],
     'wand')):
     magick_error = ('ImageMagick not found on your system. '
             'Try setting the environment variables MAGICK_INC '
-            'and MAGICK_LIB to help calibre locate the inclue and libbrary '
+            'and MAGICK_LIB to help calibre locate the include and library '
             'files.')

 podofo_lib = os.environ.get('PODOFO_LIB_DIR', podofo_lib)
@@ -79,7 +79,7 @@ class DocAnalysis(object):
         elif format == 'spanned_html':
             linere = re.compile('(?<=<span).*?(?=</span>)', re.DOTALL)
         elif format == 'txt':
-            linere = re.compile('.*?\n', re.DOTALL)
+            linere = re.compile('.*?\n')
         self.lines = linere.findall(raw)

     def line_length(self, percent):
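
A note on this change: for the lazy pattern '.*?\n', re.DOTALL was a no-op, because a lazy match always stops at the first newline it can reach; the flag only governs whether '.' may cross newlines, which this match never needs. A quick check:

    import re

    raw = 'one\n\ntwo\n'
    assert re.compile('.*?\n', re.DOTALL).findall(raw) == \
           re.compile('.*?\n').findall(raw) == ['one\n', '\n', 'two\n']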
@@ -177,7 +177,7 @@ class Dehyphenator(object):
     def __init__(self):
         # Add common suffixes to the regex below to increase the likelihood of a match -
         # don't add suffixes which are also complete words, such as 'able' or 'sex'
-        self.removesuffixes = re.compile(r"((ed)?ly|('e)?s|a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|ment(s)?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex)$", re.IGNORECASE)
+        self.removesuffixes = re.compile(r"((ed)?ly|('e)?s|a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|m?ents?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex|ian)$", re.IGNORECASE)
         # remove prefixes if the prefix was not already the point of hyphenation
         self.prefixes = re.compile(r'^(dis|re|un|in|ex)$', re.IGNORECASE)
         self.removeprefix = re.compile(r'^(dis|re|un|in|ex)', re.IGNORECASE)
@@ -199,7 +199,7 @@ class Dehyphenator(object):
             searchresult = self.html.find(lookupword.lower())
         except:
             return hyphenated
-        if self.format == 'html_cleanup':
+        if self.format == 'html_cleanup' or self.format == 'txt_cleanup':
             if self.html.find(lookupword) != -1 or searchresult != -1:
                 #print "Cleanup:returned dehyphenated word: " + str(dehyphenated)
                 return dehyphenated
@@ -225,10 +225,15 @@ class Dehyphenator(object):
             intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)\s*(?=<)(?P<wraptags>(</span>)?\s*(</[iubp]>\s*){1,2}(?P<up2threeblanks><(p|div)[^>]*>\s*(<p[^>]*>\s*</p>\s*)?</(p|div)>\s+){0,3}\s*(<[iubp][^>]*>\s*){1,2}(<span[^>]*>)?)\s*(?P<secondpart>[\w\d]+)' % length)
         elif format == 'pdf':
             intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)\s*(?P<wraptags><p>|</[iub]>\s*<p>\s*<[iub]>)\s*(?P<secondpart>[\w\d]+)'% length)
+        elif format == 'txt':
+            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)(\u0020|\u0009)*(?P<wraptags>(\n(\u0020|\u0009)*)+)(?P<secondpart>[\w\d]+)'% length)
         elif format == 'individual_words':
-            intextmatch = re.compile(u'>[^<]*\b(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)"\s>]+)(-|‐)(?P<secondpart)\w+)\b[^<]*<') # for later, not called anywhere yet
+            intextmatch = re.compile(u'>[^<]*\b(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)"\s>]+)(-|‐)\u0020*(?P<secondpart>\w+)\b[^<]*<') # for later, not called anywhere yet
         elif format == 'html_cleanup':
             intextmatch = re.compile(u'(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)\s*(?=<)(?P<wraptags></span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?P<secondpart>[\w\d]+)')
+        elif format == 'txt_cleanup':
+            intextmatch = re.compile(u'(?P<firstpart>\w+)(-|‐)(?P<wraptags>\s+)(?P<secondpart>[\w\d]+)')

         html = intextmatch.sub(self.dehyphenate, html)
         return html
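
The new 'txt' branch targets words hyphenated across a hard line break in plain text. A simplified, self-contained sketch of that case (the real Dehyphenator also checks the rejoined word against the rest of the document before accepting it):

    import re

    pattern = re.compile(r'(?P<firstpart>\w+)-[ \t]*(?P<wraptags>(\n[ \t]*)+)(?P<secondpart>\w+)')
    text = 'The informa-\ntion age.'
    print(pattern.sub(lambda m: m.group('firstpart') + m.group('secondpart'), text))
    # -> The information age.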
@@ -190,7 +190,7 @@ class PreProcessor(object):
         line_ending = "\s*</(span|p|div)>\s*(</(p|span|div)>)?"
         blanklines = "\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*"
         line_opening = "<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*"
-        txt_line_wrap = u"(\u0020|\u0009)*\n"
+        txt_line_wrap = u"((\u0020|\u0009)*\n){1,4}"

         unwrap_regex = lookahead+line_ending+blanklines+line_opening
         if format == 'txt':
@@ -357,6 +357,6 @@ class PreProcessor(object):
         html = blankreg.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)

         # Center separator lines
-        html = re.sub(u'<p>\s*(?P<break>([*#•]+\s*)+)\s*</p>', '<p style="text-align:center">' + '\g<break>' + '</p>', html)
+        html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center">' + '\g<break>' + '</p>', html)

         return html
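
The widened pattern now also recognizes separator lines wrapped in up to three inline tags inside a <p> or <div>, not only a bare <p>...</p>. A reduced sketch with a single optional inner tag (sample HTML hypothetical):

    import re

    pat = r'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•]+\s*)+)\s*(</(?P=inner1)>)?\s*</(?P=outer)>'
    html = '<div><b>* * *</b></div>'
    print(re.sub(pat, r'<p style="text-align:center">\g<break></p>', html))
    # -> <p style="text-align:center">* * *</p>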
@@ -324,14 +324,16 @@ class Metadata(object):
         if metadata is None:
             traceback.print_stack()
             return
-        metadata = copy.deepcopy(metadata)
-        if '#value#' not in metadata:
-            if metadata['datatype'] == 'text' and metadata['is_multiple']:
-                metadata['#value#'] = []
+        m = {}
+        for k in metadata:
+            m[k] = copy.copy(metadata[k])
+        if '#value#' not in m:
+            if m['datatype'] == 'text' and m['is_multiple']:
+                m['#value#'] = []
             else:
-                metadata['#value#'] = None
+                m['#value#'] = None
         _data = object.__getattribute__(self, '_data')
-        _data['user_metadata'][field] = metadata
+        _data['user_metadata'][field] = m

     def template_to_attribute(self, other, ops):
         '''
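
The move from copy.deepcopy to a per-key copy.copy trades isolation for speed: top-level values are still independent copies, while objects nested deeper than one level are now shared with the caller's dict. A quick sketch of the difference:

    import copy

    metadata = {'datatype': 'text', 'is_multiple': ',', '#value#': ['a', 'b']}
    m = dict((k, copy.copy(v)) for k, v in metadata.items())
    m['#value#'].append('c')               # the copied list is independent
    assert metadata['#value#'] == ['a', 'b']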
@@ -7,11 +7,12 @@ __docformat__ = 'restructuredtext en'
 import os

 from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
+from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
 from calibre.ebooks.chardet import detect
 from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
     separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
     preserve_spaces, detect_paragraph_type, detect_formatting_type, \
-    convert_heuristic
+    convert_heuristic, normalize_line_endings
 from calibre import _ent_pat, xml_entity_to_unicode

 class TXTInput(InputFormatPlugin):
@@ -23,7 +24,7 @@ class TXTInput(InputFormatPlugin):

     options = set([
         OptionRecommendation(name='paragraph_type', recommended_value='auto',
-            choices=['auto', 'block', 'single', 'print'],
+            choices=['auto', 'block', 'single', 'print', 'unformatted'],
             help=_('Paragraph structure.\n'
                    'choices are [\'auto\', \'block\', \'single\', \'print\', \'unformatted\']\n'
                    '* auto: Try to auto detect paragraph type.\n'
@@ -31,7 +32,7 @@ class TXTInput(InputFormatPlugin):
                    '* single: Assume every line is a paragraph.\n'
                    '* print: Assume every line starting with 2+ spaces or a tab '
                    'starts a paragraph.'
-                   '* unformatted: Most lines have hard line breaks, few/no spaces or indents.')),
+                   '* unformatted: Most lines have hard line breaks, few/no blank lines or indents.')),
         OptionRecommendation(name='formatting_type', recommended_value='auto',
             choices=['auto', 'none', 'heuristic', 'markdown'],
             help=_('Formatting used within the document.'
@@ -73,6 +74,13 @@ class TXTInput(InputFormatPlugin):
         if options.preserve_spaces:
             txt = preserve_spaces(txt)

+        # Normalize line endings
+        txt = normalize_line_endings(txt)
+
+        # Get length for hyphen removal and punctuation unwrap
+        docanalysis = DocAnalysis('txt', txt)
+        length = docanalysis.line_length(.5)
+
         if options.formatting_type == 'auto':
             options.formatting_type = detect_formatting_type(txt)
@@ -93,8 +101,13 @@ class TXTInput(InputFormatPlugin):
         else:
             log.debug('Auto detected paragraph type as %s' % options.paragraph_type)

+        # Dehyphenate
+        dehyphenator = Dehyphenator()
+        txt = dehyphenator(txt,'txt', length)
+
         # We don't check for block because the processor assumes block.
         # single and print are transformed to block for processing.
         if options.paragraph_type == 'single' or options.paragraph_type == 'unformatted':
             txt = separate_paragraphs_single_line(txt)
         elif options.paragraph_type == 'print':
@@ -102,10 +115,8 @@ class TXTInput(InputFormatPlugin):

         if options.paragraph_type == 'unformatted':
             from calibre.ebooks.conversion.utils import PreProcessor
-            from calibre.ebooks.conversion.preprocess import DocAnalysis
             # get length
-            docanalysis = DocAnalysis('txt', txt)
-            length = docanalysis.line_length(.5)
             # unwrap lines based on punctuation
             preprocessor = PreProcessor(options, log=getattr(self, 'log', None))
             txt = preprocessor.punctuation_unwrap(length, txt, 'txt')
@@ -117,6 +128,10 @@ class TXTInput(InputFormatPlugin):
         else:
             html = convert_basic(txt, epub_split_size_kb=flow_size)

+        # Dehyphenate in cleanup mode for missed txt and markdown conversion
+        dehyphenator = Dehyphenator()
+        html = dehyphenator(html,'txt_cleanup', length)
+        html = dehyphenator(html,'html_cleanup', length)
+
         from calibre.customize.ui import plugin_for_input_format
         html_input = plugin_for_input_format('html')
@@ -80,9 +80,12 @@ def convert_markdown(txt, title='', disable_toc=False):
                      safe_mode=False)
     return HTML_TEMPLATE % (title, md.convert(txt))

-def separate_paragraphs_single_line(txt):
+def normalize_line_endings(txt):
     txt = txt.replace('\r\n', '\n')
     txt = txt.replace('\r', '\n')
+    return txt
+
+def separate_paragraphs_single_line(txt):
     txt = re.sub(u'(?<=.)\n(?=.)', '\n\n', txt)
     return txt
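
The line-ending normalization was split out of separate_paragraphs_single_line into its own helper so it can run once, early in the conversion. A standalone check of the two functions as refactored above:

    import re

    def normalize_line_endings(txt):
        txt = txt.replace('\r\n', '\n')
        txt = txt.replace('\r', '\n')
        return txt

    def separate_paragraphs_single_line(txt):
        # every lone newline between two characters becomes a blank line
        return re.sub(u'(?<=.)\n(?=.)', '\n\n', txt)

    txt = normalize_line_endings('one\r\ntwo\rthree')
    assert txt == 'one\ntwo\nthree'
    assert separate_paragraphs_single_line(txt) == 'one\n\ntwo\n\nthree'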
@@ -117,7 +120,7 @@ def detect_paragraph_type(txt):
     single: Each line is a paragraph.
     print: Each paragraph starts with a 2+ spaces or a tab
            and ends when a new paragraph is reached.
-    unformatted: most lines have hard line breaks, few/no spaces or indents
+    unformatted: most lines have hard line breaks, few/no blank lines or indents

     returns block, single, print, unformatted
     '''
@@ -130,14 +133,20 @@ def detect_paragraph_type(txt):
     hardbreaks = docanalysis.line_histogram(.55)

     if hardbreaks:
-        # Check for print
+        # Determine print percentage
         tab_line_count = len(re.findall('(?mu)^(\t|\s{2,}).+$', txt))
-        if tab_line_count / float(txt_line_count) >= .15:
-            return 'print'
+        print_percent = tab_line_count / float(txt_line_count)

-        # Check for block
+        # Determine block percentage
         empty_line_count = len(re.findall('(?mu)^\s*$', txt))
-        if empty_line_count / float(txt_line_count) >= .15:
-            return 'block'
+        block_percent = empty_line_count / float(txt_line_count)
+
+        # Compare the two types - the type with the larger number of instances wins
+        # in cases where only one or the other represents the vast majority of the document neither wins
+        if print_percent >= block_percent:
+            if .15 <= print_percent <= .75:
+                return 'print'
+        elif .15 <= block_percent <= .75:
+            return 'block'

         # Assume unformatted text with hardbreaks if nothing else matches
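
A self-contained sketch of the new comparison (thresholds as in the diff; the line count is simplified relative to DocAnalysis):

    import re

    def classify(txt):
        txt_line_count = max(1, len(txt.splitlines()))
        print_percent = len(re.findall('(?mu)^(\t|\s{2,}).+$', txt)) / float(txt_line_count)
        block_percent = len(re.findall('(?mu)^\s*$', txt)) / float(txt_line_count)
        if print_percent >= block_percent:
            if .15 <= print_percent <= .75:
                return 'print'
        elif .15 <= block_percent <= .75:
            return 'block'
        return 'unformatted'

    print(classify('  one\n  two\nthree\nfour'))  # 'print': 2 of 4 lines are indented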
@@ -1266,8 +1266,8 @@ class DeviceMixin(object): # {{{
         # Force a reset if the caches are not initialized
         if reset or not hasattr(self, 'db_book_title_cache'):
             # Build a cache (map) of the library, so the search isn't O(n**2)
-            self.db_book_title_cache = {}
-            self.db_book_uuid_cache = {}
+            db_book_title_cache = {}
+            db_book_uuid_cache = {}
             # It might be possible to get here without having initialized the
             # library view. In this case, simply give up
             try:
@@ -1278,8 +1278,8 @@ class DeviceMixin(object): # {{{
             for id in db.data.iterallids():
                 mi = db.get_metadata(id, index_is_id=True)
                 title = clean_string(mi.title)
-                if title not in self.db_book_title_cache:
-                    self.db_book_title_cache[title] = \
+                if title not in db_book_title_cache:
+                    db_book_title_cache[title] = \
                         {'authors':{}, 'author_sort':{}, 'db_ids':{}}
                 # If there are multiple books in the library with the same title
                 # and author, then remember the last one. That is OK, because as
@@ -1287,12 +1287,14 @@ class DeviceMixin(object): # {{{
                 # as another.
                 if mi.authors:
                     authors = clean_string(authors_to_string(mi.authors))
-                    self.db_book_title_cache[title]['authors'][authors] = mi
+                    db_book_title_cache[title]['authors'][authors] = mi
                 if mi.author_sort:
                     aus = clean_string(mi.author_sort)
-                    self.db_book_title_cache[title]['author_sort'][aus] = mi
-                self.db_book_title_cache[title]['db_ids'][mi.application_id] = mi
-                self.db_book_uuid_cache[mi.uuid] = mi
+                    db_book_title_cache[title]['author_sort'][aus] = mi
+                db_book_title_cache[title]['db_ids'][mi.application_id] = mi
+                db_book_uuid_cache[mi.uuid] = mi
+            self.db_book_title_cache = db_book_title_cache
+            self.db_book_uuid_cache = db_book_uuid_cache

         # Now iterate through all the books on the device, setting the
         # in_library field. If the UUID matches a book in the library, then
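
A note on the refactor above: the caches are now built in local variables and only assigned to self once fully populated, so an exception partway through the rebuild (or a concurrent reader) can no longer observe a half-filled cache. The pattern in isolation:

    class Cache(object):
        def rebuild(self, items):
            table = {}                  # build privately
            for key, value in items:
                table[key] = value
            self.table = table          # publish in one assignment

    c = Cache()
    c.rebuild([('a', 1), ('b', 2)])
    print(c.table)                      # {'a': 1, 'b': 2}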
@@ -5,11 +5,11 @@ __license__ = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-from PyQt4 import QtGui
-from PyQt4.Qt import Qt
+from PyQt4.Qt import Qt, QLineEdit, QComboBox, SIGNAL, QListWidgetItem

 from calibre.gui2 import error_dialog
 from calibre.gui2.device import device_name_for_plugboards
+from calibre.gui2.dialogs.template_dialog import TemplateDialog
 from calibre.gui2.preferences import ConfigWidgetBase, test_widget
 from calibre.gui2.preferences.plugboard_ui import Ui_Form
 from calibre.customize.ui import metadata_writers, device_plugins
@@ -17,6 +17,27 @@ from calibre.library.save_to_disk import plugboard_any_format_value, \
     plugboard_any_device_value, plugboard_save_to_disk_value
 from calibre.utils.formatter import validation_formatter

+
+class LineEditWithTextBox(QLineEdit):
+
+    '''
+    Extend the context menu of a QLineEdit to include more actions.
+    '''
+
+    def contextMenuEvent(self, event):
+        menu = self.createStandardContextMenu()
+        menu.addSeparator()
+
+        action_open_editor = menu.addAction(_('Open Editor'))
+
+        self.connect(action_open_editor, SIGNAL('triggered()'), self.open_editor)
+        menu.exec_(event.globalPos())
+
+    def open_editor(self):
+        t = TemplateDialog(self, self.text())
+        if t.exec_():
+            self.setText(t.textbox.toPlainText())
+
 class ConfigWidget(ConfigWidgetBase, Ui_Form):

     def genesis(self, gui):
@@ -72,10 +93,10 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
         self.source_widgets = []
         self.dest_widgets = []
         for i in range(0, len(self.dest_fields)-1):
-            w = QtGui.QLineEdit(self)
+            w = LineEditWithTextBox(self)
             self.source_widgets.append(w)
             self.fields_layout.addWidget(w, 5+i, 0, 1, 1)
-            w = QtGui.QComboBox(self)
+            w = QComboBox(self)
             self.dest_widgets.append(w)
             self.fields_layout.addWidget(w, 5+i, 1, 1, 1)
@@ -297,7 +318,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
             for op in self.current_plugboards[f][d]:
                 ops.append('([' + op[0] + '] -> ' + op[1] + ')')
             txt = '%s:%s = %s\n'%(f, d, ', '.join(ops))
-            item = QtGui.QListWidgetItem(txt)
+            item = QListWidgetItem(txt)
             item.setData(Qt.UserRole, (f, d))
             self.existing_plugboards.addItem(item)
         self.refilling = False
@@ -486,7 +486,7 @@ class ResultCache(SearchQueryParser): # {{{
             q = query

         for id_ in candidates:
-            item = self._data[id]
+            item = self._data[id_]
             if item is None: continue

             if col_datatype[loc] == 'bool': # complexity caused by the two-/three-value tweak
@@ -151,6 +151,8 @@ class CustomColumns(object):
             return v

         def adapt_number(x, d):
+            if x is None:
+                return None
             if isinstance(x, (str, unicode, bytes)):
                 if x.lower() == 'none':
                     return None
@@ -256,7 +256,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             'pubdate',
             'flags',
             'uuid',
-            'has_cover'
+            'has_cover',
+            ('au_map', 'authors', 'author', 'aum_sortconcat(link.id, authors.name, authors.sort)')
         ]
         lines = []
         for col in columns:
@@ -273,9 +274,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):

         self.FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'timestamp':3,
             'size':4, 'rating':5, 'tags':6, 'comments':7, 'series':8,
-            'publisher':9, 'series_index':10,
-            'sort':11, 'author_sort':12, 'formats':13, 'isbn':14, 'path':15,
-            'lccn':16, 'pubdate':17, 'flags':18, 'uuid':19, 'cover':20}
+            'publisher':9, 'series_index':10, 'sort':11, 'author_sort':12,
+            'formats':13, 'isbn':14, 'path':15, 'lccn':16, 'pubdate':17,
+            'flags':18, 'uuid':19, 'cover':20, 'au_map':21}

         for k,v in self.FIELD_MAP.iteritems():
             self.field_metadata.set_field_record_index(k, v, prefer_custom=False)
|
|||||||
Convenience method to return metadata as a :class:`Metadata` object.
|
Convenience method to return metadata as a :class:`Metadata` object.
|
||||||
Note that the list of formats is not verified.
|
Note that the list of formats is not verified.
|
||||||
'''
|
'''
|
||||||
|
row = self.data._data[idx] if index_is_id else self.data[idx]
|
||||||
|
fm = self.FIELD_MAP
|
||||||
|
|
||||||
self.gm_count += 1
|
self.gm_count += 1
|
||||||
mi = self.data.get(idx, self.FIELD_MAP['all_metadata'],
|
mi = row[self.FIELD_MAP['all_metadata']]
|
||||||
row_is_id = index_is_id)
|
|
||||||
if mi is not None:
|
if mi is not None:
|
||||||
if get_cover:
|
if get_cover:
|
||||||
# Always get the cover, because the value can be wrong if the
|
# Always get the cover, because the value can be wrong if the
|
||||||
@ -699,44 +702,41 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
|||||||
|
|
||||||
self.gm_missed += 1
|
self.gm_missed += 1
|
||||||
mi = Metadata(None)
|
mi = Metadata(None)
|
||||||
self.data.set(idx, self.FIELD_MAP['all_metadata'], mi,
|
self.data.set(idx, fm['all_metadata'], mi, row_is_id = index_is_id)
|
||||||
row_is_id = index_is_id)
|
|
||||||
|
|
||||||
aut_list = self.authors_with_sort_strings(idx, index_is_id=index_is_id)
|
aut_list = row[fm['au_map']]
|
||||||
|
aut_list = [p.split(':::') for p in aut_list.split(':#:')]
|
||||||
aum = []
|
aum = []
|
||||||
aus = {}
|
aus = {}
|
||||||
for (author, author_sort) in aut_list:
|
for (author, author_sort) in aut_list:
|
||||||
aum.append(author)
|
aum.append(author)
|
||||||
aus[author] = author_sort
|
aus[author] = author_sort.replace('|', ',')
|
||||||
mi.title = self.title(idx, index_is_id=index_is_id)
|
mi.title = row[fm['title']]
|
||||||
mi.authors = aum
|
mi.authors = aum
|
||||||
mi.author_sort = self.author_sort(idx, index_is_id=index_is_id)
|
mi.author_sort = row[fm['author_sort']]
|
||||||
mi.author_sort_map = aus
|
mi.author_sort_map = aus
|
||||||
mi.comments = self.comments(idx, index_is_id=index_is_id)
|
mi.comments = row[fm['comments']]
|
||||||
mi.publisher = self.publisher(idx, index_is_id=index_is_id)
|
mi.publisher = row[fm['publisher']]
|
||||||
mi.timestamp = self.timestamp(idx, index_is_id=index_is_id)
|
mi.timestamp = row[fm['timestamp']]
|
||||||
mi.pubdate = self.pubdate(idx, index_is_id=index_is_id)
|
mi.pubdate = row[fm['pubdate']]
|
||||||
mi.uuid = self.uuid(idx, index_is_id=index_is_id)
|
mi.uuid = row[fm['uuid']]
|
||||||
mi.title_sort = self.title_sort(idx, index_is_id=index_is_id)
|
mi.title_sort = row[fm['sort']]
|
||||||
mi.formats = self.formats(idx, index_is_id=index_is_id,
|
formats = row[fm['formats']]
|
||||||
verify_formats=False)
|
if not formats:
|
||||||
if hasattr(mi.formats, 'split'):
|
formats = None
|
||||||
mi.formats = mi.formats.split(',')
|
mi.formats = formats
|
||||||
else:
|
tags = row[fm['tags']]
|
||||||
mi.formats = None
|
|
||||||
tags = self.tags(idx, index_is_id=index_is_id)
|
|
||||||
if tags:
|
if tags:
|
||||||
mi.tags = [i.strip() for i in tags.split(',')]
|
mi.tags = [i.strip() for i in tags.split(',')]
|
||||||
mi.series = self.series(idx, index_is_id=index_is_id)
|
mi.series = row[fm['series']]
|
||||||
if mi.series:
|
if mi.series:
|
||||||
mi.series_index = self.series_index(idx, index_is_id=index_is_id)
|
mi.series_index = row[fm['series_index']]
|
||||||
mi.rating = self.rating(idx, index_is_id=index_is_id)
|
mi.rating = row[fm['rating']]
|
||||||
mi.isbn = self.isbn(idx, index_is_id=index_is_id)
|
mi.isbn = row[fm['isbn']]
|
||||||
id = idx if index_is_id else self.id(idx)
|
id = idx if index_is_id else self.id(idx)
|
||||||
mi.application_id = id
|
mi.application_id = id
|
||||||
mi.id = id
|
mi.id = id
|
||||||
for key,meta in self.field_metadata.iteritems():
|
for key, meta in self.field_metadata.custom_iteritems():
|
||||||
if meta['is_custom']:
|
|
||||||
mi.set_user_metadata(key, meta)
|
mi.set_user_metadata(key, meta)
|
||||||
mi.set(key, val=self.get_custom(idx, label=meta['label'],
|
mi.set(key, val=self.get_custom(idx, label=meta['label'],
|
||||||
index_is_id=index_is_id),
|
index_is_id=index_is_id),
|
||||||
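
For reference, au_map packs the (author, author_sort) pairs into a single string: pairs are joined with ':#:', each author is separated from its sort string with ':::', and commas in stored names are encoded as '|'. A decoding sketch matching the parsing above (sample data hypothetical):

    aut_list = 'Jane Doe:::Doe| Jane:#:John Smith:::Smith| John'
    pairs = [p.split(':::') for p in aut_list.split(':#:')]
    aus = dict((author, author_sort.replace('|', ',')) for author, author_sort in pairs)
    print(aus)  # {'Jane Doe': 'Doe, Jane', 'John Smith': 'Smith, John'}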
@@ -877,18 +877,17 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):

     def formats(self, index, index_is_id=False, verify_formats=True):
         ''' Return available formats as a comma separated list or None if there are no available formats '''
-        id = index if index_is_id else self.id(index)
-        try:
-            formats = self.conn.get('SELECT format FROM data WHERE book=?', (id,))
-            formats = map(lambda x:x[0], formats)
-        except:
+        id_ = index if index_is_id else self.id(index)
+        formats = self.data.get(id_, self.FIELD_MAP['formats'], row_is_id=True)
+        if not formats:
             return None
         if not verify_formats:
-            return ','.join(formats)
+            return formats
+        formats = formats.split(',')
         ans = []
-        for format in formats:
-            if self.format_abspath(id, format, index_is_id=True) is not None:
-                ans.append(format)
+        for fmt in formats:
+            if self.format_abspath(id_, fmt, index_is_id=True) is not None:
+                ans.append(fmt)
         if not ans:
             return None
         return ','.join(ans)
@@ -1607,6 +1606,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
                 ','.join([a.replace(',', '|') for a in authors]),
                 row_is_id=True)
         self.data.set(id, self.FIELD_MAP['author_sort'], ss, row_is_id=True)
+        aum = self.authors_with_sort_strings(id, index_is_id=True)
+        self.data.set(id, self.FIELD_MAP['au_map'],
+                      ':#:'.join([':::'.join((au.replace(',', '|'), aus)) for (au, aus) in aum]),
+                      row_is_id=True)

     def set_authors(self, id, authors, notify=True, commit=True):
         '''
@@ -180,6 +180,15 @@ class FieldMetadata(dict):
                      'search_terms':['author_sort'],
                      'is_custom':False,
                      'is_category':False}),
+            ('au_map', {'table':None,
+                        'column':None,
+                        'datatype':'text',
+                        'is_multiple':',',
+                        'kind':'field',
+                        'name':None,
+                        'search_terms':[],
+                        'is_custom':False,
+                        'is_category':False}),
             ('comments', {'table':None,
                           'column':None,
                           'datatype':'text',
@@ -400,6 +409,12 @@ class FieldMetadata(dict):
         for key in self._tb_cats:
             yield (key, self._tb_cats[key])

+    def custom_iteritems(self):
+        for key in self._tb_cats:
+            fm = self._tb_cats[key]
+            if fm['is_custom']:
+                yield (key, self._tb_cats[key])
+
     def items(self):
         return list(self.iteritems())
@@ -756,7 +756,7 @@ class BrowseServer(object):
             sort = self.browse_sort_book_list(items, list_sort)
             ids = [x[0] for x in items]
             html = render_book_list(ids, self.opts.url_prefix,
-                    suffix=_('in search')+': '+query)
+                    suffix=_('in search')+': '+xml(query))
             return self.browse_template(sort, category=False, initial_search=query).format(
                 title=_('Matching books'),
                 script='booklist();', main=html)
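
Wrapping the query in xml() escapes user-supplied search text before it is interpolated into the page HTML, closing a markup-injection hole. A sketch of the effect, using a standard-library escaper as a stand-in for calibre's xml() helper:

    from xml.sax.saxutils import escape as xml  # stand-in, for illustration

    query = '<script>alert(1)</script>'
    print('in search' + ': ' + xml(query))
    # -> in search: &lt;script&gt;alert(1)&lt;/script&gt;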
@@ -87,6 +87,23 @@ class SortedConcatenate(object):

 class SafeSortedConcatenate(SortedConcatenate):
     sep = '|'

+class AumSortedConcatenate(object):
+    '''String concatenation aggregator for the author sort map'''
+    def __init__(self):
+        self.ans = {}
+
+    def step(self, ndx, author, sort):
+        if author is not None:
+            self.ans[ndx] = author + ':::' + sort
+
+    def finalize(self):
+        keys = self.ans.keys()
+        if len(keys) == 0:
+            return None
+        if len(keys) == 1:
+            return self.ans[keys[0]]
+        return ':#:'.join([self.ans[v] for v in sorted(keys)])
+
 class Connection(sqlite.Connection):

     def get(self, *args, **kw):
@@ -155,6 +172,7 @@ class DBThread(Thread):
             c_ext_loaded = load_c_extensions(self.conn)
             self.conn.row_factory = sqlite.Row if self.row_factory else lambda cursor, row : list(row)
             self.conn.create_aggregate('concat', 1, Concatenate)
+            self.conn.create_aggregate('aum_sortconcat', 3, AumSortedConcatenate)
             if not c_ext_loaded:
                 self.conn.create_aggregate('sortconcat', 2, SortedConcatenate)
                 self.conn.create_aggregate('sort_concat', 2, SafeSortedConcatenate)
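
AumSortedConcatenate is registered as a three-argument SQL aggregate, so the au_map column can be computed inside the SELECT itself. A self-contained sketch of the same mechanism using the sqlite3 module directly (the table and rows are hypothetical):

    import sqlite3

    class AumSortedConcatenate(object):
        '''String concatenation aggregator for the author sort map'''
        def __init__(self):
            self.ans = {}
        def step(self, ndx, author, sort):
            if author is not None:
                self.ans[ndx] = author + ':::' + sort
        def finalize(self):
            keys = sorted(self.ans)
            return ':#:'.join(self.ans[k] for k in keys) if keys else None

    conn = sqlite3.connect(':memory:')
    conn.create_aggregate('aum_sortconcat', 3, AumSortedConcatenate)
    conn.execute('CREATE TABLE a(id INTEGER, name TEXT, sort TEXT)')
    conn.executemany('INSERT INTO a VALUES (?,?,?)',
                     [(2, 'John Smith', 'Smith| John'), (1, 'Jane Doe', 'Doe| Jane')])
    print(conn.execute('SELECT aum_sortconcat(id, name, sort) FROM a').fetchone()[0])
    # -> Jane Doe:::Doe| Jane:#:John Smith:::Smith| John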
@@ -98,9 +98,10 @@ class _Parser(object):
         m = 'Formatter: ' + message + _(' near ')
         if self.lex_pos > 0:
             m = '{0} {1}'.format(m, self.prog[self.lex_pos-1][1])
-            m = '{0} {1}'.format(m, self.prog[self.lex_pos][1])
-        if self.lex_pos < len(self.prog):
+        elif self.lex_pos < len(self.prog):
             m = '{0} {1}'.format(m, self.prog[self.lex_pos+1][1])
+        else:
+            m = '{0} {1}'.format(m, _('end of program'))
         raise ValueError(m)

     def token(self):