Sync to trunk.

This commit is contained in:
John Schember 2011-01-09 15:23:47 -05:00
commit 950fcbee09
18 changed files with 383 additions and 129 deletions

View File

@ -0,0 +1,35 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Cicero(BasicNewsRecipe):
timefmt = ' [%Y-%m-%d]'
title = u'Cicero'
__author__ = 'mad@sharktooth.de'
description = u'Magazin f\xfcr politische Kultur'
oldest_article = 7
language = 'de'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
publisher = 'Ringier Publishing'
category = 'news, politics, Germany'
encoding = 'iso-8859-1'
publication_type = 'magazine'
masthead_url = 'http://www.cicero.de/img2/cicero_logo_rss.gif'
feeds = [
(u'Das gesamte Portfolio', u'http://www.cicero.de/rss/rss.php?ress_id='),
#(u'Alle Heft-Inhalte', u'http://www.cicero.de/rss/rss.php?ress_id=heft'),
#(u'Alle Online-Inhalte', u'http://www.cicero.de/rss/rss.php?ress_id=online'),
#(u'Berliner Republik', u'http://www.cicero.de/rss/rss.php?ress_id=4'),
#(u'Weltb\xfchne', u'http://www.cicero.de/rss/rss.php?ress_id=1'),
#(u'Salon', u'http://www.cicero.de/rss/rss.php?ress_id=7'),
#(u'Kapital', u'http://www.cicero.de/rss/rss.php?ress_id=6'),
#(u'Netzst\xfccke', u'http://www.cicero.de/rss/rss.php?ress_id=9'),
#(u'Leinwand', u'http://www.cicero.de/rss/rss.php?ress_id=12'),
#(u'Bibliothek', u'http://www.cicero.de/rss/rss.php?ress_id=15'),
(u'Kolumne - Alle Kolulmnen', u'http://www.cicero.de/rss/rss2.php?ress_id='),
#(u'Kolumne - Schreiber, Berlin', u'http://www.cicero.de/rss/rss2.php?ress_id=35'),
#(u'Kolumne - TV Kritik', u'http://www.cicero.de/rss/rss2.php?ress_id=34')
]
def print_version(self, url):
return 'http://www.cicero.de/page_print.php?' + url.rpartition('?')[2]

View File

@ -0,0 +1,122 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '08 Januery 2011, desUBIKado'
__author__ = 'desUBIKado'
__description__ = 'Daily newspaper from Biscay'
__version__ = 'v0.08'
__date__ = '08, Januery 2011'
'''
[url]http://www.elcorreo.com/[/url]
'''
import time
import re
from calibre.web.feeds.news import BasicNewsRecipe
class heraldo(BasicNewsRecipe):
__author__ = 'desUBIKado'
description = 'Daily newspaper from Biscay'
title = u'El Correo'
publisher = 'Vocento'
category = 'News, politics, culture, economy, general interest'
oldest_article = 2
delay = 1
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
language = 'es'
timefmt = '[%a, %d %b, %Y]'
encoding = 'iso-8859-1'
remove_empty_feeds = True
remove_javascript = False
feeds = [
(u'Portada', u'http://www.elcorreo.com/vizcaya/portada.xml'),
(u'Local', u'http://www.elcorreo.com/vizcaya/rss/feeds/vizcaya.xml'),
(u'Internacional', u'hhttp://www.elcorreo.com/vizcaya/rss/feeds/internacional.xml'),
(u'Econom\xeda', u'http://www.elcorreo.com/vizcaya/rss/feeds/economia.xml'),
(u'Pol\xedtica', u'http://www.elcorreo.com/vizcaya/rss/feeds/politica.xml'),
(u'Opini\xf3n', u'http://www.elcorreo.com/vizcaya/rss/feeds/opinion.xml'),
(u'Deportes', u'http://www.elcorreo.com/vizcaya/rss/feeds/deportes.xml'),
(u'Sociedad', u'http://www.elcorreo.com/vizcaya/rss/feeds/sociedad.xml'),
(u'Cultura', u'http://www.elcorreo.com/vizcaya/rss/feeds/cultura.xml'),
(u'Televisi\xf3n', u'http://www.elcorreo.com/vizcaya/rss/feeds/television.xml'),
(u'Gente', u'http://www.elcorreo.com/vizcaya/rss/feeds/gente.xml')
]
keep_only_tags = [
dict(name='div', attrs={'class':['grouphead','date','art_head','story-texto','text','colC_articulo','contenido_comentarios']}),
dict(name='div' , attrs={'id':['articulo','story-texto','story-entradilla']})
]
remove_tags = [
dict(name='div', attrs={'class':['art_barra','detalles-opinion','formdenunciar','modulo calculadoras','nubetags','pie']}),
dict(name='div', attrs={'class':['mod_lomas','bloque_lomas','blm_header','link-app3','link-app4','botones_listado']}),
dict(name='div', attrs={'class':['navegacion_galeria','modulocanalpromocion','separa','separacion','compartir','tags_relacionados']}),
dict(name='div', attrs={'class':['moduloBuscadorDeportes','modulo-gente','moddestacadopeq','OpcArt','articulopiniones']}),
dict(name='div', attrs={'class':['modulo-especial','publiEspecial']}),
dict(name='div', attrs={'id':['articulopina']}),
dict(name='br', attrs={'class':'clear'}),
dict(name='form', attrs={'name':'frm_conversor2'})
]
remove_tags_before = dict(name='div' , attrs={'class':'articulo '})
remove_tags_after = dict(name='div' , attrs={'class':'comentarios'})
def get_cover_url(self):
cover = None
st = time.localtime()
year = str(st.tm_year)
month = "%.2d" % st.tm_mon
day = "%.2d" % st.tm_mday
#[url]http://img.kiosko.net/2011/01/02/es/elcorreo.750.jpg[/url]
#[url]http://info.elcorreo.com/pdf/06012011-viz.pdf[/url]
cover='http://info.elcorreo.com/pdf/'+ day + month + year +'-viz.pdf'
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
except:
self.log("\nPortada no disponible")
cover ='http://www.elcorreo.com/vizcaya/noticias/201002/02/Media/logo-elcorreo-nuevo.png'
return cover
extra_css = '''
h1, .headline {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
h2, .subhead {font-family:Arial,Helvetica,sans-serif; font-style:italic; font-weight:normal;font-size:18px;}
h3, .overhead {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:16px;}
h4 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:16px;}
h5 {font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:16px;}
h6 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:16px;}
.date,.byline, .photo {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:14px;}
img{margin-bottom: 0.4em}
'''
preprocess_regexps = [
# To present the image of the embedded video
(re.compile(r'var RUTA_IMAGEN', re.DOTALL|re.IGNORECASE), lambda match: '</script><img src'),
(re.compile(r'.jpg";', re.DOTALL|re.IGNORECASE), lambda match: '.jpg">'),
(re.compile(r'var SITIO = "elcorreo";', re.DOTALL|re.IGNORECASE), lambda match: '<SCRIPT TYPE="text/JavaScript"'),
# To separate paragraphs with a blank line
(re.compile(r'<div class="p"', re.DOTALL|re.IGNORECASE), lambda match: '<p></p><div class="p"'),
# To put a blank line between the subtitle and the date and time of the news
(re.compile(r'<div class="date">', re.DOTALL|re.IGNORECASE), lambda match: '<br><div class="date">'),
# To put a blank line between the intro of the embedded videos and the previous text
(re.compile(r'<div class="video"', re.DOTALL|re.IGNORECASE), lambda match: '<br><div class="video"'),
# To view photos from the first when these are presented as a gallery
(re.compile(r'src="/img/shim.gif"', re.DOTALL|re.IGNORECASE), lambda match: ''),
(re.compile(r'rel=', re.DOTALL|re.IGNORECASE), lambda match: 'src='),
# To remove the link of the title
(re.compile(r'<h1 class="headline">\n<a href="', re.DOTALL|re.IGNORECASE), lambda match: '<h1 class="'),
(re.compile(r'</a>\n</h1>', re.DOTALL|re.IGNORECASE), lambda match: '</h1>'),
]

View File

@ -3,29 +3,31 @@ __license__ = 'GPL v3'
__copyright__ = '04 December 2010, desUBIKado' __copyright__ = '04 December 2010, desUBIKado'
__author__ = 'desUBIKado' __author__ = 'desUBIKado'
__description__ = 'Daily newspaper from Aragon' __description__ = 'Daily newspaper from Aragon'
__version__ = 'v0.03' __version__ = 'v0.04'
__date__ = '11, December 2010' __date__ = '6, Januery 2011'
''' '''
[url]http://www.heraldo.es/[/url] [url]http://www.heraldo.es/[/url]
''' '''
import time import time
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class heraldo(BasicNewsRecipe): class heraldo(BasicNewsRecipe):
__author__ = 'desUBIKado' __author__ = 'desUBIKado'
description = 'Daily newspaper from Aragon' description = 'Daily newspaper from Aragon'
title = u'Heraldo de Aragon' title = u'Heraldo de Aragon'
publisher = 'OJD Nielsen' publisher = 'OJD Nielsen'
category = 'News, politics, culture, economy, general interest' category = 'News, politics, culture, economy, general interest'
language = 'es' language = 'es'
timefmt = '[%a, %d %b, %Y]' timefmt = '[%a, %d %b, %Y]'
oldest_article = 1 oldest_article = 2
delay = 1
max_articles_per_feed = 100 max_articles_per_feed = 100
use_embedded_content = False use_embedded_content = False
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
recursion = 10
feeds = [ feeds = [
(u'Portadas', u'http://www.heraldo.es/index.php/mod.portadas/mem.rss') (u'Portadas', u'http://www.heraldo.es/index.php/mod.portadas/mem.rss')
@ -37,29 +39,39 @@ class heraldo(BasicNewsRecipe):
remove_tags = [dict(name='a', attrs={'class':['com flo-r','enl-if','enl-df']}), remove_tags = [dict(name='a', attrs={'class':['com flo-r','enl-if','enl-df']}),
dict(name='div', attrs={'class':['brb-b-s con marg-btt','cnt-rel con']}), dict(name='div', attrs={'class':['brb-b-s con marg-btt','cnt-rel con']}),
dict(name='form', attrs={'class':'form'})] dict(name='form', attrs={'class':'form'}),
dict(name='ul', attrs={'id':['cont-tags','pag-1']})]
remove_tags_before = dict(name='div' , attrs={'id':'dts'}) remove_tags_before = dict(name='div' , attrs={'id':'dts'})
remove_tags_after = dict(name='div' , attrs={'id':'com'}) remove_tags_after = dict(name='div' , attrs={'id':'com'})
def get_cover_url(self): def get_cover_url(self):
cover = None cover = None
st = time.localtime() st = time.localtime()
year = str(st.tm_year) year = str(st.tm_year)
month = "%.2d" % st.tm_mon month = "%.2d" % st.tm_mon
day = "%.2d" % st.tm_mday day = "%.2d" % st.tm_mday
#[url]http://oldorigin-www.heraldo.es/20101211/primeras/portada_aragon.pdf[/url] #[url]http://oldorigin-www.heraldo.es/20101211/primeras/portada_aragon.pdf[/url]
cover='http://oldorigin-www.heraldo.es/'+ year + month + day +'/primeras/portada_aragon.pdf' cover='http://oldorigin-www.heraldo.es/'+ year + month + day +'/primeras/portada_aragon.pdf'
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
try: try:
br.open(cover) br.open(cover)
except: except:
self.log("\nPortada no disponible") self.log("\nPortada no disponible")
cover ='http://www.heraldo.es/MODULOS/global/publico/interfaces/img/logo-Heraldo.png' cover ='http://www.heraldo.es/MODULOS/global/publico/interfaces/img/logo-Heraldo.png'
return cover return cover
extra_css = ''' extra_css = '''
h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:xx-large;} .con strong{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:16px;}
''' .con h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
.con span{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:12px;}
.ent {font-family:Arial,Helvetica,sans-serif; font-weight:normal; font-style:italic; font-size:18px;}
img{margin-bottom: 0.4em}
'''
preprocess_regexps = [
# To separate the comments with a blank line
(re.compile(r'<div id="com"', re.DOTALL|re.IGNORECASE), lambda match: '<br><div id="com"')
]

View File

@ -117,7 +117,6 @@ if iswindows:
poppler_inc_dirs = consolidate('POPPLER_INC_DIR', poppler_inc_dirs = consolidate('POPPLER_INC_DIR',
r'%s\poppler;%s'%(sw_inc_dir, sw_inc_dir)) r'%s\poppler;%s'%(sw_inc_dir, sw_inc_dir))
popplerqt4_inc_dirs = poppler_inc_dirs + [poppler_inc_dirs[1]+r'\qt4']
poppler_lib_dirs = consolidate('POPPLER_LIB_DIR', sw_lib_dir) poppler_lib_dirs = consolidate('POPPLER_LIB_DIR', sw_lib_dir)
popplerqt4_lib_dirs = poppler_lib_dirs popplerqt4_lib_dirs = poppler_lib_dirs
poppler_libs = ['poppler'] poppler_libs = ['poppler']
@ -131,7 +130,6 @@ elif isosx:
fc_lib = '/sw/lib' fc_lib = '/sw/lib'
poppler_inc_dirs = consolidate('POPPLER_INC_DIR', poppler_inc_dirs = consolidate('POPPLER_INC_DIR',
'/sw/build/poppler-0.14.5/poppler:/sw/build/poppler-0.14.5') '/sw/build/poppler-0.14.5/poppler:/sw/build/poppler-0.14.5')
popplerqt4_inc_dirs = poppler_inc_dirs + [poppler_inc_dirs[0]+'/qt4']
poppler_lib_dirs = consolidate('POPPLER_LIB_DIR', poppler_lib_dirs = consolidate('POPPLER_LIB_DIR',
'/sw/lib') '/sw/lib')
poppler_libs = ['poppler'] poppler_libs = ['poppler']
@ -150,9 +148,6 @@ else:
# Include directories # Include directories
poppler_inc_dirs = pkgconfig_include_dirs('poppler', poppler_inc_dirs = pkgconfig_include_dirs('poppler',
'POPPLER_INC_DIR', '/usr/include/poppler') 'POPPLER_INC_DIR', '/usr/include/poppler')
popplerqt4_inc_dirs = pkgconfig_include_dirs('poppler-qt4', '', '')
if not popplerqt4_inc_dirs:
popplerqt4_inc_dirs = poppler_inc_dirs + [poppler_inc_dirs[0]+'/qt4']
png_inc_dirs = pkgconfig_include_dirs('libpng', 'PNG_INC_DIR', png_inc_dirs = pkgconfig_include_dirs('libpng', 'PNG_INC_DIR',
'/usr/include') '/usr/include')
magick_inc_dirs = pkgconfig_include_dirs('MagickWand', 'MAGICK_INC', '/usr/include/ImageMagick') magick_inc_dirs = pkgconfig_include_dirs('MagickWand', 'MAGICK_INC', '/usr/include/ImageMagick')
@ -187,20 +182,17 @@ if not poppler_inc_dirs or not os.path.exists(
poppler_error = \ poppler_error = \
('Poppler not found on your system. Various PDF related', ('Poppler not found on your system. Various PDF related',
' functionality will not work. Use the POPPLER_INC_DIR and', ' functionality will not work. Use the POPPLER_INC_DIR and',
' POPPLER_LIB_DIR environment variables.') ' POPPLER_LIB_DIR environment variables. calibre requires '
' the poppler XPDF headers. If your distro does not '
popplerqt4_error = None ' include them you will have to re-compile poppler '
if not popplerqt4_inc_dirs or not os.path.exists( ' by hand with --enable-xpdf-headers')
os.path.join(popplerqt4_inc_dirs[-1], 'poppler-qt4.h')):
popplerqt4_error = \
('Poppler Qt4 bindings not found on your system.')
magick_error = None magick_error = None
if not magick_inc_dirs or not os.path.exists(os.path.join(magick_inc_dirs[0], if not magick_inc_dirs or not os.path.exists(os.path.join(magick_inc_dirs[0],
'wand')): 'wand')):
magick_error = ('ImageMagick not found on your system. ' magick_error = ('ImageMagick not found on your system. '
'Try setting the environment variables MAGICK_INC ' 'Try setting the environment variables MAGICK_INC '
'and MAGICK_LIB to help calibre locate the inclue and libbrary ' 'and MAGICK_LIB to help calibre locate the include and library '
'files.') 'files.')
podofo_lib = os.environ.get('PODOFO_LIB_DIR', podofo_lib) podofo_lib = os.environ.get('PODOFO_LIB_DIR', podofo_lib)

View File

@ -79,7 +79,7 @@ class DocAnalysis(object):
elif format == 'spanned_html': elif format == 'spanned_html':
linere = re.compile('(?<=<span).*?(?=</span>)', re.DOTALL) linere = re.compile('(?<=<span).*?(?=</span>)', re.DOTALL)
elif format == 'txt': elif format == 'txt':
linere = re.compile('.*?\n', re.DOTALL) linere = re.compile('.*?\n')
self.lines = linere.findall(raw) self.lines = linere.findall(raw)
def line_length(self, percent): def line_length(self, percent):
@ -177,7 +177,7 @@ class Dehyphenator(object):
def __init__(self): def __init__(self):
# Add common suffixes to the regex below to increase the likelihood of a match - # Add common suffixes to the regex below to increase the likelihood of a match -
# don't add suffixes which are also complete words, such as 'able' or 'sex' # don't add suffixes which are also complete words, such as 'able' or 'sex'
self.removesuffixes = re.compile(r"((ed)?ly|('e)?s|a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|ment(s)?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex)$", re.IGNORECASE) self.removesuffixes = re.compile(r"((ed)?ly|('e)?s|a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|m?ents?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex|ian)$", re.IGNORECASE)
# remove prefixes if the prefix was not already the point of hyphenation # remove prefixes if the prefix was not already the point of hyphenation
self.prefixes = re.compile(r'^(dis|re|un|in|ex)$', re.IGNORECASE) self.prefixes = re.compile(r'^(dis|re|un|in|ex)$', re.IGNORECASE)
self.removeprefix = re.compile(r'^(dis|re|un|in|ex)', re.IGNORECASE) self.removeprefix = re.compile(r'^(dis|re|un|in|ex)', re.IGNORECASE)
@ -199,7 +199,7 @@ class Dehyphenator(object):
searchresult = self.html.find(lookupword.lower()) searchresult = self.html.find(lookupword.lower())
except: except:
return hyphenated return hyphenated
if self.format == 'html_cleanup': if self.format == 'html_cleanup' or self.format == 'txt_cleanup':
if self.html.find(lookupword) != -1 or searchresult != -1: if self.html.find(lookupword) != -1 or searchresult != -1:
#print "Cleanup:returned dehyphenated word: " + str(dehyphenated) #print "Cleanup:returned dehyphenated word: " + str(dehyphenated)
return dehyphenated return dehyphenated
@ -225,10 +225,15 @@ class Dehyphenator(object):
intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|)\s*(?=<)(?P<wraptags>(</span>)?\s*(</[iubp]>\s*){1,2}(?P<up2threeblanks><(p|div)[^>]*>\s*(<p[^>]*>\s*</p>\s*)?</(p|div)>\s+){0,3}\s*(<[iubp][^>]*>\s*){1,2}(<span[^>]*>)?)\s*(?P<secondpart>[\w\d]+)' % length) intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|)\s*(?=<)(?P<wraptags>(</span>)?\s*(</[iubp]>\s*){1,2}(?P<up2threeblanks><(p|div)[^>]*>\s*(<p[^>]*>\s*</p>\s*)?</(p|div)>\s+){0,3}\s*(<[iubp][^>]*>\s*){1,2}(<span[^>]*>)?)\s*(?P<secondpart>[\w\d]+)' % length)
elif format == 'pdf': elif format == 'pdf':
intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|)\s*(?P<wraptags><p>|</[iub]>\s*<p>\s*<[iub]>)\s*(?P<secondpart>[\w\d]+)'% length) intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|)\s*(?P<wraptags><p>|</[iub]>\s*<p>\s*<[iub]>)\s*(?P<secondpart>[\w\d]+)'% length)
elif format == 'txt':
intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|)(\u0020|\u0009)*(?P<wraptags>(\n(\u0020|\u0009)*)+)(?P<secondpart>[\w\d]+)'% length)
elif format == 'individual_words': elif format == 'individual_words':
intextmatch = re.compile(u'>[^<]*\b(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)"\s>]+)(-|)(?P<secondpart)\w+)\b[^<]*<') # for later, not called anywhere yet intextmatch = re.compile(u'>[^<]*\b(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)"\s>]+)(-|)\u0020*(?P<secondpart>\w+)\b[^<]*<') # for later, not called anywhere yet
elif format == 'html_cleanup': elif format == 'html_cleanup':
intextmatch = re.compile(u'(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|)\s*(?=<)(?P<wraptags></span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?P<secondpart>[\w\d]+)') intextmatch = re.compile(u'(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|)\s*(?=<)(?P<wraptags></span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?P<secondpart>[\w\d]+)')
elif format == 'txt_cleanup':
intextmatch = re.compile(u'(?P<firstpart>\w+)(-|)(?P<wraptags>\s+)(?P<secondpart>[\w\d]+)')
html = intextmatch.sub(self.dehyphenate, html) html = intextmatch.sub(self.dehyphenate, html)
return html return html

View File

@ -190,7 +190,7 @@ class PreProcessor(object):
line_ending = "\s*</(span|p|div)>\s*(</(p|span|div)>)?" line_ending = "\s*</(span|p|div)>\s*(</(p|span|div)>)?"
blanklines = "\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*" blanklines = "\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*"
line_opening = "<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" line_opening = "<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*"
txt_line_wrap = u"(\u0020|\u0009)*\n" txt_line_wrap = u"((\u0020|\u0009)*\n){1,4}"
unwrap_regex = lookahead+line_ending+blanklines+line_opening unwrap_regex = lookahead+line_ending+blanklines+line_opening
if format == 'txt': if format == 'txt':
@ -357,6 +357,6 @@ class PreProcessor(object):
html = blankreg.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html) html = blankreg.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
# Center separator lines # Center separator lines
html = re.sub(u'<p>\s*(?P<break>([*#•]+\s*)+)\s*</p>', '<p style="text-align:center">' + '\g<break>' + '</p>', html) html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center">' + '\g<break>' + '</p>', html)
return html return html

View File

@ -324,14 +324,16 @@ class Metadata(object):
if metadata is None: if metadata is None:
traceback.print_stack() traceback.print_stack()
return return
metadata = copy.deepcopy(metadata) m = {}
if '#value#' not in metadata: for k in metadata:
if metadata['datatype'] == 'text' and metadata['is_multiple']: m[k] = copy.copy(metadata[k])
metadata['#value#'] = [] if '#value#' not in m:
if m['datatype'] == 'text' and m['is_multiple']:
m['#value#'] = []
else: else:
metadata['#value#'] = None m['#value#'] = None
_data = object.__getattribute__(self, '_data') _data = object.__getattribute__(self, '_data')
_data['user_metadata'][field] = metadata _data['user_metadata'][field] = m
def template_to_attribute(self, other, ops): def template_to_attribute(self, other, ops):
''' '''

View File

@ -7,11 +7,12 @@ __docformat__ = 'restructuredtext en'
import os import os
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
from calibre.ebooks.chardet import detect from calibre.ebooks.chardet import detect
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \ from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
separate_paragraphs_single_line, separate_paragraphs_print_formatted, \ separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
preserve_spaces, detect_paragraph_type, detect_formatting_type, \ preserve_spaces, detect_paragraph_type, detect_formatting_type, \
convert_heuristic convert_heuristic, normalize_line_endings
from calibre import _ent_pat, xml_entity_to_unicode from calibre import _ent_pat, xml_entity_to_unicode
class TXTInput(InputFormatPlugin): class TXTInput(InputFormatPlugin):
@ -23,7 +24,7 @@ class TXTInput(InputFormatPlugin):
options = set([ options = set([
OptionRecommendation(name='paragraph_type', recommended_value='auto', OptionRecommendation(name='paragraph_type', recommended_value='auto',
choices=['auto', 'block', 'single', 'print'], choices=['auto', 'block', 'single', 'print', 'unformatted'],
help=_('Paragraph structure.\n' help=_('Paragraph structure.\n'
'choices are [\'auto\', \'block\', \'single\', \'print\', \'unformatted\']\n' 'choices are [\'auto\', \'block\', \'single\', \'print\', \'unformatted\']\n'
'* auto: Try to auto detect paragraph type.\n' '* auto: Try to auto detect paragraph type.\n'
@ -31,7 +32,7 @@ class TXTInput(InputFormatPlugin):
'* single: Assume every line is a paragraph.\n' '* single: Assume every line is a paragraph.\n'
'* print: Assume every line starting with 2+ spaces or a tab ' '* print: Assume every line starting with 2+ spaces or a tab '
'starts a paragraph.' 'starts a paragraph.'
'* unformatted: Most lines have hard line breaks, few/no spaces or indents.')), '* unformatted: Most lines have hard line breaks, few/no blank lines or indents.')),
OptionRecommendation(name='formatting_type', recommended_value='auto', OptionRecommendation(name='formatting_type', recommended_value='auto',
choices=['auto', 'none', 'heuristic', 'markdown'], choices=['auto', 'none', 'heuristic', 'markdown'],
help=_('Formatting used within the document.' help=_('Formatting used within the document.'
@ -73,6 +74,13 @@ class TXTInput(InputFormatPlugin):
if options.preserve_spaces: if options.preserve_spaces:
txt = preserve_spaces(txt) txt = preserve_spaces(txt)
# Normalize line endings
txt = normalize_line_endings(txt)
# Get length for hyphen removal and punctuation unwrap
docanalysis = DocAnalysis('txt', txt)
length = docanalysis.line_length(.5)
if options.formatting_type == 'auto': if options.formatting_type == 'auto':
options.formatting_type = detect_formatting_type(txt) options.formatting_type = detect_formatting_type(txt)
@ -93,8 +101,13 @@ class TXTInput(InputFormatPlugin):
else: else:
log.debug('Auto detected paragraph type as %s' % options.paragraph_type) log.debug('Auto detected paragraph type as %s' % options.paragraph_type)
# Dehyphenate
dehyphenator = Dehyphenator()
txt = dehyphenator(txt,'txt', length)
# We don't check for block because the processor assumes block. # We don't check for block because the processor assumes block.
# single and print at transformed to block for processing. # single and print at transformed to block for processing.
if options.paragraph_type == 'single' or options.paragraph_type == 'unformatted': if options.paragraph_type == 'single' or options.paragraph_type == 'unformatted':
txt = separate_paragraphs_single_line(txt) txt = separate_paragraphs_single_line(txt)
elif options.paragraph_type == 'print': elif options.paragraph_type == 'print':
@ -102,10 +115,8 @@ class TXTInput(InputFormatPlugin):
if options.paragraph_type == 'unformatted': if options.paragraph_type == 'unformatted':
from calibre.ebooks.conversion.utils import PreProcessor from calibre.ebooks.conversion.utils import PreProcessor
from calibre.ebooks.conversion.preprocess import DocAnalysis
# get length # get length
docanalysis = DocAnalysis('txt', txt)
length = docanalysis.line_length(.5)
# unwrap lines based on punctuation # unwrap lines based on punctuation
preprocessor = PreProcessor(options, log=getattr(self, 'log', None)) preprocessor = PreProcessor(options, log=getattr(self, 'log', None))
txt = preprocessor.punctuation_unwrap(length, txt, 'txt') txt = preprocessor.punctuation_unwrap(length, txt, 'txt')
@ -117,6 +128,10 @@ class TXTInput(InputFormatPlugin):
else: else:
html = convert_basic(txt, epub_split_size_kb=flow_size) html = convert_basic(txt, epub_split_size_kb=flow_size)
# Dehyphenate in cleanup mode for missed txt and markdown conversion
dehyphenator = Dehyphenator()
html = dehyphenator(html,'txt_cleanup', length)
html = dehyphenator(html,'html_cleanup', length)
from calibre.customize.ui import plugin_for_input_format from calibre.customize.ui import plugin_for_input_format
html_input = plugin_for_input_format('html') html_input = plugin_for_input_format('html')

View File

@ -80,9 +80,12 @@ def convert_markdown(txt, title='', disable_toc=False):
safe_mode=False) safe_mode=False)
return HTML_TEMPLATE % (title, md.convert(txt)) return HTML_TEMPLATE % (title, md.convert(txt))
def separate_paragraphs_single_line(txt): def normalize_line_endings(txt):
txt = txt.replace('\r\n', '\n') txt = txt.replace('\r\n', '\n')
txt = txt.replace('\r', '\n') txt = txt.replace('\r', '\n')
return txt
def separate_paragraphs_single_line(txt):
txt = re.sub(u'(?<=.)\n(?=.)', '\n\n', txt) txt = re.sub(u'(?<=.)\n(?=.)', '\n\n', txt)
return txt return txt
@ -117,7 +120,7 @@ def detect_paragraph_type(txt):
single: Each line is a paragraph. single: Each line is a paragraph.
print: Each paragraph starts with a 2+ spaces or a tab print: Each paragraph starts with a 2+ spaces or a tab
and ends when a new paragraph is reached. and ends when a new paragraph is reached.
unformatted: most lines have hard line breaks, few/no spaces or indents unformatted: most lines have hard line breaks, few/no blank lines or indents
returns block, single, print, unformatted returns block, single, print, unformatted
''' '''
@ -130,14 +133,20 @@ def detect_paragraph_type(txt):
hardbreaks = docanalysis.line_histogram(.55) hardbreaks = docanalysis.line_histogram(.55)
if hardbreaks: if hardbreaks:
# Check for print # Determine print percentage
tab_line_count = len(re.findall('(?mu)^(\t|\s{2,}).+$', txt)) tab_line_count = len(re.findall('(?mu)^(\t|\s{2,}).+$', txt))
if tab_line_count / float(txt_line_count) >= .15: print_percent = tab_line_count / float(txt_line_count)
return 'print'
# Check for block # Determine block percentage
empty_line_count = len(re.findall('(?mu)^\s*$', txt)) empty_line_count = len(re.findall('(?mu)^\s*$', txt))
if empty_line_count / float(txt_line_count) >= .15: block_percent = empty_line_count / float(txt_line_count)
# Compare the two types - the type with the larger number of instances wins
# in cases where only one or the other represents the vast majority of the document neither wins
if print_percent >= block_percent:
if .15 <= print_percent <= .75:
return 'print'
elif .15 <= block_percent <= .75:
return 'block' return 'block'
# Assume unformatted text with hardbreaks if nothing else matches # Assume unformatted text with hardbreaks if nothing else matches

View File

@ -1266,8 +1266,8 @@ class DeviceMixin(object): # {{{
# Force a reset if the caches are not initialized # Force a reset if the caches are not initialized
if reset or not hasattr(self, 'db_book_title_cache'): if reset or not hasattr(self, 'db_book_title_cache'):
# Build a cache (map) of the library, so the search isn't On**2 # Build a cache (map) of the library, so the search isn't On**2
self.db_book_title_cache = {} db_book_title_cache = {}
self.db_book_uuid_cache = {} db_book_uuid_cache = {}
# It might be possible to get here without having initialized the # It might be possible to get here without having initialized the
# library view. In this case, simply give up # library view. In this case, simply give up
try: try:
@ -1278,8 +1278,8 @@ class DeviceMixin(object): # {{{
for id in db.data.iterallids(): for id in db.data.iterallids():
mi = db.get_metadata(id, index_is_id=True) mi = db.get_metadata(id, index_is_id=True)
title = clean_string(mi.title) title = clean_string(mi.title)
if title not in self.db_book_title_cache: if title not in db_book_title_cache:
self.db_book_title_cache[title] = \ db_book_title_cache[title] = \
{'authors':{}, 'author_sort':{}, 'db_ids':{}} {'authors':{}, 'author_sort':{}, 'db_ids':{}}
# If there are multiple books in the library with the same title # If there are multiple books in the library with the same title
# and author, then remember the last one. That is OK, because as # and author, then remember the last one. That is OK, because as
@ -1287,12 +1287,14 @@ class DeviceMixin(object): # {{{
# as another. # as another.
if mi.authors: if mi.authors:
authors = clean_string(authors_to_string(mi.authors)) authors = clean_string(authors_to_string(mi.authors))
self.db_book_title_cache[title]['authors'][authors] = mi db_book_title_cache[title]['authors'][authors] = mi
if mi.author_sort: if mi.author_sort:
aus = clean_string(mi.author_sort) aus = clean_string(mi.author_sort)
self.db_book_title_cache[title]['author_sort'][aus] = mi db_book_title_cache[title]['author_sort'][aus] = mi
self.db_book_title_cache[title]['db_ids'][mi.application_id] = mi db_book_title_cache[title]['db_ids'][mi.application_id] = mi
self.db_book_uuid_cache[mi.uuid] = mi db_book_uuid_cache[mi.uuid] = mi
self.db_book_title_cache = db_book_title_cache
self.db_book_uuid_cache = db_book_uuid_cache
# Now iterate through all the books on the device, setting the # Now iterate through all the books on the device, setting the
# in_library field. If the UUID matches a book in the library, then # in_library field. If the UUID matches a book in the library, then

View File

@ -5,11 +5,11 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from PyQt4 import QtGui from PyQt4.Qt import Qt, QLineEdit, QComboBox, SIGNAL, QListWidgetItem
from PyQt4.Qt import Qt
from calibre.gui2 import error_dialog from calibre.gui2 import error_dialog
from calibre.gui2.device import device_name_for_plugboards from calibre.gui2.device import device_name_for_plugboards
from calibre.gui2.dialogs.template_dialog import TemplateDialog
from calibre.gui2.preferences import ConfigWidgetBase, test_widget from calibre.gui2.preferences import ConfigWidgetBase, test_widget
from calibre.gui2.preferences.plugboard_ui import Ui_Form from calibre.gui2.preferences.plugboard_ui import Ui_Form
from calibre.customize.ui import metadata_writers, device_plugins from calibre.customize.ui import metadata_writers, device_plugins
@ -17,6 +17,27 @@ from calibre.library.save_to_disk import plugboard_any_format_value, \
plugboard_any_device_value, plugboard_save_to_disk_value plugboard_any_device_value, plugboard_save_to_disk_value
from calibre.utils.formatter import validation_formatter from calibre.utils.formatter import validation_formatter
class LineEditWithTextBox(QLineEdit):
'''
Extend the context menu of a QLineEdit to include more actions.
'''
def contextMenuEvent(self, event):
menu = self.createStandardContextMenu()
menu.addSeparator()
action_open_editor = menu.addAction(_('Open Editor'))
self.connect(action_open_editor, SIGNAL('triggered()'), self.open_editor)
menu.exec_(event.globalPos())
def open_editor(self):
t = TemplateDialog(self, self.text())
if t.exec_():
self.setText(t.textbox.toPlainText())
class ConfigWidget(ConfigWidgetBase, Ui_Form): class ConfigWidget(ConfigWidgetBase, Ui_Form):
def genesis(self, gui): def genesis(self, gui):
@ -72,10 +93,10 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self.source_widgets = [] self.source_widgets = []
self.dest_widgets = [] self.dest_widgets = []
for i in range(0, len(self.dest_fields)-1): for i in range(0, len(self.dest_fields)-1):
w = QtGui.QLineEdit(self) w = LineEditWithTextBox(self)
self.source_widgets.append(w) self.source_widgets.append(w)
self.fields_layout.addWidget(w, 5+i, 0, 1, 1) self.fields_layout.addWidget(w, 5+i, 0, 1, 1)
w = QtGui.QComboBox(self) w = QComboBox(self)
self.dest_widgets.append(w) self.dest_widgets.append(w)
self.fields_layout.addWidget(w, 5+i, 1, 1, 1) self.fields_layout.addWidget(w, 5+i, 1, 1, 1)
@ -297,7 +318,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
for op in self.current_plugboards[f][d]: for op in self.current_plugboards[f][d]:
ops.append('([' + op[0] + '] -> ' + op[1] + ')') ops.append('([' + op[0] + '] -> ' + op[1] + ')')
txt = '%s:%s = %s\n'%(f, d, ', '.join(ops)) txt = '%s:%s = %s\n'%(f, d, ', '.join(ops))
item = QtGui.QListWidgetItem(txt) item = QListWidgetItem(txt)
item.setData(Qt.UserRole, (f, d)) item.setData(Qt.UserRole, (f, d))
self.existing_plugboards.addItem(item) self.existing_plugboards.addItem(item)
self.refilling = False self.refilling = False

View File

@ -486,7 +486,7 @@ class ResultCache(SearchQueryParser): # {{{
q = query q = query
for id_ in candidates: for id_ in candidates:
item = self._data[id] item = self._data[id_]
if item is None: continue if item is None: continue
if col_datatype[loc] == 'bool': # complexity caused by the two-/three-value tweak if col_datatype[loc] == 'bool': # complexity caused by the two-/three-value tweak

View File

@ -151,6 +151,8 @@ class CustomColumns(object):
return v return v
def adapt_number(x, d): def adapt_number(x, d):
if x is None:
return None
if isinstance(x, (str, unicode, bytes)): if isinstance(x, (str, unicode, bytes)):
if x.lower() == 'none': if x.lower() == 'none':
return None return None

View File

@ -256,7 +256,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
'pubdate', 'pubdate',
'flags', 'flags',
'uuid', 'uuid',
'has_cover' 'has_cover',
('au_map', 'authors', 'author', 'aum_sortconcat(link.id, authors.name, authors.sort)')
] ]
lines = [] lines = []
for col in columns: for col in columns:
@ -273,9 +274,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'timestamp':3, self.FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'timestamp':3,
'size':4, 'rating':5, 'tags':6, 'comments':7, 'series':8, 'size':4, 'rating':5, 'tags':6, 'comments':7, 'series':8,
'publisher':9, 'series_index':10, 'publisher':9, 'series_index':10, 'sort':11, 'author_sort':12,
'sort':11, 'author_sort':12, 'formats':13, 'isbn':14, 'path':15, 'formats':13, 'isbn':14, 'path':15, 'lccn':16, 'pubdate':17,
'lccn':16, 'pubdate':17, 'flags':18, 'uuid':19, 'cover':20} 'flags':18, 'uuid':19, 'cover':20, 'au_map':21}
for k,v in self.FIELD_MAP.iteritems(): for k,v in self.FIELD_MAP.iteritems():
self.field_metadata.set_field_record_index(k, v, prefer_custom=False) self.field_metadata.set_field_record_index(k, v, prefer_custom=False)
@ -687,9 +688,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
Convenience method to return metadata as a :class:`Metadata` object. Convenience method to return metadata as a :class:`Metadata` object.
Note that the list of formats is not verified. Note that the list of formats is not verified.
''' '''
row = self.data._data[idx] if index_is_id else self.data[idx]
fm = self.FIELD_MAP
self.gm_count += 1 self.gm_count += 1
mi = self.data.get(idx, self.FIELD_MAP['all_metadata'], mi = row[self.FIELD_MAP['all_metadata']]
row_is_id = index_is_id)
if mi is not None: if mi is not None:
if get_cover: if get_cover:
# Always get the cover, because the value can be wrong if the # Always get the cover, because the value can be wrong if the
@ -699,49 +702,46 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.gm_missed += 1 self.gm_missed += 1
mi = Metadata(None) mi = Metadata(None)
self.data.set(idx, self.FIELD_MAP['all_metadata'], mi, self.data.set(idx, fm['all_metadata'], mi, row_is_id = index_is_id)
row_is_id = index_is_id)
aut_list = self.authors_with_sort_strings(idx, index_is_id=index_is_id) aut_list = row[fm['au_map']]
aut_list = [p.split(':::') for p in aut_list.split(':#:')]
aum = [] aum = []
aus = {} aus = {}
for (author, author_sort) in aut_list: for (author, author_sort) in aut_list:
aum.append(author) aum.append(author)
aus[author] = author_sort aus[author] = author_sort.replace('|', ',')
mi.title = self.title(idx, index_is_id=index_is_id) mi.title = row[fm['title']]
mi.authors = aum mi.authors = aum
mi.author_sort = self.author_sort(idx, index_is_id=index_is_id) mi.author_sort = row[fm['author_sort']]
mi.author_sort_map = aus mi.author_sort_map = aus
mi.comments = self.comments(idx, index_is_id=index_is_id) mi.comments = row[fm['comments']]
mi.publisher = self.publisher(idx, index_is_id=index_is_id) mi.publisher = row[fm['publisher']]
mi.timestamp = self.timestamp(idx, index_is_id=index_is_id) mi.timestamp = row[fm['timestamp']]
mi.pubdate = self.pubdate(idx, index_is_id=index_is_id) mi.pubdate = row[fm['pubdate']]
mi.uuid = self.uuid(idx, index_is_id=index_is_id) mi.uuid = row[fm['uuid']]
mi.title_sort = self.title_sort(idx, index_is_id=index_is_id) mi.title_sort = row[fm['sort']]
mi.formats = self.formats(idx, index_is_id=index_is_id, formats = row[fm['formats']]
verify_formats=False) if not formats:
if hasattr(mi.formats, 'split'): formats = None
mi.formats = mi.formats.split(',') mi.formats = formats
else: tags = row[fm['tags']]
mi.formats = None
tags = self.tags(idx, index_is_id=index_is_id)
if tags: if tags:
mi.tags = [i.strip() for i in tags.split(',')] mi.tags = [i.strip() for i in tags.split(',')]
mi.series = self.series(idx, index_is_id=index_is_id) mi.series = row[fm['series']]
if mi.series: if mi.series:
mi.series_index = self.series_index(idx, index_is_id=index_is_id) mi.series_index = row[fm['series_index']]
mi.rating = self.rating(idx, index_is_id=index_is_id) mi.rating = row[fm['rating']]
mi.isbn = self.isbn(idx, index_is_id=index_is_id) mi.isbn = row[fm['isbn']]
id = idx if index_is_id else self.id(idx) id = idx if index_is_id else self.id(idx)
mi.application_id = id mi.application_id = id
mi.id = id mi.id = id
for key,meta in self.field_metadata.iteritems(): for key, meta in self.field_metadata.custom_iteritems():
if meta['is_custom']: mi.set_user_metadata(key, meta)
mi.set_user_metadata(key, meta) mi.set(key, val=self.get_custom(idx, label=meta['label'],
mi.set(key, val=self.get_custom(idx, label=meta['label'], index_is_id=index_is_id),
index_is_id=index_is_id), extra=self.get_custom_extra(idx, label=meta['label'],
extra=self.get_custom_extra(idx, label=meta['label'], index_is_id=index_is_id))
index_is_id=index_is_id))
if get_cover: if get_cover:
mi.cover = self.cover(id, index_is_id=True, as_path=True) mi.cover = self.cover(id, index_is_id=True, as_path=True)
return mi return mi
@ -877,18 +877,17 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
def formats(self, index, index_is_id=False, verify_formats=True): def formats(self, index, index_is_id=False, verify_formats=True):
''' Return available formats as a comma separated list or None if there are no available formats ''' ''' Return available formats as a comma separated list or None if there are no available formats '''
id = index if index_is_id else self.id(index) id_ = index if index_is_id else self.id(index)
try: formats = self.data.get(id_, self.FIELD_MAP['formats'], row_is_id=True)
formats = self.conn.get('SELECT format FROM data WHERE book=?', (id,)) if not formats:
formats = map(lambda x:x[0], formats)
except:
return None return None
if not verify_formats: if not verify_formats:
return ','.join(formats) return formats
formats = formats.split(',')
ans = [] ans = []
for format in formats: for fmt in formats:
if self.format_abspath(id, format, index_is_id=True) is not None: if self.format_abspath(id_, fmt, index_is_id=True) is not None:
ans.append(format) ans.append(fmt)
if not ans: if not ans:
return None return None
return ','.join(ans) return ','.join(ans)
@ -1607,6 +1606,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
','.join([a.replace(',', '|') for a in authors]), ','.join([a.replace(',', '|') for a in authors]),
row_is_id=True) row_is_id=True)
self.data.set(id, self.FIELD_MAP['author_sort'], ss, row_is_id=True) self.data.set(id, self.FIELD_MAP['author_sort'], ss, row_is_id=True)
aum = self.authors_with_sort_strings(id, index_is_id=True)
self.data.set(id, self.FIELD_MAP['au_map'],
':#:'.join([':::'.join((au.replace(',', '|'), aus)) for (au, aus) in aum]),
row_is_id=True)
def set_authors(self, id, authors, notify=True, commit=True): def set_authors(self, id, authors, notify=True, commit=True):
''' '''

View File

@ -180,6 +180,15 @@ class FieldMetadata(dict):
'search_terms':['author_sort'], 'search_terms':['author_sort'],
'is_custom':False, 'is_custom':False,
'is_category':False}), 'is_category':False}),
('au_map', {'table':None,
'column':None,
'datatype':'text',
'is_multiple':',',
'kind':'field',
'name':None,
'search_terms':[],
'is_custom':False,
'is_category':False}),
('comments', {'table':None, ('comments', {'table':None,
'column':None, 'column':None,
'datatype':'text', 'datatype':'text',
@ -400,6 +409,12 @@ class FieldMetadata(dict):
for key in self._tb_cats: for key in self._tb_cats:
yield (key, self._tb_cats[key]) yield (key, self._tb_cats[key])
def custom_iteritems(self):
for key in self._tb_cats:
fm = self._tb_cats[key]
if fm['is_custom']:
yield (key, self._tb_cats[key])
def items(self): def items(self):
return list(self.iteritems()) return list(self.iteritems())

View File

@ -756,7 +756,7 @@ class BrowseServer(object):
sort = self.browse_sort_book_list(items, list_sort) sort = self.browse_sort_book_list(items, list_sort)
ids = [x[0] for x in items] ids = [x[0] for x in items]
html = render_book_list(ids, self.opts.url_prefix, html = render_book_list(ids, self.opts.url_prefix,
suffix=_('in search')+': '+query) suffix=_('in search')+': '+xml(query))
return self.browse_template(sort, category=False, initial_search=query).format( return self.browse_template(sort, category=False, initial_search=query).format(
title=_('Matching books'), title=_('Matching books'),
script='booklist();', main=html) script='booklist();', main=html)

View File

@ -87,6 +87,23 @@ class SortedConcatenate(object):
class SafeSortedConcatenate(SortedConcatenate): class SafeSortedConcatenate(SortedConcatenate):
sep = '|' sep = '|'
class AumSortedConcatenate(object):
'''String concatenation aggregator for the author sort map'''
def __init__(self):
self.ans = {}
def step(self, ndx, author, sort):
if author is not None:
self.ans[ndx] = author + ':::' + sort
def finalize(self):
keys = self.ans.keys()
if len(keys) == 0:
return None
if len(keys) == 1:
return self.ans[keys[0]]
return ':#:'.join([self.ans[v] for v in sorted(keys)])
class Connection(sqlite.Connection): class Connection(sqlite.Connection):
def get(self, *args, **kw): def get(self, *args, **kw):
@ -155,6 +172,7 @@ class DBThread(Thread):
c_ext_loaded = load_c_extensions(self.conn) c_ext_loaded = load_c_extensions(self.conn)
self.conn.row_factory = sqlite.Row if self.row_factory else lambda cursor, row : list(row) self.conn.row_factory = sqlite.Row if self.row_factory else lambda cursor, row : list(row)
self.conn.create_aggregate('concat', 1, Concatenate) self.conn.create_aggregate('concat', 1, Concatenate)
self.conn.create_aggregate('aum_sortconcat', 3, AumSortedConcatenate)
if not c_ext_loaded: if not c_ext_loaded:
self.conn.create_aggregate('sortconcat', 2, SortedConcatenate) self.conn.create_aggregate('sortconcat', 2, SortedConcatenate)
self.conn.create_aggregate('sort_concat', 2, SafeSortedConcatenate) self.conn.create_aggregate('sort_concat', 2, SafeSortedConcatenate)

View File

@ -98,9 +98,10 @@ class _Parser(object):
m = 'Formatter: ' + message + _(' near ') m = 'Formatter: ' + message + _(' near ')
if self.lex_pos > 0: if self.lex_pos > 0:
m = '{0} {1}'.format(m, self.prog[self.lex_pos-1][1]) m = '{0} {1}'.format(m, self.prog[self.lex_pos-1][1])
m = '{0} {1}'.format(m, self.prog[self.lex_pos][1]) elif self.lex_pos < len(self.prog):
if self.lex_pos < len(self.prog):
m = '{0} {1}'.format(m, self.prog[self.lex_pos+1][1]) m = '{0} {1}'.format(m, self.prog[self.lex_pos+1][1])
else:
m = '{0} {1}'.format(m, _('end of program'))
raise ValueError(m) raise ValueError(m)
def token(self): def token(self):