merge from trunk, minor preprocess tweaks

2025-07-09 03:04:10 -04:00 · 2011-01-13 17:02:44 +08:00 · 2011-01-13 17:02:44 +08:00 · 8d02ad93a7
commit 8d02ad93a7
parent 9832b7118b 187331af81
18 changed files with 370 additions and 68 deletions
--- a/resources/recipes/cicero.recipe
+++ b/resources/recipes/cicero.recipe
@ -0,0 +1,35 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Cicero(BasicNewsRecipe):
+    '''
+    Recipe for Cicero (www.cicero.de), a German-language magazine for
+    political culture.
+
+    Articles are taken from the site's RSS feeds and downloaded through the
+    print view (see print_version).
+    '''
+    timefmt               = ' [%Y-%m-%d]'
+    title                 = u'Cicero'
+    __author__            = 'mad@sharktooth.de'
+    description           = u'Magazin f\xfcr politische Kultur'
+    oldest_article        = 7
+    language              = 'de'
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+    publisher             = 'Ringier Publishing'
+    category              = 'news, politics, Germany'
+    encoding              = 'iso-8859-1'
+    publication_type      = 'magazine'
+    masthead_url          = 'http://www.cicero.de/img2/cicero_logo_rss.gif'
+
+    # Only the two catch-all feeds are enabled.  The commented-out entries
+    # are the individual sections; judging by their titles they are subsets
+    # of the enabled feeds, so enable them only instead of the catch-all
+    # entry if a narrower selection is wanted.
+    feeds                 = [
+        (u'Das gesamte Portfolio', u'http://www.cicero.de/rss/rss.php?ress_id='),
+        #(u'Alle Heft-Inhalte', u'http://www.cicero.de/rss/rss.php?ress_id=heft'),
+        #(u'Alle Online-Inhalte', u'http://www.cicero.de/rss/rss.php?ress_id=online'),
+        #(u'Berliner Republik', u'http://www.cicero.de/rss/rss.php?ress_id=4'),
+        #(u'Weltb\xfchne', u'http://www.cicero.de/rss/rss.php?ress_id=1'),
+        #(u'Salon', u'http://www.cicero.de/rss/rss.php?ress_id=7'),
+        #(u'Kapital', u'http://www.cicero.de/rss/rss.php?ress_id=6'),
+        #(u'Netzst\xfccke', u'http://www.cicero.de/rss/rss.php?ress_id=9'),
+        #(u'Leinwand', u'http://www.cicero.de/rss/rss.php?ress_id=12'),
+        #(u'Bibliothek', u'http://www.cicero.de/rss/rss.php?ress_id=15'),
+        (u'Kolumne - Alle Kolumnen', u'http://www.cicero.de/rss/rss2.php?ress_id='),
+        #(u'Kolumne - Schreiber, Berlin', u'http://www.cicero.de/rss/rss2.php?ress_id=35'),
+        #(u'Kolumne - TV Kritik', u'http://www.cicero.de/rss/rss2.php?ress_id=34')
+    ]
+
+    def print_version(self, url):
+        '''
+        Return the URL of the print view of the article at ``url``.
+
+        Everything after the last '?' in ``url`` is appended to the
+        page_print.php address.  If ``url`` contains no '?', rpartition
+        returns the whole string as the last element, so the complete URL
+        is appended unchanged.
+        '''
+        return 'http://www.cicero.de/page_print.php?' + url.rpartition('?')[2]
--- a/resources/recipes/cnetjapan.recipe
+++ b/resources/recipes/cnetjapan.recipe
@ -11,7 +11,7 @@ class CNetJapan(BasicNewsRecipe):
                      (u'CNet Blog', u'http://feed.japan.cnet.com/rss/blog/index.rdf')
                        ]
    language       = 'ja'
-    encoding       = 'Shift_JIS'
+    encoding       = 'utf-8'
    remove_javascript = True

    preprocess_regexps = [
--- a/resources/recipes/el_correo.recipe
+++ b/resources/recipes/el_correo.recipe
@ -0,0 +1,122 @@
+#!/usr/bin/env  python
+__license__     = 'GPL v3'
+__copyright__   = '08 Januery 2011, desUBIKado'
+__author__      = 'desUBIKado'
+__description__ = 'Daily newspaper from Biscay'
+__version__     = 'v0.08'
+__date__        = '08, Januery 2011'
+'''
+[url]http://www.elcorreo.com/[/url]
+'''
+
+import time
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class heraldo(BasicNewsRecipe):
+    '''
+    Recipe for El Correo (www.elcorreo.com), using the Vizcaya RSS feeds.
+
+    NOTE(review): the class is called ``heraldo`` although it downloads
+    El Correo; the name is left untouched so that nothing referring to it
+    breaks.
+    '''
+    __author__            = 'desUBIKado'
+    description           = 'Daily newspaper from Biscay'
+    title                 = u'El Correo'
+    publisher             = 'Vocento'
+    category              = 'News, politics, culture, economy, general interest'
+    oldest_article        = 2
+    delay                 = 1
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+    language              = 'es'
+    timefmt               = '[%a, %d %b, %Y]'
+    encoding              = 'iso-8859-1'
+    remove_empty_feeds    = True
+    # Presumably left off because the first preprocess_regexps entries
+    # rewrite an inline script into an <img> tag -- confirm before enabling.
+    remove_javascript     = False
+
+    feeds              = [
+                           (u'Portada',       u'http://www.elcorreo.com/vizcaya/portada.xml'),
+                           (u'Local',         u'http://www.elcorreo.com/vizcaya/rss/feeds/vizcaya.xml'),
+                           # Scheme fixed: this entry used to read 'hhttp://'.
+                           (u'Internacional', u'http://www.elcorreo.com/vizcaya/rss/feeds/internacional.xml'),
+                           (u'Econom\xeda',   u'http://www.elcorreo.com/vizcaya/rss/feeds/economia.xml'),
+                           (u'Pol\xedtica',   u'http://www.elcorreo.com/vizcaya/rss/feeds/politica.xml'),
+                           (u'Opini\xf3n',    u'http://www.elcorreo.com/vizcaya/rss/feeds/opinion.xml'),
+                           (u'Deportes',      u'http://www.elcorreo.com/vizcaya/rss/feeds/deportes.xml'),
+                           (u'Sociedad',      u'http://www.elcorreo.com/vizcaya/rss/feeds/sociedad.xml'),
+                           (u'Cultura',       u'http://www.elcorreo.com/vizcaya/rss/feeds/cultura.xml'),
+                           (u'Televisi\xf3n', u'http://www.elcorreo.com/vizcaya/rss/feeds/television.xml'),
+                           (u'Gente',         u'http://www.elcorreo.com/vizcaya/rss/feeds/gente.xml')
+                         ]
+
+    keep_only_tags     = [
+                          dict(name='div', attrs={'class':['grouphead','date','art_head','story-texto','text','colC_articulo','contenido_comentarios']}),
+                          dict(name='div' , attrs={'id':['articulo','story-texto','story-entradilla']})
+                         ]
+
+    remove_tags        = [
+                          dict(name='div', attrs={'class':['art_barra','detalles-opinion','formdenunciar','modulo calculadoras','nubetags','pie']}),
+                          dict(name='div', attrs={'class':['mod_lomas','bloque_lomas','blm_header','link-app3','link-app4','botones_listado']}),
+                          dict(name='div', attrs={'class':['navegacion_galeria','modulocanalpromocion','separa','separacion','compartir','tags_relacionados']}),
+                          dict(name='div', attrs={'class':['moduloBuscadorDeportes','modulo-gente','moddestacadopeq','OpcArt','articulopiniones']}),
+                          dict(name='div', attrs={'class':['modulo-especial','publiEspecial']}),
+                          dict(name='div', attrs={'id':['articulopina']}),
+                          dict(name='br', attrs={'class':'clear'}),
+                          dict(name='form', attrs={'name':'frm_conversor2'})
+                         ]
+
+    remove_tags_before = dict(name='div' , attrs={'class':'articulo  '})
+    remove_tags_after  = dict(name='div' , attrs={'class':'comentarios'})
+
+    def get_cover_url(self):
+        '''
+        Return the URL to use as cover.
+
+        The URL of today's front-page PDF is built from the local date and
+        opened once as a probe; if opening it fails, the newspaper logo is
+        returned instead.
+        '''
+        st = time.localtime()
+        year = str(st.tm_year)
+        month = "%.2d" % st.tm_mon
+        day = "%.2d" % st.tm_mday
+        # The PDF name is DDMMYYYY-viz.pdf, for example
+        #   http://info.elcorreo.com/pdf/06012011-viz.pdf
+        # (another cover source noted by the author:
+        #   http://img.kiosko.net/2011/01/02/es/elcorreo.750.jpg)
+        cover='http://info.elcorreo.com/pdf/'+ day + month + year +'-viz.pdf'
+
+        br = BasicNewsRecipe.get_browser()
+        try:
+            br.open(cover)
+        except Exception:
+            # Best effort: any download problem falls back to the logo.
+            # 'Exception' instead of a bare except lets KeyboardInterrupt
+            # and SystemExit propagate.
+            self.log("\nPortada no disponible")
+            cover ='http://www.elcorreo.com/vizcaya/noticias/201002/02/Media/logo-elcorreo-nuevo.png'
+        return cover
+
+    extra_css = '''
+                    h1, .headline {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
+                    h2, .subhead {font-family:Arial,Helvetica,sans-serif; font-style:italic; font-weight:normal;font-size:18px;}
+                    h3, .overhead {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:16px;}
+                    h4 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:16px;}
+                    h5 {font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:16px;}
+                    h6 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:16px;}
+                    .date,.byline, .photo {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:14px;}
+                    img{margin-bottom: 0.4em}
+                '''
+
+    # Each entry is (compiled pattern, replacement function); the
+    # replacement functions ignore the match and return a fixed string.
+    preprocess_regexps = [
+
+                           # To present the image of the embedded video
+                           (re.compile(r'var RUTA_IMAGEN', re.DOTALL|re.IGNORECASE), lambda match: '</script><img src'),
+                           (re.compile(r'.jpg";', re.DOTALL|re.IGNORECASE), lambda match: '.jpg">'),
+                           (re.compile(r'var SITIO = "elcorreo";', re.DOTALL|re.IGNORECASE), lambda match: '<SCRIPT TYPE="text/JavaScript"'),
+
+                           # To separate paragraphs with a blank line
+                           (re.compile(r'<div class="p"', re.DOTALL|re.IGNORECASE), lambda match: '<p></p><div class="p"'),
+
+                           # To put a blank line between the subtitle and the date and time of the news
+                           (re.compile(r'<div class="date">', re.DOTALL|re.IGNORECASE), lambda match: '<br><div class="date">'),
+
+                           # To put a blank line between the intro of the embedded videos and the previous text
+                           (re.compile(r'<div class="video"', re.DOTALL|re.IGNORECASE), lambda match: '<br><div class="video"'),
+
+                           # To show the real photos when they are presented as a gallery:
+                           # drop the placeholder src, then turn every 'rel=' into 'src='
+                           (re.compile(r'src="/img/shim.gif"', re.DOTALL|re.IGNORECASE), lambda match: ''),
+                           (re.compile(r'rel=', re.DOTALL|re.IGNORECASE), lambda match: 'src='),
+
+                           # To remove the link of the title
+                           (re.compile(r'<h1 class="headline">\n<a href="', re.DOTALL|re.IGNORECASE), lambda match: '<h1 class="'),
+                           (re.compile(r'</a>\n</h1>', re.DOTALL|re.IGNORECASE), lambda match: '</h1>'),
+
+                         ]
+
--- a/resources/recipes/heraldo.recipe
+++ b/resources/recipes/heraldo.recipe
@ -3,29 +3,31 @@ __license__     = 'GPL v3'
 __copyright__   = '04 December 2010, desUBIKado'
 __author__      = 'desUBIKado'
 __description__ = 'Daily newspaper from Aragon'
-__version__     = 'v0.03'
-__date__        = '11, December 2010'
+__version__     = 'v0.04'
+__date__        = '6, Januery 2011'
 '''
 [url]http://www.heraldo.es/[/url]
 '''

 import time
+import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class heraldo(BasicNewsRecipe):
-    __author__        = 'desUBIKado'
-    description   = 'Daily newspaper from Aragon'
+    __author__     = 'desUBIKado'
+    description    = 'Daily newspaper from Aragon'
    title          = u'Heraldo de Aragon'
    publisher      = 'OJD Nielsen'
    category       = 'News, politics, culture, economy, general interest'
    language       = 'es'
    timefmt        = '[%a, %d %b, %Y]'
-    oldest_article = 1
+    oldest_article = 2
+    delay          = 1
    max_articles_per_feed = 100
    use_embedded_content  = False
    remove_javascript = True
    no_stylesheets = True
-    recursion      = 10
+

    feeds          = [
                        (u'Portadas', u'http://www.heraldo.es/index.php/mod.portadas/mem.rss')
@ -37,29 +39,39 @@ class heraldo(BasicNewsRecipe):

    remove_tags        = [dict(name='a', attrs={'class':['com flo-r','enl-if','enl-df']}),
                          dict(name='div', attrs={'class':['brb-b-s con marg-btt','cnt-rel con']}),
-                          dict(name='form', attrs={'class':'form'})]
+                          dict(name='form', attrs={'class':'form'}),
+                          dict(name='ul', attrs={'id':['cont-tags','pag-1']})]

    remove_tags_before = dict(name='div' , attrs={'id':'dts'})
    remove_tags_after  = dict(name='div' , attrs={'id':'com'})

    def get_cover_url(self):
-        cover = None
-        st = time.localtime()
-        year = str(st.tm_year)
-        month = "%.2d" % st.tm_mon
-        day = "%.2d" % st.tm_mday
+       cover = None
+       st = time.localtime()
+       year = str(st.tm_year)
+       month = "%.2d" % st.tm_mon
+       day = "%.2d" % st.tm_mday
 		#[url]http://oldorigin-www.heraldo.es/20101211/primeras/portada_aragon.pdf[/url]
-        cover='http://oldorigin-www.heraldo.es/'+ year +  month + day +'/primeras/portada_aragon.pdf'
-        br = BasicNewsRecipe.get_browser()
-        try:
-            br.open(cover)
-        except:
-            self.log("\nPortada no disponible")
-            cover ='http://www.heraldo.es/MODULOS/global/publico/interfaces/img/logo-Heraldo.png'
-        return cover
-
+       cover='http://oldorigin-www.heraldo.es/'+ year +  month + day +'/primeras/portada_aragon.pdf'
+       br = BasicNewsRecipe.get_browser()
+       try:
+           br.open(cover)
+       except:
+           self.log("\nPortada no disponible")
+           cover ='http://www.heraldo.es/MODULOS/global/publico/interfaces/img/logo-Heraldo.png'
+       return cover


    extra_css = '''
-                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:xx-large;}
-		'''
+                    .con strong{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:16px;}
+                    .con h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
+                    .con span{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:12px;}
+                    .ent {font-family:Arial,Helvetica,sans-serif; font-weight:normal; font-style:italic; font-size:18px;}
+                    img{margin-bottom: 0.4em}
+                '''
+
+    preprocess_regexps = [
+
+# To separate the comments with a blank line
+                           (re.compile(r'<div id="com"', re.DOTALL|re.IGNORECASE), lambda match: '<br><div id="com"')
+                         ]
--- a/resources/recipes/tyzden.recipe
+++ b/resources/recipes/tyzden.recipe
@ -0,0 +1,80 @@
+#!/usr/bin/env  python
+__license__   = 'GPL v3'
+__copyright__ = '2011, Miroslav Vasko zemiak@gmail.com'
+
+'''
+.tyzden, a weekly news magazine (a week old issue)
+'''
+from calibre import strftime
+from calibre.web.feeds.news import BasicNewsRecipe
+from datetime import date
+import re
+
+class TyzdenRecipe(BasicNewsRecipe):
+    '''
+    Recipe for the weekly magazine .tyzden (www.tyzden.sk).
+
+    The issue to download is addressed by year and ISO week number; the
+    index page of that issue is parsed by hand (parse_index) instead of
+    using RSS feeds.
+    '''
+    __license__  = 'GPL v3'
+    __author__ = 'zemiak'
+    language = 'sk'
+    version = 1
+
+    publisher = u'www.tyzden.sk'
+    category = u'Magazine'
+    description = u'A conservative weekly magazine. The latest free issue'
+
+    # These statements run once, when the class body is executed.
+    today = date.today()
+    iso = today.isocalendar()
+    year = iso[0]
+    weeknum = iso[1]
+
+    # Use the issue of the previous week.
+    # NOTE(review): during ISO week 1 the number is left at 1, i.e. the
+    # current week is requested rather than the last week of the previous
+    # year -- confirm that this is intended.
+    if (weeknum > 1):
+        weeknum -= 1
+
+    title = u'.tyzden ' + str(weeknum) + '/' + str(year)
+
+    base_url_path = 'http://www.tyzden.sk/casopis/' + str(year) + '/' + str(weeknum)
+    base_url = base_url_path + '.html'
+
+    oldest_article = 20
+    max_articles_per_feed = 100
+    remove_javascript = True
+
+    use_embedded_content    = False
+    no_stylesheets = True
+
+    keep_only_tags = [
+        dict(name = 'h1'),
+        dict(name = 'div', attrs = {'class': 'text_area top_nofoto'}),
+        dict(name = 'div', attrs = {'class': 'text_block'}),
+    ]
+
+    remove_tags_after = [dict(name = 'div', attrs = {'class': 'text_block'})]
+
+    def find_sections(self):
+        '''
+        Yield ``(section title, anchor tag)`` for every link on the issue
+        index page whose href matches ``rubrika/.*``.
+
+        As a side effect, self.cover_url is set when the page contains an
+        <img> inside a div with class 'foto'.
+        '''
+        soup = self.index_to_soup(self.base_url)
+
+        # Cover picture (indentation here used to be a tab; now spaces).
+        imgdiv = soup.find('div', attrs = {'class': 'foto'})
+        if imgdiv is not None:
+            img = imgdiv.find('img')
+            if img is not None:
+                self.cover_url = 'http://www.tyzden.sk/' + img['src']
+
+        for s in soup.findAll('a', attrs={'href': re.compile(r'rubrika/.*')}):
+            yield (self.tag_to_string(s), s)
+
+    def find_articles(self, soup):
+        '''
+        Yield one article dict per link that follows a section anchor.
+
+        ``soup`` is, despite its name, the section anchor tag produced by
+        find_sections.  The links after it are walked in document order
+        and the walk stops at the first one whose href does not start
+        with 'casopis/'.
+        '''
+        for art in soup.findAllNext('a'):
+            # .get() so that an <a> without href ends the run of articles
+            # instead of raising KeyError.
+            url = art.get('href', '')
+            if not url.startswith('casopis/'):
+                break
+
+            title = self.tag_to_string(art)
+            yield {
+                    'title': title, 'url':self.base_url_path + '/' + url, 'description':title,
+                    'date' : strftime('%a, %d %b'),
+                    }
+
+    def parse_index(self):
+        '''
+        Return the issue index as a list of
+        ``(section title, [article dict, ...])`` tuples.
+        '''
+        return [(title, list(self.find_articles(anchor)))
+                for title, anchor in self.find_sections()]
--- a/setup/build_environment.py
+++ b/setup/build_environment.py
@ -117,7 +117,6 @@ if iswindows:
    poppler_inc_dirs = consolidate('POPPLER_INC_DIR',
            r'%s\poppler;%s'%(sw_inc_dir, sw_inc_dir))

-    popplerqt4_inc_dirs = poppler_inc_dirs + [poppler_inc_dirs[1]+r'\qt4']
    poppler_lib_dirs = consolidate('POPPLER_LIB_DIR', sw_lib_dir)
    popplerqt4_lib_dirs = poppler_lib_dirs
    poppler_libs = ['poppler']
@ -131,7 +130,6 @@ elif isosx:
    fc_lib = '/sw/lib'
    poppler_inc_dirs = consolidate('POPPLER_INC_DIR',
            '/sw/build/poppler-0.14.5/poppler:/sw/build/poppler-0.14.5')
-    popplerqt4_inc_dirs = poppler_inc_dirs + [poppler_inc_dirs[0]+'/qt4']
    poppler_lib_dirs = consolidate('POPPLER_LIB_DIR',
            '/sw/lib')
    poppler_libs = ['poppler']
@ -150,9 +148,6 @@ else:
    # Include directories
    poppler_inc_dirs = pkgconfig_include_dirs('poppler',
        'POPPLER_INC_DIR', '/usr/include/poppler')
-    popplerqt4_inc_dirs = pkgconfig_include_dirs('poppler-qt4', '', '')
-    if not popplerqt4_inc_dirs:
-        popplerqt4_inc_dirs = poppler_inc_dirs + [poppler_inc_dirs[0]+'/qt4']
    png_inc_dirs = pkgconfig_include_dirs('libpng', 'PNG_INC_DIR',
        '/usr/include')
    magick_inc_dirs = pkgconfig_include_dirs('MagickWand', 'MAGICK_INC', '/usr/include/ImageMagick')
@ -187,20 +182,17 @@ if not poppler_inc_dirs or not os.path.exists(
    poppler_error = \
    ('Poppler not found on your system. Various PDF related',
    ' functionality will not work. Use the POPPLER_INC_DIR and',
-    ' POPPLER_LIB_DIR environment variables.')
-
-popplerqt4_error = None
-if not popplerqt4_inc_dirs or not os.path.exists(
-        os.path.join(popplerqt4_inc_dirs[-1], 'poppler-qt4.h')):
-    popplerqt4_error = \
-            ('Poppler Qt4 bindings not found on your system.')
+    ' POPPLER_LIB_DIR environment variables. calibre requires '
+    ' the poppler XPDF headers. If your distro does not '
+    ' include them you will have to re-compile poppler '
+    ' by hand with --enable-xpdf-headers')

 magick_error = None
 if not magick_inc_dirs or not os.path.exists(os.path.join(magick_inc_dirs[0],
    'wand')):
    magick_error = ('ImageMagick not found on your system. '
            'Try setting the environment variables MAGICK_INC '
-            'and MAGICK_LIB to help calibre locate the inclue and libbrary '
+            'and MAGICK_LIB to help calibre locate the include and library '
            'files.')

 podofo_lib = os.environ.get('PODOFO_LIB_DIR', podofo_lib)
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@ -53,6 +53,9 @@ class ANDROID(USBMS):
            # LG
            0x1004 : { 0x61cc : [0x100] },

+            # Archos
+            0x0e79 : { 0x1420 : [0x0216]},
+
            }
    EBOOK_DIR_MAIN = ['eBooks/import', 'wordplayer/calibretransfer', 'Books']
    EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '
@ -61,18 +64,19 @@ class ANDROID(USBMS):
    EXTRA_CUSTOMIZATION_DEFAULT = ', '.join(EBOOK_DIR_MAIN)

    VENDOR_NAME      = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
-            'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE']
+            'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS']
    WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
            '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
            'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
            'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
-            'SGH-T849', '_MB300']
+            'SGH-T849', '_MB300', 'A70S']
    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
-            'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD']
+            'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
+            'A70S']

-    OSX_MAIN_MEM = 'HTC Android Phone Media'
+    OSX_MAIN_MEM = 'Android Device Main Memory'

-    MAIN_MEMORY_VOLUME_LABEL  = 'Android Phone Internal Memory'
+    MAIN_MEMORY_VOLUME_LABEL  = 'Android Device Main Memory'

    SUPPORTS_SUB_DIRS = True

--- a/src/calibre/devices/prs505/driver.py
+++ b/src/calibre/devices/prs505/driver.py
@ -76,12 +76,23 @@ class PRS505(USBMS):
                'sending DRMed books in which you cannot change the cover.'
                ' WARNING: This option should only be used with newer '
                'SONY readers: 350, 650, 950 and newer.'),
+            _('Refresh separate covers when using automatic management (newer readers)') +
+                ':::' +
+                _('Set this option to have separate book covers uploaded '
+                  'every time you connect your device. Unset this option if '
+                  'you have so many books on the reader that performance is '
+                  'unacceptable.')
    ]
    EXTRA_CUSTOMIZATION_DEFAULT = [
                ', '.join(['series', 'tags']),
+                False,
                False
    ]

+    OPT_COLLECTIONS    = 0
+    OPT_UPLOAD_COVERS  = 1
+    OPT_REFRESH_COVERS = 2
+
    plugboard = None
    plugboard_func = None

@ -171,7 +182,7 @@ class PRS505(USBMS):
        opts = self.settings()
        if opts.extra_customization:
            collections = [x.strip() for x in
-                    opts.extra_customization[0].split(',')]
+                    opts.extra_customization[self.OPT_COLLECTIONS].split(',')]
        else:
            collections = []
        debug_print('PRS505: collection fields:', collections)
@ -183,6 +194,20 @@ class PRS505(USBMS):
        c.update(blists, collections, pb)
        c.write()

+        if opts.extra_customization[self.OPT_REFRESH_COVERS]:
+            debug_print('PRS505: uploading covers in sync_booklists')
+            for idx,bl in blists.items():
+                prefix = self._card_a_prefix if idx == 1 else \
+                                self._card_b_prefix if idx == 2 \
+                                    else self._main_prefix
+                for book in bl:
+                    p = os.path.join(prefix, book.lpath)
+                    self._upload_cover(os.path.dirname(p),
+                                      os.path.splitext(os.path.basename(p))[0],
+                                      book, p)
+        else:
+            debug_print('PRS505: NOT uploading covers in sync_booklists')
+
        USBMS.sync_booklists(self, booklists, end_session=end_session)
        debug_print('PRS505: finished sync_booklists')

@ -199,11 +224,14 @@ class PRS505(USBMS):

    def upload_cover(self, path, filename, metadata, filepath):
        opts = self.settings()
-        if not opts.extra_customization[1]:
+        if not opts.extra_customization[self.OPT_UPLOAD_COVERS]:
            # Building thumbnails disabled
-            debug_print('PRS505: not uploading covers')
+            debug_print('PRS505: not uploading cover')
            return
-        debug_print('PRS505: uploading covers')
+        debug_print('PRS505: uploading cover')
+        self._upload_cover(path, filename, metadata, filepath)
+
+    def _upload_cover(self, path, filename, metadata, filepath):
        if metadata.thumbnail and metadata.thumbnail[-1]:
            path = path.replace('/', os.sep)
            is_main = path.startswith(self._main_prefix)
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@ -480,7 +480,7 @@ class HTMLPreProcessor(object):
                end_rules.append((re.compile(u'(?<=.{%i}[–—])\s*<p>\s*(?=[[a-z\d])' % length), lambda match: ''))
                end_rules.append(
                    # Un wrap using punctuation
-                    (re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíóńśúâêîôûçąężıãõñæøþðß,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
+                    (re.compile(u'(?<=.{%i}([a-zäëïöüàèìòùáćéíóńśúâêîôûçąężıãõñæøþðßě,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
                )

        for rule in self.PREPROCESS + start_rules:
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@ -186,20 +186,20 @@ class PreProcessor(object):

    def punctuation_unwrap(self, length, content, format):
        # define the pieces of the regex
-        lookahead = "(?<=.{"+str(length)+"}([a-zäëïöüàèìòùáćéíóńśúâêîôûçąężıãõñæøþðß,:)\IA\u00DF]|(?<!\&\w{4});))" # (?<!\&\w{4});) is a semicolon not part of an entity
+        lookahead = "(?<=.{"+str(length)+"}([a-zäëïöüàèìòùáćéíóńśúâêîôûçąężıãõñæøþðßě,:)\IA\u00DF]|(?<!\&\w{4});))" # (?<!\&\w{4});) is a semicolon not part of an entity
        line_ending = "\s*</(span|p|div)>\s*(</(p|span|div)>)?"
        blanklines = "\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*"
        line_opening = "<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*"
        txt_line_wrap = u"((\u0020|\u0009)*\n){1,4}"
-        
+
        unwrap_regex = lookahead+line_ending+blanklines+line_opening
        if format == 'txt':
            unwrap_regex = lookahead+txt_line_wrap
-        
+
        unwrap = re.compile(u"%s" % unwrap_regex, re.UNICODE)
        content = unwrap.sub(' ', content)
        return content
-       
+

    def __call__(self, html):
        self.log("*********  Preprocessing HTML  *********")
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@ -296,7 +296,7 @@ class RTFInput(InputFormatPlugin):
                        u'<p>\u00a0</p>\n'.encode('utf-8'), res)
            if self.opts.preprocess_html:
                preprocessor = PreProcessor(self.opts, log=getattr(self, 'log', None))
-                res = preprocessor(res)
+                res = preprocessor(res.decode('utf-8')).encode('utf-8')
            f.write(res)
        self.write_inline_css(inline_class, border_styles)
        stream.seek(0)
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@ -53,7 +53,7 @@ class TXTInput(InputFormatPlugin):
    def convert(self, stream, options, file_ext, log,
                accelerators):
        log.debug('Reading text from file...')
-        
+
        txt = stream.read()
        # Get the encoding of the document.
        if options.input_encoding:
@ -80,7 +80,7 @@ class TXTInput(InputFormatPlugin):
        # Get length for hyphen removal and punctuation unwrap
        docanalysis = DocAnalysis('txt', txt)
        length = docanalysis.line_length(.5)
-            
+
        if options.formatting_type == 'auto':
            options.formatting_type = detect_formatting_type(txt)

@ -122,7 +122,7 @@ class TXTInput(InputFormatPlugin):
                txt = preprocessor.punctuation_unwrap(length, txt, 'txt')

            flow_size = getattr(options, 'flow_size', 0)
-            
+
            if options.formatting_type == 'heuristic':
                html = convert_heuristic(txt, epub_split_size_kb=flow_size)
            else:
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@ -218,6 +218,10 @@ class TXTMLizer(object):

        if tag in SPACE_TAGS:
            text.append(u' ')
+            
+        # Scene breaks.
+        if tag == 'hr':
+            text.append('\n\n* * *\n\n')

        # Process tags that contain text.
        if hasattr(elem, 'text') and elem.text:
--- a/src/calibre/gui2/preferences/plugboard.py
+++ b/src/calibre/gui2/preferences/plugboard.py
@ -5,11 +5,11 @@ __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-from PyQt4 import QtGui
-from PyQt4.Qt import Qt
+from PyQt4.Qt import Qt, QLineEdit, QComboBox, SIGNAL, QListWidgetItem

 from calibre.gui2 import error_dialog
 from calibre.gui2.device import device_name_for_plugboards
+from calibre.gui2.dialogs.template_dialog import TemplateDialog
 from calibre.gui2.preferences import ConfigWidgetBase, test_widget
 from calibre.gui2.preferences.plugboard_ui import Ui_Form
 from calibre.customize.ui import metadata_writers, device_plugins
@ -17,6 +17,27 @@ from calibre.library.save_to_disk import plugboard_any_format_value, \
                        plugboard_any_device_value, plugboard_save_to_disk_value
 from calibre.utils.formatter import validation_formatter

+
+class LineEditWithTextBox(QLineEdit):
+
+    '''
+    A QLineEdit whose context menu has one extra entry, 'Open Editor',
+    which edits the current text in a TemplateDialog.
+    '''
+
+    def contextMenuEvent(self, event):
+        '''
+        Show the standard context menu, followed by a separator and the
+        'Open Editor' action, at the global position of ``event``.
+        '''
+        context_menu = self.createStandardContextMenu()
+        context_menu.addSeparator()
+        editor_action = context_menu.addAction(_('Open Editor'))
+        self.connect(editor_action, SIGNAL('triggered()'), self.open_editor)
+        context_menu.exec_(event.globalPos())
+
+    def open_editor(self):
+        '''
+        Run a TemplateDialog on the current text; if exec_() returns a
+        true value, replace the text with the dialog's textbox contents.
+        '''
+        dialog = TemplateDialog(self, self.text())
+        if not dialog.exec_():
+            return
+        self.setText(dialog.textbox.toPlainText())
+
 class ConfigWidget(ConfigWidgetBase, Ui_Form):

    def genesis(self, gui):
@ -72,10 +93,10 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
        self.source_widgets = []
        self.dest_widgets = []
        for i in range(0, len(self.dest_fields)-1):
-            w = QtGui.QLineEdit(self)
+            w = LineEditWithTextBox(self)
            self.source_widgets.append(w)
            self.fields_layout.addWidget(w, 5+i, 0, 1, 1)
-            w = QtGui.QComboBox(self)
+            w = QComboBox(self)
            self.dest_widgets.append(w)
            self.fields_layout.addWidget(w, 5+i, 1, 1, 1)

@ -297,7 +318,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
                for op in self.current_plugboards[f][d]:
                    ops.append('([' + op[0] + '] -> ' + op[1] + ')')
                txt = '%s:%s = %s\n'%(f, d, ', '.join(ops))
-                item = QtGui.QListWidgetItem(txt)
+                item = QListWidgetItem(txt)
                item.setData(Qt.UserRole, (f, d))
                self.existing_plugboards.addItem(item)
        self.refilling = False
--- a/src/calibre/library/custom_columns.py
+++ b/src/calibre/library/custom_columns.py
@ -151,6 +151,8 @@ class CustomColumns(object):
            return v

        def adapt_number(x, d):
+            if x is None:
+                return None
            if isinstance(x, (str, unicode, bytes)):
                if x.lower() == 'none':
                    return None
@ -195,8 +197,8 @@ class CustomColumns(object):
            data = self.custom_column_num_map[num]
        row = self.data._data[idx] if index_is_id else self.data[idx]
        ans = row[self.FIELD_MAP[data['num']]]
-        if ans and data['is_multiple'] and data['datatype'] == 'text':
-            ans = ans.split('|')
+        if data['is_multiple'] and data['datatype'] == 'text':
+            ans = ans.split('|') if ans else []
            if data['display'].get('sort_alpha', False):
                ans.sort(cmp=lambda x,y:cmp(x.lower(), y.lower()))
        return ans
--- a/src/calibre/library/server/browse.py
+++ b/src/calibre/library/server/browse.py
@ -756,7 +756,7 @@ class BrowseServer(object):
        sort = self.browse_sort_book_list(items, list_sort)
        ids = [x[0] for x in items]
        html = render_book_list(ids, self.opts.url_prefix,
-                suffix=_('in search')+': '+query)
+                suffix=_('in search')+': '+xml(query))
        return self.browse_template(sort, category=False, initial_search=query).format(
                title=_('Matching books'),
                script='booklist();', main=html)
--- a/src/calibre/library/sqlite.py
+++ b/src/calibre/library/sqlite.py
@ -98,9 +98,10 @@ class AumSortedConcatenate(object):

    def finalize(self):
        keys = self.ans.keys()
-        if len(keys) == 0:
-            return None
-        if len(keys) == 1:
+        l = len(keys)
+        if l == 0:
+            return 'Unknown:::Unknown'
+        if l == 1:
            return self.ans[keys[0]]
        return ':#:'.join([self.ans[v] for v in sorted(keys)])

--- a/src/calibre/utils/formatter.py
+++ b/src/calibre/utils/formatter.py
@ -98,9 +98,10 @@ class _Parser(object):
        m = 'Formatter: ' + message + _(' near ')
        if self.lex_pos > 0:
            m = '{0} {1}'.format(m, self.prog[self.lex_pos-1][1])
-        m = '{0} {1}'.format(m, self.prog[self.lex_pos][1])
-        if self.lex_pos < len(self.prog):
+        elif self.lex_pos < len(self.prog):
            m = '{0} {1}'.format(m, self.prog[self.lex_pos+1][1])
+        else:
+            m = '{0} {1}'.format(m, _('end of program'))
        raise ValueError(m)

    def token(self):