diff --git a/Changelog.yaml b/Changelog.yaml index 71f7f1c52b..702378719f 100644 --- a/Changelog.yaml +++ b/Changelog.yaml @@ -55,7 +55,7 @@ - title: "Add search to the plugin preferences dialog" bug fixes: - - title: "Fix a bug that could cause fiels to be lost when changing metadata on east asian windows installs if the title and/or author is very long." + - title: "Fix a bug that could cause files to be lost when changing metadata on east asian windows installs if the title and/or author is very long." tickets: [8620] - title: "Tag browser: Fix searching with items in a user category not owrking if the main category is hidden" @@ -88,7 +88,7 @@ - title: "Do not discard the result of a conversion if the user opens the edit metadata dialog while the conversion is running" tickets: [8672] - - title: "CHM Input: When the chm file lacks a hhc, lookf for index.html instead" + - title: "CHM Input: When the chm file lacks a hhc, look for index.html instead" tickets: [8688] - title: "EPUB Input: Filter some invalid media types from the spine" diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index f1abfbe7ea..893c8b6b6a 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -39,7 +39,7 @@ completer_append_separator = False # The algorithm used to copy author to author_sort # Possible values are: -# invert: use "fn ln" -> "ln, fn" (the original algorithm) +# invert: use "fn ln" -> "ln, fn" (the default algorithm) # copy : copy author to author_sort without modification # comma : use 'copy' if there is a ',' in the name, otherwise use 'invert' # nocomma : "fn ln" -> "ln fn" (without the comma) diff --git a/resources/images/news/kopalniawiedzy.png b/resources/images/news/kopalniawiedzy.png new file mode 100644 index 0000000000..73a4a338aa Binary files /dev/null and b/resources/images/news/kopalniawiedzy.png differ diff --git a/resources/images/news/korespondent.png b/resources/images/news/korespondent.png new file mode 100644 
index 0000000000..e2724b11b4 Binary files /dev/null and b/resources/images/news/korespondent.png differ diff --git a/resources/jacket/stylesheet.css b/resources/jacket/stylesheet.css index c45f8fe977..56bef24ac2 100644 --- a/resources/jacket/stylesheet.css +++ b/resources/jacket/stylesheet.css @@ -113,8 +113,8 @@ table.cbj_header tr.cbj_series { /* display:none; */ } -table.cbj_header tr.cbj_pubdate { - /* Uncomment the next line to remove 'Published' from banner section */ +table.cbj_header tr.cbj_pubdata { + /* Uncomment the next line to remove 'Published (year of publication)' from banner section */ /* display:none; */ } diff --git a/resources/recipes/cinebel_be.recipe b/resources/recipes/cinebel_be.recipe index ec76bfc894..024050eb67 100644 --- a/resources/recipes/cinebel_be.recipe +++ b/resources/recipes/cinebel_be.recipe @@ -1,7 +1,7 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Lionel Bergeret ' +__copyright__ = '2008-2011, Lionel Bergeret ' ''' cinebel.be ''' @@ -14,14 +14,14 @@ class Cinebel(BasicNewsRecipe): description = u'Cinema news from Belgium in French' publisher = u'cinebel.be' category = 'news, cinema, movie, Belgium' - oldest_article = 3 - encoding = 'utf8' - language = 'fr_BE' + oldest_article = 15 + language = 'fr' max_articles_per_feed = 20 no_stylesheets = True use_embedded_content = False timefmt = ' [%d %b %Y]' + filterDuplicates = True keep_only_tags = [ dict(name = 'span', attrs = {'class': 'movieMainTitle'}) @@ -35,6 +35,13 @@ class Cinebel(BasicNewsRecipe): ,(u'Top 10' , u'http://www.cinebel.be/Servlets/RssServlet?languageCode=fr&rssType=2' ) ] + def preprocess_html(self, soup): + for alink in soup.findAll('a'): + if alink.has_key('href'): + tstr = "Site officiel: " + alink['href'] + alink.replaceWith(tstr) + return soup + def get_cover_url(self): cover_url = 'http://www.cinebel.be/portal/resources/common/logo_index.gif' return cover_url diff --git a/resources/recipes/dhnet_be.recipe 
b/resources/recipes/dhnet_be.recipe index ef4d1736e3..d55470a765 100644 --- a/resources/recipes/dhnet_be.recipe +++ b/resources/recipes/dhnet_be.recipe @@ -1,7 +1,7 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Lionel Bergeret ' +__copyright__ = '2008-2011, Lionel Bergeret ' ''' dhnet.be ''' @@ -16,7 +16,8 @@ class DHNetBe(BasicNewsRecipe): publisher = u'dhnet.be' category = 'news, Belgium' oldest_article = 3 - language = 'fr_BE' + language = 'fr' + masthead_url = 'http://www.dhnet.be/images/homepage_logo_dh.gif' max_articles_per_feed = 20 no_stylesheets = True @@ -34,6 +35,13 @@ class DHNetBe(BasicNewsRecipe): ,(u'La Une Info' , u'http://www.dhnet.be/rss/dhinfos/' ) ] + def preprocess_html(self, soup): + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) + return soup + def get_cover_url(self): cover_url = strftime('http://pdf-online.dhnet.be/pdfonline/image/%Y%m%d/dh_%Y%m%d_nam_infoge_001.pdf.L.jpg') return cover_url diff --git a/resources/recipes/europa_press.recipe b/resources/recipes/europa_press.recipe new file mode 100644 index 0000000000..ace0f8b6d1 --- /dev/null +++ b/resources/recipes/europa_press.recipe @@ -0,0 +1,55 @@ +__license__ = 'GPL v3' +__author__ = 'Luis Hernandez' +__copyright__ = 'Luis Hernandez' +__version__ = 'v1.0' +__date__ = '30 January 2011' + +''' +www.europapress.es +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1294946868(BasicNewsRecipe): + + title = u'Europa Press' + author = 'Luis Hernandez' + description = 'spanish news agency' + + oldest_article = 2 + max_articles_per_feed = 100 + + remove_javascript = True + no_stylesheets = True + use_embedded_content = False + + language = 'es' + timefmt = '[%a, %d %b, %Y]' + + remove_tags_before = dict(name='div' , attrs={'class':['nivel1 bg_3col']}) + remove_tags_after = dict(name='div' , attrs={'id':['ImprimirEnviarNoticia']}) + + remove_tags = [ + dict(name='ul', 
attrs={'id':['entidadesNoticia','MenuSecciones']}) + ,dict(name='div', attrs={'id':['ImprimirEnviarNoticia','PublicidadSuperior','CabeceraDerecha','Comentarios','comentarios full fbConnectAPI','ComentarEstaNoticia','ctl00_Superior_Main_MasEnChance_cajamasnoticias','gl_chn','videos_portada_derecha','galeria_portada_central','galeria_portada_central_boxes']}) + ,dict(name='div', attrs={'class':['infoRelacionada','col_1','buscador','caja doblecolumna strong','CHANCE_EP_Encuesta_frontal text','seccionportada col_0','seccion header','text','pie caption_over']}) + ,dict(name='a', attrs={'class':['buscadorLabel']}) + ,dict(name='span', attrs={'class':['editado']}) + ,dict(name='table') + ,dict(name='li') + ] + + + feeds = [ + (u'Portada' , u'http://www.europapress.es/rss/rss.aspx') + ,(u'Nacional' , u'http://www.europapress.es/rss/rss.aspx?ch=66') + ,(u'Internacional' , u'http://www.europapress.es/rss/rss.aspx?ch=69') + ,(u'Economia' , u'http://www.europapress.es/rss/rss.aspx?ch=136') + ,(u'Deportes' , u'http://www.europapress.es/rss/rss.aspx?ch=67') + ,(u'Cultura' , u'http://www.europapress.es/rss/rss.aspx?ch=126') + ,(u'Sociedad' , u'http://www.europapress.es/rss/rss.aspx?ch=73') + ,(u'Motor' , u'http://www.europapress.es/rss/rss.aspx?ch=435') + ,(u'CHANCE' , u'http://www.europapress.es/rss/rss.aspx?ch=549') + ,(u'Comunicados' , u'http://www.europapress.es/rss/rss.aspx?ch=137') + ] + diff --git a/resources/recipes/irish_times.recipe b/resources/recipes/irish_times.recipe index 0ac130ed7a..83ea496b2c 100644 --- a/resources/recipes/irish_times.recipe +++ b/resources/recipes/irish_times.recipe @@ -35,7 +35,7 @@ class IrishTimes(BasicNewsRecipe): def print_version(self, url): if url.count('rss.feedsportal.com'): u = 'http://www.irishtimes.com' + \ - (((url[69:].replace('0C','/')).replace('0A','0'))).replace('0Bhtml/story01.htm','_pf.html') + (((url[70:].replace('0C','/')).replace('0A','0'))).replace('0Bhtml/story01.htm','_pf.html') else: u = url.replace('.html','_pf.html') 
return u diff --git a/resources/recipes/kopalniawiedzy.recipe b/resources/recipes/kopalniawiedzy.recipe new file mode 100644 index 0000000000..79aa913498 --- /dev/null +++ b/resources/recipes/kopalniawiedzy.recipe @@ -0,0 +1,80 @@ +# -*- coding: utf-8 -*- +__license__ = 'GPL v3' +__copyright__ = '2011, Attis ' +__version__ = 'v. 0.1' + +import re +from calibre.web.feeds.recipes import BasicNewsRecipe + +class KopalniaWiedzy(BasicNewsRecipe): + title = u'Kopalnia Wiedzy' + publisher = u'Kopalnia Wiedzy' + description = u'Ciekawostki ze świata nauki i techniki' + encoding = 'utf-8' + __author__ = 'Attis' + language = 'pl' + oldest_article = 7 + max_articles_per_feed = 100 + INDEX = u'http://kopalniawiedzy.pl/' + remove_javascript = True + no_stylesheets = True + + remove_tags = [{'name':'p', 'attrs': {'class': 'keywords'} }] + remove_tags_after = dict(attrs={'class':'ad-square'}) + keep_only_tags = [dict(name="div", attrs={'id':'articleContent'})] + extra_css = '.topimage {margin-top: 30px}' + + preprocess_regexps = [ + (re.compile(u''), + lambda match: '' ), + (re.compile(u'

'), + lambda match: '') + ] + + feeds = [ + (u'Biologia', u'http://kopalniawiedzy.pl/wiadomosci_biologia.rss'), + (u'Medycyna', u'http://kopalniawiedzy.pl/wiadomosci_medycyna.rss'), + (u'Psychologia', u'http://kopalniawiedzy.pl/wiadomosci_psychologia.rss'), + (u'Technologie', u'http://kopalniawiedzy.pl/wiadomosci_technologie.rss'), + (u'Ciekawostki', u'http://kopalniawiedzy.pl/wiadomosci_ciekawostki.rss'), + (u'Artykuły', u'http://kopalniawiedzy.pl/artykuly.rss') + ] + + def is_link_wanted(self, url, tag): + return tag['class'] == 'next' + + def remove_beyond(self, tag, next): + while tag is not None and getattr(tag, 'name', None) != 'body': + after = getattr(tag, next) + while after is not None: + ns = getattr(tag, next) + after.extract() + after = ns + tag = tag.parent + + def append_page(self, soup, appendtag, position): + pager = soup.find('a',attrs={'class':'next'}) + if pager: + nexturl = self.INDEX + pager['href'] + soup2 = self.index_to_soup(nexturl) + texttag = soup2.find('div', attrs={'id':'articleContent'}) + + tag = texttag.find(attrs={'class':'pages'}) + self.remove_beyond(tag, 'nextSibling') + + newpos = len(texttag.contents) + self.append_page(soup2,texttag,newpos) + + appendtag.insert(position,texttag) + + + def preprocess_html(self, soup): + self.append_page(soup, soup.body, 3) + + for item in soup.findAll('div',attrs={'class':'pages'}): + item.extract() + + for item in soup.findAll('p', attrs={'class':'wykop'}): + item.extract() + + return soup diff --git a/resources/recipes/korespondent.recipe b/resources/recipes/korespondent.recipe new file mode 100644 index 0000000000..aa9cf6e828 --- /dev/null +++ b/resources/recipes/korespondent.recipe @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- + +__license__ = 'GPL v3' +__copyright__ = '2011, Attis ' +__version__ = 'v. 
0.1' + +import re +from calibre.web.feeds.recipes import BasicNewsRecipe + +class KorespondentPL(BasicNewsRecipe): + title = u'Korespondent.pl' + publisher = u'Korespondent.pl' + description = u'Centrum wolnorynkowe - serwis ludzi wolnych' + encoding = 'utf-8' + __author__ = 'Attis' + language = 'pl' + oldest_article = 15 + max_articles_per_feed = 100 + remove_javascript = True + no_stylesheets = True + + keep_only_tags = [dict(name='div', attrs={'class':'publicystyka'})] + remove_tags = [{'name': 'meta'}, {'name':'div', 'attrs': {'class': 'zdjecie'} }] + extra_css = '.naglowek {font-size: small}\n .tytul {font-size: x-large; padding-bottom: 10px; padding-top: 30px} \n .external {font-size: small}' + + preprocess_regexps = [ + (re.compile(u'' ), + (re.compile(u'

Więcej'), + lambda match:'Więcej' ), + (re.compile(u'target="_blank"'), + lambda match:'target="_blank" class="external"' ), + (re.compile(u'

\nPoczytaj inne teksty w Serwisie wolnorynkowym Korespondent.pl.*', re.DOTALL|re.IGNORECASE), + lambda match: ''), + ] + + feeds = [(u'Serwis informacyjny', u'http://korespondent.pl/rss.xml')] + diff --git a/resources/recipes/lalibre_be.recipe b/resources/recipes/lalibre_be.recipe index 53e346bf12..a6356be828 100644 --- a/resources/recipes/lalibre_be.recipe +++ b/resources/recipes/lalibre_be.recipe @@ -1,7 +1,7 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Lionel Bergeret ' +__copyright__ = '2008-2011, Lionel Bergeret ' ''' lalibre.be ''' @@ -16,18 +16,18 @@ class LaLibre(BasicNewsRecipe): publisher = u'lalibre.be' category = 'news, Belgium' oldest_article = 3 - language = 'fr_BE' + language = 'fr' + masthead_url = 'http://www.lalibre.be/img/logoLaLibre.gif' max_articles_per_feed = 20 no_stylesheets = True use_embedded_content = False timefmt = ' [%d %b %Y]' - keep_only_tags = [ - dict(name = 'div', attrs = {'id': 'articleHat'}) - ,dict(name = 'p', attrs = {'id': 'publicationDate'}) - ,dict(name = 'div', attrs = {'id': 'articleText'}) - ] + remove_tags_before = dict(name = 'div', attrs = {'class': 'extraMainContent'}) + remove_tags_after = dict(name = 'div', attrs = {'id': 'articleText'}) + + remove_tags = [dict(name = 'div', attrs = {'id': 'strongArticleLinks'})] feeds = [ (u'L\'actu' , u'http://www.lalibre.be/rss/?section=10' ) @@ -38,6 +38,13 @@ class LaLibre(BasicNewsRecipe): ,(u'Societe' , u'http://www.lalibre.be/rss/?section=12' ) ] + def preprocess_html(self, soup): + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) + return soup + def get_cover_url(self): cover_url = strftime('http://pdf-online.lalibre.be/pdfonline/image/%Y%m%d/llb_%Y%m%d_nam_libre_001.pdf.L.jpg') return cover_url diff --git a/resources/recipes/lameuse_be.recipe b/resources/recipes/lameuse_be.recipe index 03b7f84a5f..7166d01103 100644 --- a/resources/recipes/lameuse_be.recipe +++ 
b/resources/recipes/lameuse_be.recipe @@ -1,7 +1,7 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Lionel Bergeret ' +__copyright__ = '2008-2011, Lionel Bergeret ' ''' lameuse.be ''' @@ -16,8 +16,8 @@ class LaMeuse(BasicNewsRecipe): publisher = u'lameuse.be' category = 'news, Belgium' oldest_article = 3 - encoding = 'utf8' - language = 'fr_BE' + language = 'fr' + masthead_url = 'http://www.lameuse.be/images/SPV3/logo_header_LM.gif' max_articles_per_feed = 20 no_stylesheets = True @@ -32,6 +32,11 @@ class LaMeuse(BasicNewsRecipe): dict(name = 'div', attrs = {'class': 'sb-group'}) ,dict(name = 'div', attrs = {'id': 'share'}) ,dict(name = 'div', attrs = {'id': 'commentaires'}) + ,dict(name = 'ul', attrs = {'class': 'right liensutiles'}) + ,dict(name = 'ul', attrs = {'class': 'bas liensutiles'}) + ,dict(name = 'p', attrs = {'class': 'ariane'}) + ,dict(name = 'div', attrs = {'class': 'inner-bloc'}) + ,dict(name = 'div', attrs = {'class': 'block-01'}) ] feeds = [ diff --git a/resources/recipes/lavenir_be.recipe b/resources/recipes/lavenir_be.recipe index 68be449ae5..4c2c8a00a2 100644 --- a/resources/recipes/lavenir_be.recipe +++ b/resources/recipes/lavenir_be.recipe @@ -1,7 +1,7 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Lionel Bergeret ' +__copyright__ = '2008-2011, Lionel Bergeret ' ''' lavenir.net ''' @@ -15,8 +15,7 @@ class LAvenir(BasicNewsRecipe): publisher = u'lavenir.net' category = 'news, Belgium' oldest_article = 3 - encoding = 'utf8' - language = 'fr_BE' + language = 'fr' max_articles_per_feed = 20 no_stylesheets = True @@ -35,6 +34,13 @@ class LAvenir(BasicNewsRecipe): ,(u'Societe' , u'http://www.lavenir.net/rss.aspx?foto=1&intro=1§ion=info&info=12e1a2f4-7e03-4cf1-afec-016869072317' ) ] + def preprocess_html(self, soup): + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) + return soup + def get_cover_url(self): cover_url = 
'http://www.lavenir.net/extra/Static/journal/Pdf/1/UNE_Nationale.PDF' return cover_url diff --git a/resources/recipes/lesoir_be.recipe b/resources/recipes/lesoir_be.recipe index 6b6891c3b8..64fd2fa65c 100644 --- a/resources/recipes/lesoir_be.recipe +++ b/resources/recipes/lesoir_be.recipe @@ -1,7 +1,7 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Lionel Bergeret ' +__copyright__ = '2008-2011, Lionel Bergeret ' ''' lesoir.be ''' @@ -16,7 +16,8 @@ class LeSoirBe(BasicNewsRecipe): publisher = u'lesoir.be' category = 'news, Belgium' oldest_article = 3 - language = 'fr_BE' + language = 'fr' + masthead_url = 'http://pdf.lesoir.be/pdf/images/SOIR//logo.gif' max_articles_per_feed = 20 no_stylesheets = True diff --git a/resources/recipes/new_yorker.recipe b/resources/recipes/new_yorker.recipe index d69a4df24f..9eeb8b31ee 100644 --- a/resources/recipes/new_yorker.recipe +++ b/resources/recipes/new_yorker.recipe @@ -54,10 +54,10 @@ class NewYorker(BasicNewsRecipe): ,dict(attrs={'id':['show-header','show-footer'] }) ] remove_attributes = ['lang'] - feeds = [(u'The New Yorker', u'http://www.newyorker.com/services/rss/feeds/everything.xml')] + feeds = [(u'The New Yorker', u'http://www.newyorker.com/services/mrss/feeds/everything.xml')] def print_version(self, url): - return 'http://www.newyorker.com' + url + '?printable=true' + return url + '?printable=true' def image_url_processor(self, baseurl, url): return url.strip() diff --git a/resources/recipes/radio_prague.recipe b/resources/recipes/radio_prague.recipe new file mode 100644 index 0000000000..2e228e06a9 --- /dev/null +++ b/resources/recipes/radio_prague.recipe @@ -0,0 +1,43 @@ + +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1291540961(BasicNewsRecipe): + + title = u'Radio Praha' + __author__ = 'Francois Pellicaan' + description = 'News and information from and about The Czech republic. 
' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + remove_empty_feeds = True + encoding = 'utf8' + publisher = 'Radio Prague' + category = 'News' + language = 'en_CZ' + publication_type = 'newsportal' + + extra_css = 'h1 .section { display: block; text-transform: uppercase; font-size: 10px; margin-top: 4em; } \n .title { font-size: 14px; margin-top: 4em; } \n a.photo { display: block; clear:both; } \n .caption { font-size: 9px; display: block; clear:both; padding:0px 0px 20px 0px; } \n a { font-type: normal; }' + + + keep_only_tags = [ + dict(name='div', attrs={'class':['main']}) + ] + remove_tags = [ + dict(name='div', attrs={'class':['cleaner', 'options', 'toolsXXL']}), + dict(name='ul', attrs={'class':['tools']}) + ] + feeds = [ + (u'Current Affairs', 'http://www.radio.cz/feeds/rss/en/themes/curraffrs.xml'), + (u'Society', 'http://www.radio.cz/feeds/rss/en/themes/society.xml'), + (u'European Union', 'http:http://www.radio.cz/feeds/rss/en/themes/eu.xml'), + (u'Foreign policy', 'http://www.radio.cz/feeds/rss/en/themes/foreignpolicy.xml'), + (u'Business', 'http://www.radio.cz/feeds/rss/en/themes/business.xml'), + (u'Culture', 'http://www.radio.cz/feeds/rss/en/themes/culture.xml'), + (u'Czechs abroad', 'http://www.radio.cz/feeds/rss/en/themes/czechabroad.xml'), + (u'History', 'http://www.radio.cz/feeds/rss/en/themes/history.xml'), + (u'Nature', 'http://www.radio.cz/feeds/rss/en/themes/nature.xml'), + (u'Science', 'http://www.radio.cz/feeds/rss/en/themes/science.xml'), + (u'Sport', 'http://www.radio.cz/feeds/rss/en/themes/sport.xml'), + (u'Travel', 'http://www.radio.cz/feeds/rss/en/themes/travel.xml'), + ] diff --git a/resources/recipes/radio_praha.recipe b/resources/recipes/radio_praha.recipe new file mode 100644 index 0000000000..9f14a55e40 --- /dev/null +++ b/resources/recipes/radio_praha.recipe @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- + +from calibre.web.feeds.news import BasicNewsRecipe + +class 
AdvancedUserRecipe1291540961(BasicNewsRecipe): + + title = u'Radio Praha' + __author__ = 'Francois Pellicaan' + description = u'Česká oficiální mezinárodní vysílací stanice.' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + remove_empty_feeds = True + encoding = 'utf8' + publisher = u'Český rozhlas' + category = 'News' + language = 'cs' + publication_type = 'newsportal' + + extra_css = u'h1 .section { display: block; text-transform: uppercase; font-size: 10px; margin-top: 4em; } \n .title { font-size: 14px; margin-top: 4em; } \n a.photo { display: block; clear:both; } \n .caption { font-size: 9px; display: block; clear:both; padding:0px 0px 20px 0px; } \n a { font-type: normal; }' + + + keep_only_tags = [ + dict(name='div', attrs={'class':['main']}) + ] + remove_tags = [ + dict(name='div', attrs={'class':['cleaner', 'options', 'toolsXXL']}), + dict(name='ul', attrs={'class':['tools']}) + ] + feeds = [ + (u'Domácí politika', 'http://www.radio.cz/feeds/rss/cs/oblast/dompol.xml'), + (u'Společnost', 'http://www.radio.cz/feeds/rss/cs/oblast/spolecnost.xml'), + (u'Evropská unie', 'http://www.radio.cz/feeds/rss/cs/oblast/eu.xml'), + (u'Zahraniční politika', 'http://www.radio.cz/feeds/rss/cs/oblast/zahrpol.xml'), + (u'Ekonomika', 'http://www.radio.cz/feeds/rss/cs/oblast/ekonomika.xml'), + (u'Kultura', 'http://www.radio.cz/feeds/rss/cs/oblast/kultura.xml'), + (u'Krajané', 'http://www.radio.cz/feeds/rss/cs/oblast/krajane.xml'), + (u'Historie', 'http://www.radio.cz/feeds/rss/cs/oblast/historie.xml'), + (u'Příroda', 'http://www.radio.cz/feeds/rss/cs/oblast/priroda.xml'), + (u'Věda', 'http://www.radio.cz/feeds/rss/cs/oblast/veda.xml'), + (u'Sport', 'http://www.radio.cz/feeds/rss/cs/oblast/sport.xml'), + (u'Cestování', 'http://www.radio.cz/feeds/rss/cs/oblast/cestovani.xml'), + ] diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index d48bbb3d62..b1688e58c6 100644 --- 
a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -325,6 +325,17 @@ class TXTMetadataReader(MetadataReaderPlugin): from calibre.ebooks.metadata.txt import get_metadata return get_metadata(stream) +class TXTZMetadataReader(MetadataReaderPlugin): + + name = 'Read TXTZ metadata' + file_types = set(['txtz']) + description = _('Read metadata from %s files') % 'TXTZ' + author = 'John Schember' + + def get_metadata(self, stream, ftype): + from calibre.ebooks.metadata.txtz import get_metadata + return get_metadata(stream) + class ZipMetadataReader(MetadataReaderPlugin): name = 'Read ZIP metadata' @@ -412,6 +423,17 @@ class TOPAZMetadataWriter(MetadataWriterPlugin): from calibre.ebooks.metadata.topaz import set_metadata set_metadata(stream, mi) +class TXTZMetadataWriter(MetadataWriterPlugin): + + name = 'Set TXTZ metadata' + file_types = set(['txtz']) + description = _('Set metadata from %s files') % 'TXTZ' + author = 'John Schember' + + def set_metadata(self, stream, mi, type): + from calibre.ebooks.metadata.txtz import set_metadata + set_metadata(stream, mi) + # }}} from calibre.ebooks.comic.input import ComicInput @@ -446,6 +468,7 @@ from calibre.ebooks.rb.output import RBOutput from calibre.ebooks.rtf.output import RTFOutput from calibre.ebooks.tcr.output import TCROutput from calibre.ebooks.txt.output import TXTOutput +from calibre.ebooks.txt.output import TXTZOutput from calibre.ebooks.html.output import HTMLOutput from calibre.ebooks.snb.output import SNBOutput @@ -534,6 +557,7 @@ plugins += [ RTFOutput, TCROutput, TXTOutput, + TXTZOutput, HTMLOutput, SNBOutput, ] @@ -770,6 +794,17 @@ class Toolbar(PreferencesPlugin): description = _('Customize the toolbars and context menus, changing which' ' actions are available in each') +class Search(PreferencesPlugin): + name = 'Search' + icon = I('search.png') + gui_name = _('Customize searching') + category = 'Interface' + gui_category = _('Interface') + category_order = 1 + name_order = 5 + 
config_widget = 'calibre.gui2.preferences.search' + description = _('Customize the way searching for books works in calibre') + class InputOptions(PreferencesPlugin): name = 'Input Options' icon = I('arrow-down.png') @@ -920,7 +955,7 @@ class Misc(PreferencesPlugin): config_widget = 'calibre.gui2.preferences.misc' description = _('Miscellaneous advanced configuration') -plugins += [LookAndFeel, Behavior, Columns, Toolbar, InputOptions, +plugins += [LookAndFeel, Behavior, Columns, Toolbar, Search, InputOptions, CommonOptions, OutputOptions, Adding, Saving, Sending, Plugboard, Email, Server, Plugins, Tweaks, Misc, TemplateFunctions] diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index 5912e40a69..e9021461eb 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -19,10 +19,15 @@ class ANDROID(USBMS): VENDOR_ID = { # HTC - 0x0bb4 : { 0x0c02 : [0x100, 0x0227, 0x0226], 0x0c01 : [0x100, - 0x0227, 0x0226], 0x0ff9 - : [0x0100, 0x0227, 0x0226], 0x0c87: [0x0100, 0x0227, 0x0226], - 0xc92 : [0x100], 0xc97: [0x226], 0xc99 : [0x0100]}, + 0x0bb4 : { 0x0c02 : [0x100, 0x0227, 0x0226], + 0x0c01 : [0x100, 0x0227, 0x0226], + 0x0ff9 : [0x0100, 0x0227, 0x0226], + 0x0c87 : [0x0100, 0x0227, 0x0226], + 0xc92 : [0x100], + 0xc97 : [0x226], + 0xc99 : [0x0100], + 0xca3 : [0x100], + }, # Eken 0x040d : { 0x8510 : [0x0001], 0x0851 : [0x1] }, @@ -57,6 +62,9 @@ class ANDROID(USBMS): # Archos 0x0e79 : { 0x1419: [0x0216], 0x1420 : [0x0216], 0x1422 : [0x0216]}, + # Huawei + 0x45e : { 0x00e1 : [0x007], }, + } EBOOK_DIR_MAIN = ['eBooks/import', 'wordplayer/calibretransfer', 'Books'] EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to ' @@ -66,12 +74,13 @@ class ANDROID(USBMS): VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER', 'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS', - 'TELECHIP'] + 'TELECHIP', 'HUAWEI', ] WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 
'INC.NEXUS_ONE', '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897', 'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE', - 'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H'] + 'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H', + 'IDEOS_TABLET'] WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'A70S', 'A101IT'] diff --git a/src/calibre/devices/eb600/driver.py b/src/calibre/devices/eb600/driver.py index e38f72aea5..5374c6c4e2 100644 --- a/src/calibre/devices/eb600/driver.py +++ b/src/calibre/devices/eb600/driver.py @@ -172,10 +172,10 @@ class INVESBOOK(EB600): gui_name = 'Inves Book 600' FORMATS = ['epub', 'mobi', 'prc', 'fb2', 'html', 'pdf', 'rtf', 'txt'] + BCD = [0x110, 0x323] - VENDOR_NAME = 'INVES_E6' - WINDOWS_MAIN_MEM = '00INVES_E600' - WINDOWS_CARD_A_MEM = '00INVES_E600' + VENDOR_NAME = ['INVES_E6', 'INVES-WI'] + WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['00INVES_E600', 'INVES-WIBOOK'] class BOOQ(EB600): name = 'Booq Device Interface' diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py index 4dc97f43ed..49604ae682 100644 --- a/src/calibre/ebooks/__init__.py +++ b/src/calibre/ebooks/__init__.py @@ -25,7 +25,7 @@ class DRMError(ValueError): class ParserError(ValueError): pass -BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm', +BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'htm', 'xhtm', 'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc', 'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip', 'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'mbp', 'tan', 'snb'] diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 691aa307d7..6fafbb992e 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ 
b/src/calibre/ebooks/conversion/preprocess.py @@ -245,17 +245,17 @@ class Dehyphenator(object): self.html = html self.format = format if format == 'html': - intextmatch = re.compile(u'(?<=.{%i})(?P[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)\s*(?=<)(?P()?\s*(\s*){1,2}(?P<(p|div)[^>]*>\s*(]*>\s*

\s*)?\s+){0,3}\s*(<[iubp][^>]*>\s*){1,2}(]*>)?)\s*(?P[\w\d]+)' % length) + intextmatch = re.compile(u'(?<=.{%i})(?P[^\W\-]+)(-|‐)\s*(?=<)(?P()?\s*(\s*){1,2}(?P<(p|div)[^>]*>\s*(]*>\s*

\s*)?\s+){0,3}\s*(<[iubp][^>]*>\s*){1,2}(]*>)?)\s*(?P[\w\d]+)' % length) elif format == 'pdf': - intextmatch = re.compile(u'(?<=.{%i})(?P[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)\s*(?P

|\s*

\s*<[iub]>)\s*(?P[\w\d]+)'% length) + intextmatch = re.compile(u'(?<=.{%i})(?P[^\W\-]+)(-|‐)\s*(?P

|\s*

\s*<[iub]>)\s*(?P[\w\d]+)'% length) elif format == 'txt': - intextmatch = re.compile(u'(?<=.{%i})(?P[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)(\u0020|\u0009)*(?P(\n(\u0020|\u0009)*)+)(?P[\w\d]+)'% length) + intextmatch = re.compile(u'(?<=.{%i})(?P[^\W\-]+)(-|‐)(\u0020|\u0009)*(?P(\n(\u0020|\u0009)*)+)(?P[\w\d]+)'% length) elif format == 'individual_words': - intextmatch = re.compile(u'(?!<)(?P\w+)(-|‐)\s*(?P\w+)(?![^<]*?>)') + intextmatch = re.compile(u'(?!<)(?P[^\W\-]+)(-|‐)\s*(?P\w+)(?![^<]*?>)') elif format == 'html_cleanup': - intextmatch = re.compile(u'(?P[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)\s*(?=<)(?P\s*(\s*<[iubp][^>]*>\s*)?]*>|\s*<[iubp][^>]*>)?\s*(?P[\w\d]+)') + intextmatch = re.compile(u'(?P[^\W\-]+)(-|‐)\s*(?=<)(?P\s*(\s*<[iubp][^>]*>\s*)?]*>|\s*<[iubp][^>]*>)?\s*(?P[\w\d]+)') elif format == 'txt_cleanup': - intextmatch = re.compile(u'(?P\w+)(-|‐)(?P\s+)(?P[\w\d]+)') + intextmatch = re.compile(u'(?P[^\W\-]+)(-|‐)(?P\s+)(?P[\w\d]+)') html = intextmatch.sub(self.dehyphenate, html) diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index 63eca10714..e58bbca1bd 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -11,6 +11,7 @@ from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator from calibre.utils.logging import default_log from calibre.utils.wordcount import get_wordcount_obj + class HeuristicProcessor(object): def __init__(self, extra_opts=None, log=None): @@ -34,10 +35,15 @@ class HeuristicProcessor(object): self.line_close = "()?\s*()?\s*()?\s*" self.single_blank = re.compile(r'(\s*]*>\s*

)', re.IGNORECASE) self.scene_break_open = '

' + self.common_in_text_endings = u'[\"\'—’”,\.!\?\…\)„\w]' + self.common_in_text_beginnings = u'[\w\'\"“‘‛]' def is_pdftohtml(self, src): return '' in src[:1000] + def is_abbyy(self, src): + return '' + return '<'+tag+' '+pstyle+'>' else: - return '

'+span + return '<'+tag+' '+pstyle+'>'+span else: if not span: - return '

' + return '<'+tag+' style="text-indent:3%">' else: - return '

'+span + return '<'+tag+' style="text-indent:3%">'+span def no_markup(self, raw, percent): ''' @@ -149,17 +156,17 @@ class HeuristicProcessor(object): ] ITALICIZE_STYLE_PATS = [ - r'(?msu)(?<=\s)_(?P\S[^_]{0,40}?\S)?_(?=[\s\.,\!\?])', - r'(?msu)(?<=\s)/(?P\S[^/]{0,40}?\S)?/(?=[\s\.,\!\?])', - r'(?msu)(?<=\s)~~(?P\S[^~]{0,40}?\S)?~~(?=[\s\.,\!\?])', - r'(?msu)(?<=\s)\*(?P\S[^\*]{0,40}?\S)?\*(?=[\s\.,\!\?])', - r'(?msu)(?<=\s)~(?P\S[^~]{0,40}?\S)?~(?=[\s\.,\!\?])', - r'(?msu)(?<=\s)_/(?P\S[^/_]{0,40}?\S)?/_(?=[\s\.,\!\?])', - r'(?msu)(?<=\s)_\*(?P\S[^\*_]{0,40}?\S)?\*_(?=[\s\.,\!\?])', - r'(?msu)(?<=\s)\*/(?P\S[^/\*]{0,40}?\S)?/\*(?=[\s\.,\!\?])', - r'(?msu)(?<=\s)_\*/(?P\S[^\*_]{0,40}?\S)?/\*_(?=[\s\.,\!\?])', - r'(?msu)(?<=\s)/:(?P\S[^:/]{0,40}?\S)?:/(?=[\s\.,\!\?])', - r'(?msu)(?<=\s)\|:(?P\S[^:\|]{0,40}?\S)?:\|(?=[\s\.,\!\?])', + r'(?msu)(?<=[\s>])_(?P[^_]+)?_', + r'(?msu)(?<=[\s>])/(?P[^/]+)?/', + r'(?msu)(?<=[\s>])~~(?P[^~]+)?~~', + r'(?msu)(?<=[\s>])\*(?P[^\*]+)?\*', + r'(?msu)(?<=[\s>])~(?P[^~]+)?~', + r'(?msu)(?<=[\s>])_/(?P[^/_]+)?/_', + r'(?msu)(?<=[\s>])_\*(?P[^\*_]+)?\*_', + r'(?msu)(?<=[\s>])\*/(?P[^/\*]+)?/\*', + r'(?msu)(?<=[\s>])_\*/(?P[^\*_]+)?/\*_', + r'(?msu)(?<=[\s>])/:(?P[^:/]+)?:/', + r'(?msu)(?<=[\s>])\|:(?P[^:\|]+)?:\|', ] for word in ITALICIZE_WORDS: @@ -363,7 +370,7 @@ class HeuristicProcessor(object): return html def fix_nbsp_indents(self, html): - txtindent = re.compile(ur'[^>]*)>\s*(?P(]*>\s*)+)?\s*(\u00a0){2,}', re.IGNORECASE) + txtindent = re.compile(ur'<(?Pp|div)(?P[^>]*)>\s*(?P(]*>\s*)+)?\s*(\u00a0){2,}', re.IGNORECASE) html = txtindent.sub(self.insert_indent, html) if self.found_indents > 1: self.log.debug("replaced "+unicode(self.found_indents)+ " nbsp indents with inline styles") @@ -516,6 +523,111 @@ class HeuristicProcessor(object): return scene_break + def abbyy_processor(self, html): + abbyy_line = re.compile('((?P[^\"]*?);?">)(?P.*?)(?P

)|(?P]*>))', re.IGNORECASE) + empty_paragraph = '\n

\n' + self.in_blockquote = False + self.previous_was_paragraph = False + html = re.sub(']*>', '', html) + + def check_paragraph(content): + content = re.sub('\s*]*>\s*', '', content) + if re.match('.*[\"\'.!?:]$', content): + #print "detected this as a paragraph" + return True + else: + return False + + def convert_styles(match): + #print "raw styles are: "+match.group('styles') + content = match.group('content') + #print "raw content is: "+match.group('content') + image = match.group('image') + + is_paragraph = False + text_align = '' + text_indent = '' + paragraph_before = '' + paragraph_after = '' + blockquote_open = '\n
\n' + blockquote_close = '
\n' + indented_text = 'text-indent:3%;' + blockquote_open_loop = '' + blockquote_close_loop = '' + debugabby = False + + if image: + debugabby = True + if self.in_blockquote: + self.in_blockquote = False + blockquote_close_loop = blockquote_close + self.previous_was_paragraph = False + return blockquote_close_loop+'\n'+image+'\n' + else: + styles = match.group('styles').split(';') + is_paragraph = check_paragraph(content) + #print "styles for this line are: "+str(styles) + split_styles = [] + for style in styles: + #print "style is: "+str(style) + newstyle = style.split(':') + #print "newstyle is: "+str(newstyle) + split_styles.append(newstyle) + styles = split_styles + for style, setting in styles: + if style == 'text-align' and setting != 'left': + text_align = style+':'+setting+';' + if style == 'text-indent': + setting = int(re.sub('\s*pt\s*', '', setting)) + if 9 < setting < 14: + text_indent = indented_text + else: + text_indent = style+':'+str(setting)+'pt;' + if style == 'padding': + setting = re.sub('pt', '', setting).split(' ') + if int(setting[1]) < 16 and int(setting[3]) < 16: + if self.in_blockquote: + debugabby = True + if is_paragraph: + self.in_blockquote = False + blockquote_close_loop = blockquote_close + if int(setting[3]) > 8 and text_indent == '': + text_indent = indented_text + if int(setting[0]) > 5: + paragraph_before = empty_paragraph + if int(setting[2]) > 5: + paragraph_after = empty_paragraph + elif not self.in_blockquote and self.previous_was_paragraph: + debugabby = True + self.in_blockquote = True + blockquote_open_loop = blockquote_open + if debugabby: + self.log.debug('\n\n******\n') + self.log.debug('padding top is: '+str(setting[0])) + self.log.debug('padding right is:' + +str(setting[1])) + self.log.debug('padding bottom is: ' + + str(setting[2])) + self.log.debug('padding left is: ' + +str(setting[3])) + + #print "text-align is: "+str(text_align) + #print "\n***\nline is:\n "+str(match.group(0))+'\n' + if debugabby: + #print 
"this line is a paragraph = "+str(is_paragraph)+", previous line was "+str(self.previous_was_paragraph) + self.log.debug("styles for this line were:", styles) + self.log.debug('newline is:') + self.log.debug(blockquote_open_loop+blockquote_close_loop+ + paragraph_before+'

'+content+'

'+paragraph_after+'\n\n\n\n\n') + #print "is_paragraph is "+str(is_paragraph)+", previous_was_paragraph is "+str(self.previous_was_paragraph) + self.previous_was_paragraph = is_paragraph + #print "previous_was_paragraph is now set to "+str(self.previous_was_paragraph)+"\n\n\n" + return blockquote_open_loop+blockquote_close_loop+paragraph_before+'

'+content+'

'+paragraph_after + + html = abbyy_line.sub(convert_styles, html) + return html + def __call__(self, html): self.log.debug("********* Heuristic processing HTML *********") @@ -530,6 +642,10 @@ class HeuristicProcessor(object): self.log.warn("flow is too short, not running heuristics") return html + is_abbyy = self.is_abbyy(html) + if is_abbyy: + html = self.abbyy_processor(html) + # Arrange line feeds and

tags so the line_length and no_markup functions work correctly html = self.arrange_htm_line_endings(html) #self.dump(html, 'after_arrange_line_endings') @@ -638,7 +754,7 @@ class HeuristicProcessor(object): blanks_count = len(self.any_multi_blank.findall(html)) if blanks_count >= 1: html = self.merge_blanks(html, blanks_count) - scene_break_regex = self.line_open+'(?![\w\'\"])(?P((?P((?!\s)\W))\s*(?P=break_char)?)+)\s*'+self.line_close + scene_break_regex = self.line_open+'(?!('+self.common_in_text_beginnings+'|.*?'+self.common_in_text_endings+'<))(?P((?P((?!\s)\W))\s*(?P=break_char)?)+)\s*'+self.line_close scene_break = re.compile(r'%s' % scene_break_regex, re.IGNORECASE|re.UNICODE) # If the user has enabled scene break replacement, then either softbreaks # or 'hard' scene breaks are replaced, depending on which is in use diff --git a/src/calibre/ebooks/metadata/txt.py b/src/calibre/ebooks/metadata/txt.py index 79713774e3..70d3c72ae0 100644 --- a/src/calibre/ebooks/metadata/txt.py +++ b/src/calibre/ebooks/metadata/txt.py @@ -1,16 +1,20 @@ -'''Read meta information from TXT files''' - -from __future__ import with_statement +# -*- coding: utf-8 -*- __license__ = 'GPL v3' __copyright__ = '2009, John Schember ' +''' +Read meta information from TXT files +''' + import re from calibre.ebooks.metadata import MetaInformation def get_metadata(stream, extract_cover=True): - """ Return metadata as a L{MetaInfo} object """ + ''' + Return metadata as a L{MetaInfo} object + ''' mi = MetaInformation(_('Unknown'), [_('Unknown')]) stream.seek(0) diff --git a/src/calibre/ebooks/metadata/txtz.py b/src/calibre/ebooks/metadata/txtz.py new file mode 100644 index 0000000000..ae6efb4838 --- /dev/null +++ b/src/calibre/ebooks/metadata/txtz.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- + +__license__ = 'GPL v3' +__copyright__ = '2011, John Schember ' + +''' +Read meta information from TXT files +''' + +import os + +from cStringIO import StringIO + +from calibre.ebooks.metadata import 
MetaInformation +from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf +from calibre.ptempfile import TemporaryDirectory +from calibre.utils.zipfile import ZipFile, safe_replace + +def get_metadata(stream, extract_cover=True): + ''' + Return metadata as a L{MetaInfo} object + ''' + mi = MetaInformation(_('Unknown'), [_('Unknown')]) + stream.seek(0) + + with TemporaryDirectory('_untxtz_mdata') as tdir: + try: + zf = ZipFile(stream) + zf.extract('metadata.opf', tdir) + with open(os.path.join(tdir, 'metadata.opf'), 'rb') as opff: + mi = OPF(opff).to_book_metadata() + except: + return mi + return mi + +def set_metadata(stream, mi): + opf = StringIO(metadata_to_opf(mi)) + safe_replace(stream, 'metadata.opf', opf) diff --git a/src/calibre/ebooks/oeb/transforms/jacket.py b/src/calibre/ebooks/oeb/transforms/jacket.py index 84f2dd5d6a..d3b66d1e81 100644 --- a/src/calibre/ebooks/oeb/transforms/jacket.py +++ b/src/calibre/ebooks/oeb/transforms/jacket.py @@ -15,6 +15,7 @@ from calibre import guess_type, strftime from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.oeb.base import XPath, XHTML_NS, XHTML from calibre.library.comments import comments_to_html +from calibre.utils.date import is_date_undefined JACKET_XPATH = '//h:meta[@name="calibre-content" and @content="jacket"]' @@ -109,7 +110,7 @@ def get_rating(rating, rchar, e_rchar): def render_jacket(mi, output_profile, alt_title=_('Unknown'), alt_tags=[], alt_comments='', - alt_publisher=('Unknown publisher')): + alt_publisher=('')): css = P('jacket/stylesheet.css', data=True).decode('utf-8') try: @@ -127,10 +128,13 @@ def render_jacket(mi, output_profile, try: publisher = mi.publisher if mi.publisher else alt_publisher except: - publisher = _('Unknown publisher') + publisher = '' try: - pubdate = strftime(u'%Y', mi.pubdate.timetuple()) + if is_date_undefined(mi.pubdate): + pubdate = '' + else: + pubdate = strftime(u'%Y', mi.pubdate.timetuple()) except: pubdate = '' @@ -175,19 +179,24 @@ def 
render_jacket(mi, output_profile, soup = BeautifulSoup(generated_html) if not series: series_tag = soup.find(attrs={'class':'cbj_series'}) - series_tag.extract() + if series_tag is not None: + series_tag.extract() if not rating: rating_tag = soup.find(attrs={'class':'cbj_rating'}) - rating_tag.extract() + if rating_tag is not None: + rating_tag.extract() if not tags: tags_tag = soup.find(attrs={'class':'cbj_tags'}) - tags_tag.extract() + if tags_tag is not None: + tags_tag.extract() if not pubdate: - pubdate_tag = soup.find(attrs={'class':'cbj_pubdate'}) - pubdate_tag.extract() + pubdate_tag = soup.find(attrs={'class':'cbj_pubdata'}) + if pubdate_tag is not None: + pubdate_tag.extract() if output_profile.short_name != 'kindle': hr_tag = soup.find('hr', attrs={'class':'cbj_kindle_banner_hr'}) - hr_tag.extract() + if hr_tag is not None: + hr_tag.extract() return soup.renderContents(None) diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index e1392ef732..8ab1524b02 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -4,23 +4,27 @@ __license__ = 'GPL 3' __copyright__ = '2009, John Schember ' __docformat__ = 'restructuredtext en' +import glob import os +from calibre import _ent_pat, xml_entity_to_unicode from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator from calibre.ebooks.chardet import detect from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \ separate_paragraphs_single_line, separate_paragraphs_print_formatted, \ preserve_spaces, detect_paragraph_type, detect_formatting_type, \ - normalize_line_endings, convert_textile -from calibre import _ent_pat, xml_entity_to_unicode + normalize_line_endings, convert_textile, remove_indents, block_to_single_line, \ + separate_hard_scene_breaks +from calibre.ptempfile import TemporaryDirectory +from calibre.utils.zipfile import ZipFile 
class TXTInput(InputFormatPlugin): name = 'TXT Input' author = 'John Schember' description = 'Convert TXT files to HTML' - file_types = set(['txt']) + file_types = set(['txt', 'txtz']) options = set([ OptionRecommendation(name='paragraph_type', recommended_value='auto', @@ -47,6 +51,9 @@ class TXTInput(InputFormatPlugin): OptionRecommendation(name='preserve_spaces', recommended_value=False, help=_('Normally extra spaces are condensed into a single space. ' 'With this option all spaces will be displayed.')), + OptionRecommendation(name='txt_in_remove_indents', recommended_value=False, + help=_('Normally extra space at the beginning of lines is retained. ' + 'With this option they will be removed.')), OptionRecommendation(name="markdown_disable_toc", recommended_value=False, help=_('Do not insert a Table of Contents into the output text.')), ]) @@ -54,9 +61,23 @@ class TXTInput(InputFormatPlugin): def convert(self, stream, options, file_ext, log, accelerators): self.log = log + txt = '' log.debug('Reading text from file...') + length = 0 - txt = stream.read() + # Extract content from zip archive. + if file_ext == 'txtz': + log.debug('De-compressing content to temporary directory...') + with TemporaryDirectory('_untxtz') as tdir: + zf = ZipFile(stream) + zf.extractall(tdir) + + txts = glob.glob(os.path.join(tdir, '*.txt')) + for t in txts: + with open(t, 'rb') as tf: + txt += tf.read() + else: + txt = stream.read() # Get the encoding of the document. 
if options.input_encoding: @@ -77,20 +98,6 @@ class TXTInput(InputFormatPlugin): # Normalize line endings txt = normalize_line_endings(txt) - # Detect formatting - if options.formatting_type == 'auto': - options.formatting_type = detect_formatting_type(txt) - log.debug('Auto detected formatting as %s' % options.formatting_type) - - if options.formatting_type == 'heuristic': - setattr(options, 'enable_heuristics', True) - setattr(options, 'markup_chapter_headings', True) - setattr(options, 'italicize_common_cases', True) - setattr(options, 'fix_indents', True) - setattr(options, 'delete_blank_paragraphs', True) - setattr(options, 'format_scene_breaks', True) - setattr(options, 'dehyphenate', True) - # Determine the paragraph type of the document. if options.paragraph_type == 'auto': options.paragraph_type = detect_paragraph_type(txt) @@ -100,27 +107,52 @@ class TXTInput(InputFormatPlugin): else: log.debug('Auto detected paragraph type as %s' % options.paragraph_type) - # Preserve spaces will replace multiple spaces to a space - # followed by the   entity. - if options.preserve_spaces: - txt = preserve_spaces(txt) + # Detect formatting + if options.formatting_type == 'auto': + options.formatting_type = detect_formatting_type(txt) + log.debug('Auto detected formatting as %s' % options.formatting_type) - # Get length for hyphen removal and punctuation unwrap - docanalysis = DocAnalysis('txt', txt) - length = docanalysis.line_length(.5) + if options.formatting_type == 'heuristic': + setattr(options, 'enable_heuristics', True) + setattr(options, 'unwrap_lines', False) + setattr(options, 'smarten_punctuation', True) # Reformat paragraphs to block formatting based on the detected type. # We don't check for block because the processor assumes block. # single and print at transformed to block for processing. 
- if options.paragraph_type == 'single' or options.paragraph_type == 'unformatted': + if options.paragraph_type == 'single': txt = separate_paragraphs_single_line(txt) elif options.paragraph_type == 'print': + txt = separate_hard_scene_breaks(txt) txt = separate_paragraphs_print_formatted(txt) + txt = block_to_single_line(txt) elif options.paragraph_type == 'unformatted': from calibre.ebooks.conversion.utils import HeuristicProcessor # unwrap lines based on punctuation + docanalysis = DocAnalysis('txt', txt) + length = docanalysis.line_length(.5) preprocessor = HeuristicProcessor(options, log=getattr(self, 'log', None)) txt = preprocessor.punctuation_unwrap(length, txt, 'txt') + txt = separate_paragraphs_single_line(txt) + else: + txt = separate_hard_scene_breaks(txt) + txt = block_to_single_line(txt) + + if getattr(options, 'enable_heuristics', False) and getattr(options, 'dehyphenate', False): + docanalysis = DocAnalysis('txt', txt) + if not length: + length = docanalysis.line_length(.5) + dehyphenator = Dehyphenator(options.verbose, log=self.log) + txt = dehyphenator(txt,'txt', length) + + # User requested transformation on the text. + if options.txt_in_remove_indents: + txt = remove_indents(txt) + + # Preserve spaces will replace multiple spaces to a space + # followed by the   entity. + if options.preserve_spaces: + txt = preserve_spaces(txt) # Process the text using the appropriate text processor. 
html = '' @@ -134,14 +166,8 @@ class TXTInput(InputFormatPlugin): elif options.formatting_type == 'textile': log.debug('Running text through textile conversion...') html = convert_textile(txt) - else: log.debug('Running text through basic conversion...') - if options.formatting_type == 'heuristic': - # Dehyphenate - dehyphenator = Dehyphenator(options.verbose, log=self.log) - txt = dehyphenator(txt,'txt', length) - flow_size = getattr(options, 'flow_size', 0) html = convert_basic(txt, epub_split_size_kb=flow_size) @@ -169,4 +195,11 @@ class TXTInput(InputFormatPlugin): {}) options.debug_pipeline = odi os.remove(htmlfile.name) + + # Set metadata from file. + from calibre.customize.ui import get_file_type_metadata + from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata + mi = get_file_type_metadata(stream, file_ext) + meta_info_to_oeb_metadata(mi, oeb.metadata, log) + return oeb diff --git a/src/calibre/ebooks/txt/markdownml.py b/src/calibre/ebooks/txt/markdownml.py index 116561f355..c179378049 100644 --- a/src/calibre/ebooks/txt/markdownml.py +++ b/src/calibre/ebooks/txt/markdownml.py @@ -35,11 +35,9 @@ class MarkdownMLizer(object): html = unicode(etree.tostring(item.data, encoding=unicode)) if not self.opts.keep_links: - html = re.sub(r'<\s*a[^>]*>', '', html) - html = re.sub(r'<\s*/\s*a\s*>', '', html) + html = re.sub(r'<\s*/*\s*a[^>]*>', '', html) if not self.opts.keep_image_references: - html = re.sub(r'<\s*img[^>]*>', '', html) - html = re.sub(r'<\s*img\s*>', '', html) + html = re.sub(r'<\s*img[^>]*>', '', html)\ text = html2text(html) diff --git a/src/calibre/ebooks/txt/output.py b/src/calibre/ebooks/txt/output.py index b73a6e8908..d021cbbba6 100644 --- a/src/calibre/ebooks/txt/output.py +++ b/src/calibre/ebooks/txt/output.py @@ -5,11 +5,18 @@ __copyright__ = '2009, John Schember ' __docformat__ = 'restructuredtext en' import os +import shutil + +from lxml import etree from calibre.customize.conversion import OutputFormatPlugin, \ 
OptionRecommendation +from calibre.ebooks.oeb.base import OEB_IMAGES from calibre.ebooks.txt.txtml import TXTMLizer from calibre.ebooks.txt.newlines import TxtNewlines, specified_newlines +from calibre.ptempfile import TemporaryDirectory, TemporaryFile +from calibre.utils.cleantext import clean_ascii_chars +from calibre.utils.zipfile import ZipFile class TXTOutput(OutputFormatPlugin): @@ -73,6 +80,7 @@ class TXTOutput(OutputFormatPlugin): writer = TXTMLizer(log) txt = writer.extract_content(oeb_book, opts) + txt = clean_ascii_chars(txt) log.debug('\tReplacing newlines with selected type...') txt = specified_newlines(TxtNewlines(opts.newline).newline, txt) @@ -93,3 +101,32 @@ class TXTOutput(OutputFormatPlugin): if close: out_stream.close() + +class TXTZOutput(TXTOutput): + + name = 'TXTZ Output' + author = 'John Schember' + file_type = 'txtz' + + def convert(self, oeb_book, output_path, input_plugin, opts, log): + with TemporaryDirectory('_txtz_output') as tdir: + # TXT + with TemporaryFile('index.txt') as tf: + TXTOutput.convert(self, oeb_book, tf, input_plugin, opts, log) + shutil.copy(tf, os.path.join(tdir, 'index.txt')) + + # Images + for item in oeb_book.manifest: + if item.media_type in OEB_IMAGES: + path = os.path.join(tdir, os.path.dirname(item.href)) + if not os.path.exists(path): + os.makedirs(path) + with open(os.path.join(tdir, item.href), 'wb') as imgf: + imgf.write(item.data) + + # Metadata + with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf: + mdataf.write(etree.tostring(oeb_book.metadata.to_opf1())) + + txtz = ZipFile(output_path, 'w') + txtz.add_dir(tdir) diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py index 193846376e..c5cd4da233 100644 --- a/src/calibre/ebooks/txt/processor.py +++ b/src/calibre/ebooks/txt/processor.py @@ -19,20 +19,23 @@ from calibre.utils.cleantext import clean_ascii_chars HTML_TEMPLATE = u'%s\n%s\n' def clean_txt(txt): + ''' + Run transformations on the text to put it into + 
consistent state. + ''' if isbytestring(txt): txt = txt.decode('utf-8', 'replace') # Strip whitespace from the end of the line. Also replace # all line breaks with \n. txt = '\n'.join([line.rstrip() for line in txt.splitlines()]) - # Replace whitespace at the beginning of the list with   - txt = re.sub('(?m)(?P[ ]+)', lambda mo: ' ' * mo.groups('space').count(' '), txt) - txt = re.sub('(?m)(?P[\t]+)', lambda mo: ' ' * 4 * mo.groups('space').count('\t'), txt) + # Replace whitespace at the beginning of the line with   + txt = re.sub('(?m)(?<=^)([ ]{2,}|\t+)(?=.)', ' ' * 4, txt) # Condense redundant spaces txt = re.sub('[ ]{2,}', ' ', txt) - # Remove blank lines from the beginning and end of the document. + # Remove blank space from the beginning and end of the document. txt = re.sub('^\s+(?=.)', '', txt) txt = re.sub('(?<=.)\s+$', '', txt) # Remove excessive line breaks. @@ -43,6 +46,15 @@ def clean_txt(txt): return txt def split_txt(txt, epub_split_size_kb=0): + ''' + Ensure there are split points for converting + to EPUB. A misdetected paragraph type can + result in the entire document being one giant + paragraph. In this case the EPUB parser will not + be able to determine where to split the file + to accomidate the EPUB file size limitation + and will fail. + ''' #Takes care if there is no point to split if epub_split_size_kb > 0: if isinstance(txt, unicode): @@ -60,6 +72,12 @@ def split_txt(txt, epub_split_size_kb=0): return txt def convert_basic(txt, title='', epub_split_size_kb=0): + ''' + Converts plain text to html by putting all paragraphs in +

tags. It condense and retains blank lines when necessary. + + Requires paragraphs to be in single line format. + ''' txt = clean_txt(txt) txt = split_txt(txt, epub_split_size_kb) @@ -100,14 +118,37 @@ def separate_paragraphs_single_line(txt): return txt def separate_paragraphs_print_formatted(txt): - txt = re.sub(u'(?miu)^(\t+|[ ]{2,})(?=.)', '\n\t', txt) + txt = re.sub(u'(?miu)^(?P\t+|[ ]{2,})(?=.)', lambda mo: '\n%s' % mo.group('indent'), txt) + return txt + +def separate_hard_scene_breaks(txt): + def sep_break(line): + if len(line.strip()) > 0: + return '\n%s\n' % line + else: + return line + txt = re.sub(u'(?miu)^[ \t-=~\/]+$', lambda mo: sep_break(mo.group()), txt) + return txt + +def block_to_single_line(txt): + txt = re.sub(r'(?<=.)\n(?=.)', ' ', txt) return txt def preserve_spaces(txt): + ''' + Replaces spaces multiple spaces with   entities. + ''' txt = re.sub('(?P[ ]{2,})', lambda mo: ' ' + (' ' * (len(mo.group('space')) - 1)), txt) txt = txt.replace('\t', '    ') return txt +def remove_indents(txt): + ''' + Remove whitespace at the beginning of each line. + ''' + txt = re.sub('(?miu)^\s+', '', txt) + return txt + def opf_writer(path, opf_name, manifest, spine, mi): opf = OPFCreator(path, mi) opf.create_manifest(manifest) @@ -115,7 +156,10 @@ def opf_writer(path, opf_name, manifest, spine, mi): with open(os.path.join(path, opf_name), 'wb') as opffile: opf.render(opffile) -def split_string_separator(txt, size) : +def split_string_separator(txt, size): + ''' + Splits the text by putting \n\n at the point size. + ''' if len(txt) > size: txt = ''.join([re.sub(u'\.(?P[^.]*)$', '.\n\n\g', txt[i:i+size], 1) for i in @@ -124,7 +168,7 @@ def split_string_separator(txt, size) : def detect_paragraph_type(txt): ''' - Tries to determine the formatting of the document. + Tries to determine the paragraph type of the document. block: Paragraphs are separated by a blank line. single: Each line is a paragraph. 
@@ -167,6 +211,16 @@ def detect_paragraph_type(txt): def detect_formatting_type(txt): + ''' + Tries to determine the formatting of the document. + + markdown: Markdown formatting is used. + textile: Textile formatting is used. + heuristic: When none of the above formatting types are + detected heuristic is returned. + ''' + # Keep a count of the number of format specific object + # that are found in the text. markdown_count = 0 textile_count = 0 @@ -176,9 +230,9 @@ def detect_formatting_type(txt): markdown_count += len(re.findall('(?mu)^=+$', txt)) markdown_count += len(re.findall('(?mu)^-+$', txt)) # Images - markdown_count += len(re.findall('(?u)!\[.*?\]\(.+?\)', txt)) + markdown_count += len(re.findall('(?u)!\[.*?\](\[|\()', txt)) # Links - markdown_count += len(re.findall('(?u)(^|(?P

[^!]))\[.*?\]\([^)]+\)', txt))
+    markdown_count += len(re.findall('(?u)^|[^!]\[.*?\](\[|\()', txt))
 
     # Check for textile
     # Headings
@@ -186,10 +240,12 @@ def detect_formatting_type(txt):
     # Block quote.
     textile_count += len(re.findall(r'(?mu)^bq\.', txt))
     # Images
-    textile_count += len(re.findall(r'\![^\s]+(?=.*?/)(:[^\s]+)*', txt))
+    textile_count += len(re.findall(r'(?mu)(?<=\!)\S+(?=\!)', txt))
     # Links
-    textile_count += len(re.findall(r'"(?=".*?\()(\(.+?\))*[^\(]+?(\(.+?\))*":[^\s]+', txt))
+    textile_count += len(re.findall(r'"[^"]*":\S+', txt))
 
+    # Decide if either markdown or textile is used in the text
+    # based on the number of unique formatting elements found.
     if markdown_count > 5 or textile_count > 5:
         if markdown_count > textile_count:
             return 'markdown'
diff --git a/src/calibre/ebooks/txt/textileml.py b/src/calibre/ebooks/txt/textileml.py
index 94834d8e79..d7e11695c5 100644
--- a/src/calibre/ebooks/txt/textileml.py
+++ b/src/calibre/ebooks/txt/textileml.py
@@ -36,11 +36,9 @@ class TextileMLizer(object):
             html = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
 
             if not self.opts.keep_links:
-                html = re.sub(r'<\s*a[^>]*>', '', html)
-                html = re.sub(r'<\s*/\s*a\s*>', '', html)
+                html = re.sub(r'<\s*/*\s*a[^>]*>', '', html)
             if not self.opts.keep_image_references:
                 html = re.sub(r'<\s*img[^>]*>', '', html)
-                html = re.sub(r'<\s*img\s*>', '', html)
 
             text = html2textile(html)
 
diff --git a/src/calibre/ebooks/txt/txtml.py b/src/calibre/ebooks/txt/txtml.py
index c2ee3f37c5..fa7bfbb380 100644
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@@ -55,6 +55,7 @@ class TXTMLizer(object):
         self.log.info('Converting XHTML to TXT...')
         self.oeb_book = oeb_book
         self.opts = opts
+        self.toc_titles = []
         self.toc_ids = []
         self.last_was_heading = False
         
@@ -94,8 +95,8 @@ class TXTMLizer(object):
         if getattr(self.opts, 'inline_toc', None):
             self.log.debug('Generating table of contents...')
             toc.append(u'%s\n\n' % _(u'Table of Contents:'))
-            for item in self.oeb_book.toc:
-                toc.append(u'* %s\n\n' % item.title)
+            for item in self.toc_titles:
+                toc.append(u'* %s\n\n' % item)
         return ''.join(toc)
 
     def create_flat_toc(self, nodes):
@@ -103,6 +104,7 @@ class TXTMLizer(object):
         Turns a hierarchical list of TOC href's into a flat list.
         '''
         for item in nodes:
+            self.toc_titles.append(item.title)
             self.toc_ids.append(item.href)
             self.create_flat_toc(item.nodes)
 
diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py
index 9150172fc1..b33166dd33 100644
--- a/src/calibre/gui2/__init__.py
+++ b/src/calibre/gui2/__init__.py
@@ -50,6 +50,7 @@ gprefs.defaults['action-layout-context-menu-device'] = (
 
 gprefs.defaults['show_splash_screen'] = True
 gprefs.defaults['toolbar_icon_size'] = 'medium'
+gprefs.defaults['automerge'] = 'ignore'
 gprefs.defaults['toolbar_text'] = 'auto'
 gprefs.defaults['show_child_bar'] = False
 gprefs.defaults['font'] = None
@@ -105,9 +106,13 @@ def _config():
                 'clicked'))
     c.add_opt('asked_library_thing_password', default=False,
             help='Asked library thing password at least once.')
-    c.add_opt('search_as_you_type', default=True,
-            help='Start searching as you type. If this is disabled then search will '
-            'only take place when the Enter or Return key is pressed.')
+    c.add_opt('search_as_you_type', default=False,
+            help=_('Start searching as you type. If this is disabled then search will '
+            'only take place when the Enter or Return key is pressed.'))
+    c.add_opt('highlight_search_matches', default=False,
+            help=_('When searching, show all books with search results '
+            'highlighted instead of showing only the matches. You can use the '
+            'N or F3 keys to go to the next match.'))
     c.add_opt('save_to_disk_template_history', default=[],
         help='Previously used Save to Disk templates')
     c.add_opt('send_to_device_template_history', default=[],
diff --git a/src/calibre/gui2/actions/add.py b/src/calibre/gui2/actions/add.py
index 4236a63340..25127d3635 100644
--- a/src/calibre/gui2/actions/add.py
+++ b/src/calibre/gui2/actions/add.py
@@ -244,8 +244,8 @@ class AddAction(InterfaceAction):
                     x.decode(preferred_encoding, 'replace') for x in
                     self._adder.merged_books])
             info_dialog(self.gui, _('Merged some books'),
-                    _('Some duplicates were found and merged into the '
-                        'following existing books:'), det_msg=books, show=True)
+                    _('The following duplicate books were found and incoming book formats were '
+                        'processed and merged into your Calibre database according to your automerge settings:'), det_msg=books, show=True)
         if getattr(self._adder, 'critical', None):
             det_msg = []
             for name, log in self._adder.critical.items():
diff --git a/src/calibre/gui2/actions/next_match.py b/src/calibre/gui2/actions/next_match.py
index 79de6a2d9b..1c74719674 100644
--- a/src/calibre/gui2/actions/next_match.py
+++ b/src/calibre/gui2/actions/next_match.py
@@ -28,21 +28,12 @@ class NextMatchAction(InterfaceAction):
         self.gui.addAction(self.p_action)
         self.p_action.triggered.connect(self.move_backward)
 
-    def gui_layout_complete(self):
-        self.gui.search_highlight_only.setVisible(True)
-
     def location_selected(self, loc):
         self.can_move = loc == 'library'
-        try:
-            self.gui.search_highlight_only.setVisible(self.can_move)
-        except:
-            import traceback
-            traceback.print_exc()
 
     def move_forward(self):
         if self.can_move is None:
             self.can_move = self.gui.current_view() is self.gui.library_view
-            self.gui.search_highlight_only.setVisible(self.can_move)
 
         if self.can_move:
             self.gui.current_view().move_highlighted_row(forward=True)
@@ -50,7 +41,6 @@ class NextMatchAction(InterfaceAction):
     def move_backward(self):
         if self.can_move is None:
             self.can_move = self.gui.current_view() is self.gui.library_view
-            self.gui.search_highlight_only.setVisible(self.can_move)
 
         if self.can_move:
             self.gui.current_view().move_highlighted_row(forward=False)
diff --git a/src/calibre/gui2/actions/preferences.py b/src/calibre/gui2/actions/preferences.py
index f1fe06c43d..ee52f06aac 100644
--- a/src/calibre/gui2/actions/preferences.py
+++ b/src/calibre/gui2/actions/preferences.py
@@ -33,7 +33,8 @@ class PreferencesAction(InterfaceAction):
             x.triggered.connect(self.do_config)
 
 
-    def do_config(self, checked=False, initial_plugin=None):
+    def do_config(self, checked=False, initial_plugin=None,
+            close_after_initial=False):
         if self.gui.job_manager.has_jobs():
             d = error_dialog(self.gui, _('Cannot configure'),
                     _('Cannot configure while there are running jobs.'))
@@ -44,7 +45,8 @@ class PreferencesAction(InterfaceAction):
                     _('Cannot configure before calibre is restarted.'))
             d.exec_()
             return
-        d = Preferences(self.gui, initial_plugin=initial_plugin)
+        d = Preferences(self.gui, initial_plugin=initial_plugin,
+                close_after_initial=close_after_initial)
         d.show()
         d.run_wizard_requested.connect(self.gui.run_wizard,
                 type=Qt.QueuedConnection)
diff --git a/src/calibre/gui2/add.py b/src/calibre/gui2/add.py
index 026fabea07..f40cf0ff75 100644
--- a/src/calibre/gui2/add.py
+++ b/src/calibre/gui2/add.py
@@ -8,7 +8,7 @@ from functools import partial
 from PyQt4.Qt import QThread, QObject, Qt, QProgressDialog, pyqtSignal, QTimer
 
 from calibre.gui2.dialogs.progress import ProgressDialog
-from calibre.gui2 import question_dialog, error_dialog, info_dialog
+from calibre.gui2 import question_dialog, error_dialog, info_dialog, gprefs
 from calibre.ebooks.metadata.opf2 import OPF
 from calibre.ebooks.metadata import MetaInformation
 from calibre.constants import preferred_encoding, filesystem_encoding, DEBUG
@@ -179,23 +179,47 @@ class DBAdder(QObject): # {{{
                     cover = f.read()
             orig_formats = formats
             formats = [f for f in formats if not f.lower().endswith('.opf')]
-            if prefs['add_formats_to_existing']:
+            if prefs['add_formats_to_existing']: #automerge is on
                 identical_book_list = self.db.find_identical_books(mi)
-
-                if identical_book_list: # books with same author and nearly same title exist in db
+                if identical_book_list:  # books with same author and nearly same title exist in db
                     self.merged_books.add(mi.title)
+                    seen_fmts = set([])
+
                     for identical_book in identical_book_list:
-                        self.add_formats(identical_book, formats, replace=False)
+                        ib_fmts = self.db.formats(identical_book, index_is_id=True)
+                        if ib_fmts:
+                            seen_fmts |= set(ib_fmts.split(','))
+                        replace = gprefs['automerge'] == 'overwrite'
+                        self.add_formats(identical_book, formats,
+                                replace=replace)
+                    if gprefs['automerge'] == 'new record':
+                        incoming_fmts = \
+                            set([os.path.splitext(path)[-1].replace('.',
+                                '').upper() for path in formats])
+                        if incoming_fmts.intersection(seen_fmts):
+                            # There was at least one duplicate format
+                            # so create a new record and put the
+                            # incoming formats into it
+                            # We should arguably put only the duplicate
+                            # formats, but no real harm is done by having
+                            # all formats
+                            id_ = self.db.create_book_entry(mi, cover=cover,
+                                    add_duplicates=True)
+                            self.number_of_books_added += 1
+                            self.add_formats(id_, formats)
+
                 else:
-                    id = self.db.create_book_entry(mi, cover=cover, add_duplicates=True)
+                    # books with same author and nearly same title do not exist in db
+                    id_ = self.db.create_book_entry(mi, cover=cover, add_duplicates=True)
                     self.number_of_books_added += 1
-                    self.add_formats(id, formats)
-            else:
-                id = self.db.create_book_entry(mi, cover=cover, add_duplicates=False)
-                if id is None:
+                    self.add_formats(id_, formats)
+
+            else: #automerge is off
+                id_ = self.db.create_book_entry(mi, cover=cover, add_duplicates=False)
+                if id_ is None:
                     self.duplicates.append((mi, cover, orig_formats))
                 else:
-                    self.add_formats(id, formats)
+                    self.add_formats(id_, formats)
                     self.number_of_books_added += 1
         else:
             self.names.append(name)
diff --git a/src/calibre/gui2/complete.py b/src/calibre/gui2/complete.py
index a013065690..58020f924a 100644
--- a/src/calibre/gui2/complete.py
+++ b/src/calibre/gui2/complete.py
@@ -6,157 +6,38 @@ __copyright__ = '2011, Kovid Goyal '
 __docformat__ = 'restructuredtext en'
 
 
-from PyQt4.Qt import QLineEdit, QListView, QAbstractListModel, Qt, QTimer, \
-        QApplication, QPoint, QItemDelegate, QStyleOptionViewItem, \
-        QStyle, QEvent, pyqtSignal
+from PyQt4.Qt import QLineEdit, QAbstractListModel, Qt, \
+        QApplication, QCompleter
 
 from calibre.utils.config import tweaks
 from calibre.utils.icu import sort_key, lower
 from calibre.gui2 import NONE
 from calibre.gui2.widgets import EnComboBox
 
-class CompleterItemDelegate(QItemDelegate): # {{{
-
-    ''' Renders the current item as thought it were selected '''
-
-    def __init__(self, view):
-        self.view = view
-        QItemDelegate.__init__(self, view)
-
-    def paint(self, p, opt, idx):
-        opt = QStyleOptionViewItem(opt)
-        opt.showDecorationSelected = True
-        if self.view.currentIndex() == idx:
-            opt.state |= QStyle.State_HasFocus
-        QItemDelegate.paint(self, p, opt, idx)
-
-# }}}
-
-class CompleteWindow(QListView): # {{{
-
-    '''
-    The completion popup. For keyboard and mouse handling see
-    :meth:`eventFilter`.
-    '''
-
-    #: This signal is emitted when the user selects one of the listed
-    #: completions, by mouse or keyboard
-    completion_selected = pyqtSignal(object)
-
-    def __init__(self, widget, model):
-        self.widget = widget
-        QListView.__init__(self)
-        self.setVisible(False)
-        self.setParent(None, Qt.Popup)
-        self.setAlternatingRowColors(True)
-        self.setFocusPolicy(Qt.NoFocus)
-        self._d = CompleterItemDelegate(self)
-        self.setItemDelegate(self._d)
-        self.setModel(model)
-        self.setFocusProxy(widget)
-        self.installEventFilter(self)
-        self.clicked.connect(self.do_selected)
-        self.entered.connect(self.do_entered)
-        self.setMouseTracking(True)
-
-    def do_entered(self, idx):
-        if idx.isValid():
-            self.setCurrentIndex(idx)
-
-    def do_selected(self, idx=None):
-        idx = self.currentIndex() if idx is None else idx
-        if idx.isValid():
-            data = unicode(self.model().data(idx, Qt.DisplayRole))
-            self.completion_selected.emit(data)
-        self.hide()
-
-    def eventFilter(self, o, e):
-        if o is not self:
-            return False
-        if e.type() == e.KeyPress:
-            key = e.key()
-            if key in (Qt.Key_Escape, Qt.Key_Backtab) or \
-                    (key == Qt.Key_F4 and (e.modifiers() & Qt.AltModifier)):
-                self.hide()
-                return True
-            elif key in (Qt.Key_Enter, Qt.Key_Return, Qt.Key_Tab):
-                if key == Qt.Key_Tab and not self.currentIndex().isValid():
-                    if self.model().rowCount() > 0:
-                        self.setCurrentIndex(self.model().index(0))
-                self.do_selected()
-                return True
-            elif key in (Qt.Key_Up, Qt.Key_Down, Qt.Key_PageUp,
-                    Qt.Key_PageDown):
-                return False
-            # Send key event to associated line edit
-            self.widget.eat_focus_out = False
-            try:
-                self.widget.event(e)
-            finally:
-                self.widget.eat_focus_out = True
-            if not self.widget.hasFocus():
-                # Line edit lost focus
-                self.hide()
-            if e.isAccepted():
-                # Line edit consumed event
-                return True
-        elif e.type() == e.MouseButtonPress:
-            # Hide popup if user clicks outside it, otherwise
-            # pass event to popup
-            if not self.underMouse():
-                self.hide()
-                return True
-        elif e.type() in (e.InputMethod, e.ShortcutOverride):
-            QApplication.sendEvent(self.widget, e)
-
-        return False # Do not filter this event
-
-# }}}
-
 class CompleteModel(QAbstractListModel):
 
     def __init__(self, parent=None):
         QAbstractListModel.__init__(self, parent)
-        self.sep = ','
-        self.space_before_sep = False
         self.items = []
-        self.lowered_items = []
-        self.matches = []
 
     def set_items(self, items):
         items = [unicode(x.strip()) for x in items]
         self.items = list(sorted(items, key=lambda x: sort_key(x)))
         self.lowered_items = [lower(x) for x in self.items]
-        self.matches = []
         self.reset()
 
     def rowCount(self, *args):
-        return len(self.matches)
+        return len(self.items)
 
     def data(self, index, role):
         if role == Qt.DisplayRole:
             r = index.row()
             try:
-                return self.matches[r]
+                return self.items[r]
             except IndexError:
                 pass
         return NONE
 
-    def get_matches(self, prefix):
-        '''
-        Return all matches that (case insensitively) start with prefix
-        '''
-        prefix = lower(prefix)
-        ans = []
-        if prefix:
-            for i, test in enumerate(self.lowered_items):
-                if test.startswith(prefix):
-                    ans.append(self.items[i])
-        return ans
-
-    def update_matches(self, matches):
-        self.matches = matches
-        self.reset()
 
 class MultiCompleteLineEdit(QLineEdit):
     '''
@@ -170,16 +51,26 @@ class MultiCompleteLineEdit(QLineEdit):
     '''
 
     def __init__(self, parent=None):
-        self.eat_focus_out = True
-        self.max_visible_items = 7
-        self.current_prefix = None
         QLineEdit.__init__(self, parent)
 
+        self.sep = ','
+        self.space_before_sep = False
+
         self._model = CompleteModel(parent=self)
-        self.complete_window = CompleteWindow(self, self._model)
+        self._completer = c = QCompleter(self._model, self)
+        c.setWidget(self)
+        c.setCompletionMode(QCompleter.PopupCompletion)
+        c.setCaseSensitivity(Qt.CaseInsensitive)
+        c.setModelSorting(QCompleter.CaseInsensitivelySortedModel)
+        c.setCompletionRole(Qt.DisplayRole)
+        p = c.popup()
+        p.setMouseTracking(True)
+        p.entered.connect(self.item_entered)
+        c.popup().setAlternatingRowColors(True)
+
+        c.activated.connect(self.completion_selected,
+                type=Qt.QueuedConnection)
         self.textEdited.connect(self.text_edited)
-        self.complete_window.completion_selected.connect(self.completion_selected)
-        self.installEventFilter(self)
 
     # Interface {{{
     def update_items_cache(self, complete_items):
@@ -193,33 +84,23 @@ class MultiCompleteLineEdit(QLineEdit):
 
     # }}}
 
-    def eventFilter(self, o, e):
-        if self.eat_focus_out and o is self and e.type() == QEvent.FocusOut:
-            if self.complete_window.isVisible():
-                return True # Filter this event since the cw is visible
-        return QLineEdit.eventFilter(self, o, e)
-
-    def hide_completion_window(self):
-        self.complete_window.hide()
-
+    def item_entered(self, idx):
+        self._completer.popup().setCurrentIndex(idx)
 
     def text_edited(self, *args):
         self.update_completions()
+        self._completer.complete()
 
     def update_completions(self):
         ' Update the list of completions '
-        if not self.complete_window.isVisible() and not self.hasFocus():
-            return
         cpos = self.cursorPosition()
         text = unicode(self.text())
         prefix = text[:cpos]
         self.current_prefix = prefix
         complete_prefix = prefix.lstrip()
         if self.sep:
-            complete_prefix = prefix = prefix.split(self.sep)[-1].lstrip()
-
-        matches = self._model.get_matches(complete_prefix)
-        self.update_complete_window(matches)
+            complete_prefix = prefix.split(self.sep)[-1].lstrip()
+        self._completer.setCompletionPrefix(complete_prefix)
 
     def get_completed_text(self, text):
         '''
@@ -242,11 +123,11 @@ class MultiCompleteLineEdit(QLineEdit):
             else:
                 prefix_len = len(before_text.split(self.sep)[-1].lstrip())
                 completed_text = before_text[:cursor_pos - prefix_len] + text + after_text
-            return prefix_len, completed_text 
-                
+            return prefix_len, completed_text
+
 
     def completion_selected(self, text):
-        prefix_len, ctext = self.get_completed_text(text)
+        prefix_len, ctext = self.get_completed_text(unicode(text))
         if self.sep is None:
             self.setText(ctext)
             self.setCursorPosition(len(ctext))
@@ -255,60 +136,6 @@ class MultiCompleteLineEdit(QLineEdit):
             self.setText(ctext)
             self.setCursorPosition(cursor_pos - prefix_len + len(text))
 
-    def update_complete_window(self, matches):
-        self._model.update_matches(matches)
-        if matches:
-            self.show_complete_window()
-        else:
-            self.complete_window.hide()
-
-
-    def position_complete_window(self):
-        popup = self.complete_window
-        screen = QApplication.desktop().availableGeometry(self)
-        h = (popup.sizeHintForRow(0) * min(self.max_visible_items,
-            popup.model().rowCount()) + 3) + 3
-        hsb = popup.horizontalScrollBar()
-        if hsb and hsb.isVisible():
-            h += hsb.sizeHint().height()
-
-        rh = self.height()
-        pos = self.mapToGlobal(QPoint(0, self.height() - 2))
-        w = self.width()
-
-        if w > screen.width():
-            w = screen.width()
-        if (pos.x() + w) > (screen.x() + screen.width()):
-            pos.setX(screen.x() + screen.width() - w)
-        if (pos.x() < screen.x()):
-            pos.setX(screen.x())
-
-        top = pos.y() - rh - screen.top() + 2
-        bottom = screen.bottom() - pos.y()
-        h = max(h, popup.minimumHeight())
-        if h > bottom:
-            h = min(max(top, bottom), h)
-            if top > bottom:
-                pos.setY(pos.y() - h - rh + 2)
-
-        popup.setGeometry(pos.x(), pos.y(), w, h)
-
-
-    def show_complete_window(self):
-        self.position_complete_window()
-        self.complete_window.show()
-
-    def moveEvent(self, ev):
-        ret = QLineEdit.moveEvent(self, ev)
-        QTimer.singleShot(0, self.position_complete_window)
-        return ret
-
-    def resizeEvent(self, ev):
-        ret = QLineEdit.resizeEvent(self, ev)
-        QTimer.singleShot(0, self.position_complete_window)
-        return ret
-
-
     @dynamic_property
     def all_items(self):
         def fget(self):
@@ -317,22 +144,6 @@ class MultiCompleteLineEdit(QLineEdit):
             self._model.set_items(items)
         return property(fget=fget, fset=fset)
 
-    @dynamic_property
-    def sep(self):
-        def fget(self):
-            return self._model.sep
-        def fset(self, val):
-            self._model.sep = val
-        return property(fget=fget, fset=fset)
-
-    @dynamic_property
-    def space_before_sep(self):
-        def fget(self):
-            return self._model.space_before_sep
-        def fset(self, val):
-            self._model.space_before_sep = val
-        return property(fget=fget, fset=fset)
-
 class MultiCompleteComboBox(EnComboBox):
 
     def __init__(self, *args):
diff --git a/src/calibre/gui2/convert/txt_input.py b/src/calibre/gui2/convert/txt_input.py
index 62672cc0f9..acdf5f43c0 100644
--- a/src/calibre/gui2/convert/txt_input.py
+++ b/src/calibre/gui2/convert/txt_input.py
@@ -16,7 +16,8 @@ class PluginWidget(Widget, Ui_Form):
 
     def __init__(self, parent, get_option, get_help, db=None, book_id=None):
         Widget.__init__(self, parent,
-            ['paragraph_type', 'formatting_type', 'markdown_disable_toc', 'preserve_spaces'])
+            ['paragraph_type', 'formatting_type', 'markdown_disable_toc',
+             'preserve_spaces', 'txt_in_remove_indents'])
         self.db, self.book_id = db, book_id
         for x in get_option('paragraph_type').option.choices:
             self.opt_paragraph_type.addItem(x)
diff --git a/src/calibre/gui2/convert/txt_input.ui b/src/calibre/gui2/convert/txt_input.ui
index 6cbd68135f..211b03294a 100644
--- a/src/calibre/gui2/convert/txt_input.ui
+++ b/src/calibre/gui2/convert/txt_input.ui
@@ -7,57 +7,95 @@
     0
     0
     518
-    300
+    353
    
   
   
    Form
   
-  
-   
-    
-     
-      Paragraph style:
+  
+   
+    
+     
+      Structure
      
+     
+      
+       
+        
+         
+          0
+          0
+         
+        
+        
+         Paragraph style:
+        
+       
+      
+      
+       
+        
+         
+          0
+          0
+         
+        
+       
+      
+      
+       
+        
+         
+          0
+          0
+         
+        
+        
+         Formatting style:
+        
+       
+      
+      
+       
+        
+         
+          0
+          0
+         
+        
+       
+      
+     
     
    
-   
-    
-   
-   
-    
-     
-      Preserve &spaces
+   
+    
+     
+      Common
      
+     
+      
+       
+        
+         Preserve &spaces
+        
+       
+      
+      
+       
+        
+         Remove indents at the beginning of lines
+        
+       
+      
+     
     
    
-   
-    
-     
-      Qt::Vertical
-     
-     
-      
-       20
-       213
-      
-     
-    
-   
-   
-    
-   
-   
-    
-     
-      Formatting style:
-     
-    
-   
-   
+   
     
      
-      Markdown Options
+      Markdown
      
      
       
@@ -83,6 +121,19 @@
      
     
    
+   
+    
+     
+      Qt::Vertical
+     
+     
+      
+       20
+       213
+      
+     
+    
+   
   
  
  
diff --git a/src/calibre/gui2/dialogs/message_box.py b/src/calibre/gui2/dialogs/message_box.py
index 9d586ce28d..945d50de4e 100644
--- a/src/calibre/gui2/dialogs/message_box.py
+++ b/src/calibre/gui2/dialogs/message_box.py
@@ -89,7 +89,8 @@ class MessageBox(QDialog, Ui_Dialog):
                 (__version__, unicode(self.windowTitle()),
                     unicode(self.msg.text()),
                     unicode(self.det_msg.toPlainText())))
-        self.ctc_button.setText(_('Copied'))
+        if hasattr(self, 'ctc_button'):
+            self.ctc_button.setText(_('Copied'))
 
     def showEvent(self, ev):
         ret = QDialog.showEvent(self, ev)
diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py
index 3e711edd2d..52d263fe36 100644
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@@ -616,6 +616,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
         self.original_series_name = unicode(self.series.text()).strip()
         if len(db.custom_column_label_map) == 0:
             self.central_widget.tabBar().setVisible(False)
+            self.central_widget.setTabEnabled(1, False)
         else:
             self.create_custom_column_editors()
         self.generate_cover_button.clicked.connect(self.generate_cover)
@@ -780,8 +781,8 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
                     _('You have changed the tags. In order to use the tags'
                        ' editor, you must either discard or apply these '
                        'changes. Apply changes?'), show_copy_button=False):
-                self.books_to_refresh |= self.apply_tags(commit=True, notify=True,
-                                                         allow_case_change=True)
+                self.books_to_refresh |= self.apply_tags(commit=True,
+                        notify=True)
                 self.original_tags = unicode(self.tags.text())
             else:
                 self.tags.setText(self.original_tags)
diff --git a/src/calibre/gui2/dialogs/tag_list_editor.py b/src/calibre/gui2/dialogs/tag_list_editor.py
index ced0e9a505..9694a9a459 100644
--- a/src/calibre/gui2/dialogs/tag_list_editor.py
+++ b/src/calibre/gui2/dialogs/tag_list_editor.py
@@ -1,7 +1,7 @@
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal '
 
-from PyQt4.QtCore import SIGNAL, Qt
+from PyQt4.QtCore import Qt, QString
 from PyQt4.QtGui import QDialog, QListWidgetItem
 
 from calibre.gui2.dialogs.tag_list_editor_ui import Ui_TagListEditor
@@ -11,30 +11,38 @@ class ListWidgetItem(QListWidgetItem):
 
     def __init__(self, txt):
         QListWidgetItem.__init__(self, txt)
-        self.old_value = txt
-        self.cur_value = txt
+        self.initial_value = QString(txt)
+        self.current_value = QString(txt)
+        self.previous_value = QString(txt)
 
     def data(self, role):
         if role == Qt.DisplayRole:
-            if self.old_value != self.cur_value:
-                return _('%s (was %s)')%(self.cur_value, self.old_value)
+            if self.initial_value != self.current_value:
+                return _('%s (was %s)')%(self.current_value, self.initial_value)
             else:
-                return self.cur_value
+                return self.current_value
         elif role == Qt.EditRole:
-            return self.cur_value
+            return self.current_value
         else:
             return QListWidgetItem.data(self, role)
 
     def setData(self, role, data):
         if role == Qt.EditRole:
-            self.cur_value = data.toString()
+            self.previous_value = self.current_value
+            self.current_value = data.toString()
         QListWidgetItem.setData(self, role, data)
 
     def text(self):
-        return self.cur_value
+        return self.current_value
+
+    def initial_text(self):
+        return self.initial_value
+
+    def previous_text(self):
+        return self.previous_value
 
     def setText(self, txt):
-        self.cur_value = txt
-        QListWidgetItem.setText(txt)
+        self.current_value = txt
+        QListWidgetItem.setText(self, txt)
 
 class TagListEditor(QDialog, Ui_TagListEditor):
@@ -49,7 +57,7 @@ class TagListEditor(QDialog, Ui_TagListEditor):
         self.setWindowIcon(icon)
 
         self.to_rename = {}
-        self.to_delete = []
+        self.to_delete = set([])
         self.all_tags = {}
 
         for k,v in data:
@@ -57,6 +65,7 @@ class TagListEditor(QDialog, Ui_TagListEditor):
         for tag in sorted(self.all_tags.keys(), key=key):
             item = ListWidgetItem(tag)
             item.setData(Qt.UserRole, self.all_tags[tag])
+            item.setFlags (item.flags() | Qt.ItemIsEditable)
             self.available_tags.addItem(item)
 
         if tag_to_match is not None:
@@ -64,23 +73,20 @@ class TagListEditor(QDialog, Ui_TagListEditor):
             if len(items) == 1:
                 self.available_tags.setCurrentItem(items[0])
 
-        self.connect(self.delete_button,  SIGNAL('clicked()'), self.delete_tags)
-        self.connect(self.rename_button,  SIGNAL('clicked()'), self.rename_tag)
-        self.connect(self.available_tags, SIGNAL('itemDoubleClicked(QListWidgetItem *)'), self._rename_tag)
-        self.connect(self.available_tags, SIGNAL('itemChanged(QListWidgetItem *)'), self.finish_editing)
+        self.delete_button.clicked.connect(self.delete_tags)
+        self.rename_button.clicked.connect(self.rename_tag)
+        self.available_tags.itemDoubleClicked.connect(self._rename_tag)
+        self.available_tags.itemChanged.connect(self.finish_editing)
 
     def finish_editing(self, item):
         if not item.text():
                 error_dialog(self, _('Item is blank'),
                              _('An item cannot be set to nothing. Delete it instead.')).exec_()
-                item.setText(self.item_before_editing.text())
+                item.setText(item.previous_text())
                 return
-        if item.text() != self.item_before_editing.text():
-            (id,ign) = self.item_before_editing.data(Qt.UserRole).toInt()
-            if item.text() not in self.to_rename:
-                self.to_rename[item.text()] = [id]
-            else:
-                self.to_rename[item.text()].append(id)
+        if item.text() != item.initial_text():
+            id_ = item.data(Qt.UserRole).toInt()[0]
+            self.to_rename[id_] = unicode(item.text())
 
     def rename_tag(self):
         item = self.available_tags.currentItem()
@@ -91,8 +97,6 @@ class TagListEditor(QDialog, Ui_TagListEditor):
             error_dialog(self, _('No item selected'),
                          _('You must select one item from the list of Available items.')).exec_()
             return
-        self.item_before_editing = item.clone()
-        item.setFlags (item.flags() | Qt.ItemIsEditable);
         self.available_tags.editItem(item)
 
     def delete_tags(self, item=None):
@@ -108,7 +112,7 @@ class TagListEditor(QDialog, Ui_TagListEditor):
         row = self.available_tags.row(deletes[0])
         for item in deletes:
             (id,ign) = item.data(Qt.UserRole).toInt()
-            self.to_delete.append(id)
+            self.to_delete.add(id)
             self.available_tags.takeItem(self.available_tags.row(item))
 
         if row >= self.available_tags.count():
diff --git a/src/calibre/gui2/init.py b/src/calibre/gui2/init.py
index ebd670c8fa..0ca58582b6 100644
--- a/src/calibre/gui2/init.py
+++ b/src/calibre/gui2/init.py
@@ -64,6 +64,7 @@ class LibraryViewMixin(object): # {{{
             view.verticalHeader().sectionDoubleClicked.connect(self.iactions['View'].view_specific_book)
 
         self.build_context_menus()
+        self.library_view.model().set_highlight_only(config['highlight_search_matches'])
 
     def build_context_menus(self):
         lm = QMenu(self)
diff --git a/src/calibre/gui2/layout.py b/src/calibre/gui2/layout.py
index c1d9498075..e8a4e79384 100644
--- a/src/calibre/gui2/layout.py
+++ b/src/calibre/gui2/layout.py
@@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
 from functools import partial
 
 from PyQt4.Qt import QIcon, Qt, QWidget, QToolBar, QSize, \
-    pyqtSignal, QToolButton, QMenu, QCheckBox, \
+    pyqtSignal, QToolButton, QMenu, \
     QObject, QVBoxLayout, QSizePolicy, QLabel, QHBoxLayout, QActionGroup
 
 
@@ -156,7 +156,8 @@ class SearchBar(QWidget): # {{{
         x = ComboBoxWithHelp(self)
         x.setMaximumSize(QSize(150, 16777215))
         x.setObjectName("search_restriction")
-        x.setToolTip(_("Books display will be restricted to those matching the selected saved search"))
+        x.setToolTip(_('Books display will be restricted to those matching the '
+                       'selected saved search'))
         l.addWidget(x)
         parent.search_restriction = x
 
@@ -175,7 +176,8 @@ class SearchBar(QWidget): # {{{
         x = parent.search = SearchBox2(self)
         x.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Minimum)
         x.setObjectName("search")
-        x.setToolTip(_("

Search the list of books by title, author, publisher, tags, comments, etc.

Words separated by spaces are ANDed")) + x.setToolTip(_("

Search the list of books by title, author, publisher, " + "tags, comments, etc.

Words separated by spaces are ANDed")) l.addWidget(x) self.search_button = QToolButton() @@ -194,13 +196,11 @@ class SearchBar(QWidget): # {{{ l.addWidget(x) x.setToolTip(_("Reset Quick Search")) - x = parent.search_highlight_only = QCheckBox() - x.setText(_('&Highlight')) - x.setToolTip('

'+_('When searching, highlight matched books, instead ' - 'of restricting the book list to the matches.

You can use the ' - 'N or F3 keys to go to the next match.')) + x = parent.search_options_button = QToolButton(self) + x.setIcon(QIcon(I('config.png'))) + x.setObjectName("search_option_button") l.addWidget(x) - x.setVisible(False) + x.setToolTip(_("Change the way searching for books works")) x = parent.saved_search = SavedSearchBox(self) x.setMaximumSize(QSize(150, 16777215)) @@ -227,7 +227,6 @@ class SearchBar(QWidget): # {{{ x.setToolTip(_("Delete current saved search")) - # }}} class Spacer(QWidget): # {{{ diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py index 2f8a747c39..48668d3376 100644 --- a/src/calibre/gui2/library/models.py +++ b/src/calibre/gui2/library/models.py @@ -238,8 +238,6 @@ class BooksModel(QAbstractTableModel): # {{{ def set_highlight_only(self, toWhat): self.highlight_only = toWhat - if self.last_search: - self.research() def get_current_highlighted_id(self): if len(self.ids_to_highlight) == 0 or self.current_highlighted_idx is None: diff --git a/src/calibre/gui2/metadata/bulk_download.py b/src/calibre/gui2/metadata/bulk_download.py index 19859ed3ec..461f56b60c 100644 --- a/src/calibre/gui2/metadata/bulk_download.py +++ b/src/calibre/gui2/metadata/bulk_download.py @@ -11,7 +11,7 @@ from threading import Thread from Queue import Queue, Empty from functools import partial -from PyQt4.Qt import QObject, Qt, pyqtSignal, QTimer, QDialog, \ +from PyQt4.Qt import QObject, QTimer, QDialog, \ QVBoxLayout, QTextBrowser, QLabel, QGroupBox, QDialogButtonBox from calibre.ebooks.metadata.fetch import search, get_social_metadata @@ -163,27 +163,23 @@ class DownloadMetadata(Thread): class DoDownload(QObject): - idle_process = pyqtSignal() - def __init__(self, parent, title, db, ids, get_covers, set_metadata=True, get_social_metadata=True): QObject.__init__(self, parent) self.pd = ProgressDialog(title, min=0, max=0, parent=parent) self.pd.canceled_signal.connect(self.cancel) - self.idle_process.connect(self.do_one, 
type=Qt.QueuedConnection) self.downloader = None self.create = partial(DownloadMetadata, db, ids, get_covers, set_metadata=set_metadata, get_social_metadata=get_social_metadata) - self.timer = QTimer(self) self.get_covers = get_covers - self.timer.timeout.connect(self.do_one, type=Qt.QueuedConnection) self.db = db self.updated = set([]) self.total = len(ids) + self.keep_going = True def exec_(self): - self.timer.start(50) + QTimer.singleShot(50, self.do_one) ret = self.pd.exec_() if getattr(self.downloader, 'exception', None) is not None and \ ret == self.pd.Accepted: @@ -194,30 +190,37 @@ class DoDownload(QObject): return ret def cancel(self, *args): - self.timer.stop() + self.keep_going = False self.downloader.keep_going = False self.pd.reject() def do_one(self): - if self.downloader is None: - self.downloader = self.create() - self.downloader.start() - self.pd.set_min(0) - self.pd.set_max(self.downloader.total) try: - r = self.downloader.results.get_nowait() - self.handle_result(r) - except Empty: - pass - if not self.downloader.is_alive(): - self.timer.stop() - while True: - try: - r = self.downloader.results.get_nowait() - self.handle_result(r) - except Empty: - break - self.pd.accept() + if not self.keep_going: + return + if self.downloader is None: + self.downloader = self.create() + self.downloader.start() + self.pd.set_min(0) + self.pd.set_max(self.downloader.total) + try: + r = self.downloader.results.get_nowait() + self.handle_result(r) + except Empty: + pass + if not self.downloader.is_alive(): + while True: + try: + r = self.downloader.results.get_nowait() + self.handle_result(r) + except Empty: + break + self.pd.accept() + return + except: + self.cancel() + raise + QTimer.singleShot(50, self.do_one) def handle_result(self, r): id_, typ, ok, title = r diff --git a/src/calibre/gui2/metadata/single.py b/src/calibre/gui2/metadata/single.py index 1be954155c..0fa5c746e7 100644 --- a/src/calibre/gui2/metadata/single.py +++ 
b/src/calibre/gui2/metadata/single.py @@ -197,7 +197,7 @@ class MetadataSingleDialogBase(ResizableDialog): self.books_to_refresh = set([]) for widget in self.basic_metadata_widgets: widget.initialize(self.db, id_) - for widget in self.custom_metadata_widgets: + for widget in getattr(self, 'custom_metadata_widgets', []): widget.initialize(id_) # Commented out as it doesn't play nice with Next, Prev buttons #self.fetch_metadata_button.setFocus(Qt.OtherFocusReason) diff --git a/src/calibre/gui2/preferences/adding.py b/src/calibre/gui2/preferences/adding.py index e919d53b64..b4c4ce846a 100644 --- a/src/calibre/gui2/preferences/adding.py +++ b/src/calibre/gui2/preferences/adding.py @@ -12,6 +12,7 @@ from calibre.gui2.preferences import ConfigWidgetBase, test_widget, \ from calibre.gui2.preferences.adding_ui import Ui_Form from calibre.utils.config import prefs from calibre.gui2.widgets import FilenamePattern +from calibre.gui2 import gprefs class ConfigWidget(ConfigWidgetBase, Ui_Form): @@ -23,18 +24,23 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): r('read_file_metadata', prefs) r('swap_author_names', prefs) r('add_formats_to_existing', prefs) + choices = [ + (_('Ignore duplicate incoming formats'), 'ignore'), + (_('Overwrite existing duplicate formats'), 'overwrite'), + (_('Create new record for each duplicate format'), 'new record')] + r('automerge', gprefs, choices=choices) r('new_book_tags', prefs, setting=CommaSeparatedList) self.filename_pattern = FilenamePattern(self) self.metadata_box.layout().insertWidget(0, self.filename_pattern) self.filename_pattern.changed_signal.connect(self.changed_signal.emit) - def initialize(self): ConfigWidgetBase.initialize(self) self.filename_pattern.blockSignals(True) self.filename_pattern.initialize() self.filename_pattern.blockSignals(False) + self.opt_automerge.setEnabled(self.opt_add_formats_to_existing.isChecked()) def restore_defaults(self): ConfigWidgetBase.restore_defaults(self) diff --git 
a/src/calibre/gui2/preferences/adding.ui b/src/calibre/gui2/preferences/adding.ui index 75e6c466f0..f9a2c74444 100644 --- a/src/calibre/gui2/preferences/adding.ui +++ b/src/calibre/gui2/preferences/adding.ui @@ -6,7 +6,7 @@ 0 0 - 750 + 753 339 @@ -58,16 +58,33 @@ - + - If an existing book with a similar title and author is found that does not have the format being added, the format is added -to the existing book, instead of creating a new entry. If the existing book already has the format, then it is silently ignored. + Automerge: If books with similar titles and authors found, merge the incoming formats automatically into +existing book records. The box to the right controls what happens when an existing record already has +the incoming format. Note that this option also affects the Copy to library action. Title match ignores leading indefinite articles ("the", "a", "an"), punctuation, case, etc. Author match is exact. - If books with similar titles and authors found, &merge the new files automatically + &Automerge added books if they already exist in the calibre library: + + + + + + + Automerge: If books with similar titles and authors found, merge the incoming formats automatically into +existing book records. This box controls what happens when an existing record already has +the incoming format: + +Ignore duplicate incoming files - means that existing files in your calibre library will not be replaced +Overwrite existing duplicate files - means that existing files in your calibre library will be replaced +Create new record for each duplicate file - means that a new book entry will be created for each duplicate file + +Title matching ignores leading indefinite articles ("the", "a", "an"), punctuation, case, etc. +Author matching is exact. 
@@ -113,5 +130,22 @@ Title match ignores leading indefinite articles ("the", "a", - + + + opt_add_formats_to_existing + toggled(bool) + opt_automerge + setEnabled(bool) + + + 406 + 83 + + + 457 + 83 + + + + diff --git a/src/calibre/gui2/preferences/look_feel.py b/src/calibre/gui2/preferences/look_feel.py index 37ed90cc61..196ef16b08 100644 --- a/src/calibre/gui2/preferences/look_feel.py +++ b/src/calibre/gui2/preferences/look_feel.py @@ -46,7 +46,6 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): r('disable_tray_notification', config) r('use_roman_numerals_for_series_number', config) r('separate_cover_flow', config, restart_required=True) - r('search_as_you_type', config) r('show_child_bar', gprefs) choices = [(_('Small'), 'small'), (_('Medium'), 'medium'), @@ -116,7 +115,6 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): def refresh_gui(self, gui): - gui.search.search_as_you_type(config['search_as_you_type']) self.update_font_display() gui.tags_view.reread_collapse_parameters() diff --git a/src/calibre/gui2/preferences/look_feel.ui b/src/calibre/gui2/preferences/look_feel.ui index 2223167068..3f2bb3e145 100644 --- a/src/calibre/gui2/preferences/look_feel.ui +++ b/src/calibre/gui2/preferences/look_feel.ui @@ -124,23 +124,13 @@ - + Show cover &browser in a separate window (needs restart) - - - - Search as you type - - - true - - - @@ -177,7 +167,7 @@ if you never want subcategories - If a Tag Browser category has more than this number of items, it is divided + If a Tag Browser category has more than this number of items, it is divided up into sub-categories. If the partition method is set to disable, this value is ignored. 
diff --git a/src/calibre/gui2/preferences/main.py b/src/calibre/gui2/preferences/main.py index f7d49427c8..f25cc85dce 100644 --- a/src/calibre/gui2/preferences/main.py +++ b/src/calibre/gui2/preferences/main.py @@ -157,11 +157,12 @@ class Preferences(QMainWindow): run_wizard_requested = pyqtSignal() - def __init__(self, gui, initial_plugin=None): + def __init__(self, gui, initial_plugin=None, close_after_initial=False): QMainWindow.__init__(self, gui) self.gui = gui self.must_restart = False self.committed = False + self.close_after_initial = close_after_initial self.resize(900, 720) nh, nw = min_available_height()-25, available_width()-10 @@ -306,7 +307,7 @@ class Preferences(QMainWindow): def esc(self, *args): if self.stack.currentIndex() == 1: - self.hide_plugin() + self.cancel() elif self.stack.currentIndex() == 0: self.close() @@ -331,12 +332,15 @@ class Preferences(QMainWindow): show_copy_button=False) self.showing_widget.refresh_gui(self.gui) self.hide_plugin() - if must_restart and rc: + if self.close_after_initial or (must_restart and rc): self.close() def cancel(self, *args): - self.hide_plugin() + if self.close_after_initial: + self.close() + else: + self.hide_plugin() def restore_defaults(self, *args): self.showing_widget.restore_defaults() diff --git a/src/calibre/gui2/preferences/plugins.py b/src/calibre/gui2/preferences/plugins.py index 8f77a03c24..4b83df71c7 100644 --- a/src/calibre/gui2/preferences/plugins.py +++ b/src/calibre/gui2/preferences/plugins.py @@ -329,7 +329,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): return error_dialog(self, _('Must restart'), _('You must restart calibre before you can' ' configure the %s plugin')%plugin.name, show=True) - if plugin.do_user_config(): + if plugin.do_user_config(self.gui): self._plugin_model.refresh_plugin(plugin) elif op == 'remove': msg = _('Plugin {0} successfully removed').format(plugin.name) diff --git a/src/calibre/gui2/preferences/search.py b/src/calibre/gui2/preferences/search.py new file 
mode 100644 index 0000000000..81bc603df4 --- /dev/null +++ b/src/calibre/gui2/preferences/search.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai + +__license__ = 'GPL v3' +__copyright__ = '2010, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +from PyQt4.Qt import QApplication + +from calibre.gui2.preferences import ConfigWidgetBase, test_widget, \ + CommaSeparatedList +from calibre.gui2.preferences.search_ui import Ui_Form +from calibre.gui2 import config +from calibre.utils.config import prefs + +class ConfigWidget(ConfigWidgetBase, Ui_Form): + + def genesis(self, gui): + self.gui = gui + + r = self.register + + r('search_as_you_type', config) + r('highlight_search_matches', config) + r('limit_search_columns', prefs) + r('limit_search_columns_to', prefs, setting=CommaSeparatedList) + fl = gui.library_view.model().db.field_metadata.get_search_terms() + self.opt_limit_search_columns_to.update_items_cache(fl) + + def refresh_gui(self, gui): + gui.search.search_as_you_type(config['search_as_you_type']) + gui.library_view.model().set_highlight_only(config['highlight_search_matches']) + gui.search.do_search() + +if __name__ == '__main__': + app = QApplication([]) + test_widget('Interface', 'Search') + diff --git a/src/calibre/gui2/preferences/search.ui b/src/calibre/gui2/preferences/search.ui new file mode 100644 index 0000000000..360059ce56 --- /dev/null +++ b/src/calibre/gui2/preferences/search.ui @@ -0,0 +1,104 @@ + + + Form + + + + 0 + 0 + 670 + 392 + + + + Form + + + + + + Search as you &type + + + + + + + &Highlight search results instead of restricting the book list to the results + + + + + + + What to search by default + + + + + + When you enter a search term without a prefix, by default calibre will search all metadata for matches. For example, entering, "asimov" will search not just authors but title/tags/series/comments/etc. Use these options if you would like to change this behavior. 
+ + + true + + + + + + + &Limit the searched metadata + + + + + + + &Columns that non-prefixed searches are limited to: + + + opt_limit_search_columns_to + + + + + + + + + + Note that this option affects all searches, including saved searches and restrictions. Therefore, if you use this option, it is best to ensure that you always use prefixes in your saved searches. For example, use "series:Foundation" rather than just "Foundation" in a saved search + + + true + + + + + + + + + + Qt::Vertical + + + + 0 + 0 + + + + + + + + + MultiCompleteLineEdit + QLineEdit +

calibre/gui2.complete.h
+ + + + + diff --git a/src/calibre/gui2/search_box.py b/src/calibre/gui2/search_box.py index e4073a01c9..900c882adc 100644 --- a/src/calibre/gui2/search_box.py +++ b/src/calibre/gui2/search_box.py @@ -16,7 +16,6 @@ from calibre.gui2 import config from calibre.gui2.dialogs.confirm_delete import confirm from calibre.gui2.dialogs.saved_search_editor import SavedSearchEditor from calibre.gui2.dialogs.search import SearchDialog -from calibre.utils.config import dynamic from calibre.utils.search_query_parser import saved_searches from calibre.utils.icu import sort_key @@ -271,7 +270,7 @@ class SavedSearchBox(QComboBox): # {{{ def initialize(self, _search_box, colorize=False, help_text=_('Search')): self.search_box = _search_box try: - self.line_edit.setPlaceholderText(help_text) + self.line_edit.setPlaceholderText(help_text) except: # Using Qt < 4.7 pass @@ -376,9 +375,7 @@ class SearchBoxMixin(object): # {{{ unicode(self.search.toolTip()))) self.advanced_search_button.setStatusTip(self.advanced_search_button.toolTip()) self.clear_button.setStatusTip(self.clear_button.toolTip()) - self.search_highlight_only.stateChanged.connect(self.highlight_only_changed) - self.search_highlight_only.setChecked( - dynamic.get('search_highlight_only', False)) + self.search_options_button.clicked.connect(self.search_options_button_clicked) def focus_search_box(self, *args): self.search.setFocus(Qt.OtherFocusReason) @@ -402,14 +399,13 @@ class SearchBoxMixin(object): # {{{ self.search.do_search() self.focus_to_library() + def search_options_button_clicked(self): + self.iactions['Preferences'].do_config(initial_plugin=('Interface', + 'Search'), close_after_initial=True) + def focus_to_library(self): self.current_view().setFocus(Qt.OtherFocusReason) - def highlight_only_changed(self, toWhat): - dynamic.set('search_highlight_only', toWhat) - self.current_view().model().set_highlight_only(toWhat) - self.focus_to_library() - # }}} class SavedSearchBoxMixin(object): # {{{ diff --git 
a/src/calibre/gui2/tag_view.py b/src/calibre/gui2/tag_view.py index 041f0a715e..79199c6881 100644 --- a/src/calibre/gui2/tag_view.py +++ b/src/calibre/gui2/tag_view.py @@ -1214,7 +1214,7 @@ class TagBrowserMixin(object): # {{{ db.field_metadata.remove_user_categories() for k in d.categories: db.field_metadata.add_user_category('@' + k, k) - db.data.sqp_change_locations(db.field_metadata.get_search_terms()) + db.data.change_search_locations(db.field_metadata.get_search_terms()) self.tags_view.set_new_model() self.tags_view.recount() @@ -1259,9 +1259,8 @@ class TagBrowserMixin(object): # {{{ if rename_func: for item in to_delete: delete_func(item) - for text in to_rename: - for old_id in to_rename[text]: - rename_func(old_id, new_name=unicode(text)) + for old_id in to_rename: + rename_func(old_id, new_name=unicode(to_rename[old_id])) # Clean up the library view self.do_tag_item_renamed() diff --git a/src/calibre/gui2/tools.py b/src/calibre/gui2/tools.py index 655c7ea7c6..39224c8b35 100644 --- a/src/calibre/gui2/tools.py +++ b/src/calibre/gui2/tools.py @@ -9,7 +9,7 @@ Logic for setting up conversion jobs import cPickle, os -from PyQt4.Qt import QDialog, QProgressDialog, QString, QTimer, SIGNAL +from PyQt4.Qt import QDialog, QProgressDialog, QString, QTimer from calibre.ptempfile import PersistentTemporaryFile from calibre.gui2 import warning_dialog, question_dialog @@ -24,7 +24,8 @@ from calibre.ebooks.conversion.config import GuiRecommendations, \ load_defaults, load_specifics, save_specifics from calibre.gui2.convert import bulk_defaults_for_input_format -def convert_single_ebook(parent, db, book_ids, auto_conversion=False, out_format=None): +def convert_single_ebook(parent, db, book_ids, auto_conversion=False, # {{{ + out_format=None): changed = False jobs = [] bad = [] @@ -95,7 +96,9 @@ def convert_single_ebook(parent, db, book_ids, auto_conversion=False, out_format msg).exec_() return jobs, changed, bad +# }}} +# Bulk convert {{{ def convert_bulk_ebook(parent, 
queue, db, book_ids, out_format=None, args=[]): total = len(book_ids) if total == 0: @@ -125,14 +128,11 @@ class QueueBulk(QProgressDialog): self.parent = parent self.use_saved_single_settings = use_saved_single_settings self.i, self.bad, self.jobs, self.changed = 0, [], [], False - self.timer = QTimer(self) - self.connect(self.timer, SIGNAL('timeout()'), self.do_book) - self.timer.start() + QTimer.singleShot(0, self.do_book) self.exec_() def do_book(self): if self.i >= len(self.book_ids): - self.timer.stop() return self.do_queue() book_id = self.book_ids[self.i] self.i += 1 @@ -191,6 +191,7 @@ class QueueBulk(QProgressDialog): self.setValue(self.i) except NoSupportedInputFormats: self.bad.append(book_id) + QTimer.singleShot(0, self.do_book) def do_queue(self): self.hide() @@ -209,7 +210,9 @@ class QueueBulk(QProgressDialog): self.jobs.reverse() self.queue(self.jobs, self.changed, self.bad, *self.args) -def fetch_scheduled_recipe(arg): +# }}} + +def fetch_scheduled_recipe(arg): # {{{ fmt = prefs['output_format'].lower() pt = PersistentTemporaryFile(suffix='_recipe_out.%s'%fmt.lower()) pt.close() @@ -250,7 +253,9 @@ def fetch_scheduled_recipe(arg): return 'gui_convert', args, _('Fetch news from ')+arg['title'], fmt.upper(), [pt] -def generate_catalog(parent, dbspec, ids, device_manager, db): +# }}} + +def generate_catalog(parent, dbspec, ids, device_manager, db): # {{{ from calibre.gui2.dialogs.catalog import Catalog # Build the Catalog dialog in gui2.dialogs.catalog @@ -308,8 +313,9 @@ def generate_catalog(parent, dbspec, ids, device_manager, db): # Which then calls gui2.convert.gui_conversion:gui_catalog() with the args inline return 'gui_catalog', args, _('Generate catalog'), out.name, d.catalog_sync, \ d.catalog_title +# }}} -def convert_existing(parent, db, book_ids, output_format): +def convert_existing(parent, db, book_ids, output_format): # {{{ already_converted_ids = [] already_converted_titles = [] for book_id in book_ids: @@ -325,3 +331,5 @@ def 
convert_existing(parent, db, book_ids, output_format): book_ids = [x for x in book_ids if x not in already_converted_ids] return book_ids +# }}} + diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py index 907dd577b8..5ac7e6a45d 100644 --- a/src/calibre/gui2/ui.py +++ b/src/calibre/gui2/ui.py @@ -483,8 +483,10 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{ action.location_selected(location) if location == 'library': self.search_restriction.setEnabled(True) + self.search_options_button.setEnabled(True) else: self.search_restriction.setEnabled(False) + self.search_options_button.setEnabled(False) # Reset the view in case something changed while it was invisible self.current_view().reset() self.set_number_of_books_shown() diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index e818e6a3c0..1330d10e59 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -11,7 +11,7 @@ from itertools import repeat from datetime import timedelta from threading import Thread -from calibre.utils.config import tweaks +from calibre.utils.config import tweaks, prefs from calibre.utils.date import parse_date, now, UNDEFINED_DATE from calibre.utils.search_query_parser import SearchQueryParser from calibre.utils.pyparsing import ParseException @@ -182,15 +182,16 @@ class ResultCache(SearchQueryParser): # {{{ self.first_sort = True self.search_restriction = '' self.field_metadata = field_metadata - all_search_locations = field_metadata.get_search_terms() - SearchQueryParser.__init__(self, all_search_locations, optimize=True) + self.all_search_locations = field_metadata.get_search_terms() + SearchQueryParser.__init__(self, self.all_search_locations, optimize=True) self.build_date_relop_dict() self.build_numeric_relop_dict() def break_cycles(self): self._data = self.field_metadata = self.FIELD_MAP = \ self.numeric_search_relops = self.date_search_relops = \ - self.db_prefs = None + self.db_prefs = 
self.all_search_locations = None + self.sqp_change_locations([]) def __getitem__(self, row): @@ -218,6 +219,10 @@ class ResultCache(SearchQueryParser): # {{{ def universal_set(self): return set([i[0] for i in self._data if i is not None]) + def change_search_locations(self, locations): + self.sqp_change_locations(locations) + self.all_search_locations = locations + def build_date_relop_dict(self): ''' Because the database dates have time in them, we can't use direct @@ -432,6 +437,7 @@ class ResultCache(SearchQueryParser): # {{{ # get metadata key associated with the search term. Eliminates # dealing with plurals and other aliases location = self.field_metadata.search_term_to_field_key(icu_lower(location.strip())) + # grouped search terms if isinstance(location, list): if allow_recursion: for loc in location: @@ -440,6 +446,20 @@ class ResultCache(SearchQueryParser): # {{{ return matches raise ParseException(query, len(query), 'Recursive query group detected', self) + # apply the limit if appropriate + if location == 'all' and prefs['limit_search_columns'] and \ + prefs['limit_search_columns_to']: + terms = set([]) + for l in prefs['limit_search_columns_to']: + l = icu_lower(l.strip()) + if l and l != 'all' and l in self.all_search_locations: + terms.add(l) + if terms: + for l in terms: + matches |= self.get_matches(l, query, + candidates=candidates, allow_recursion=allow_recursion) + return matches + if location in self.field_metadata: fm = self.field_metadata[location] # take care of dates special case diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index 23127035d2..cb55b2318d 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -4442,46 +4442,39 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1]) # Insert the link to the series or remove aTag = body.find('a', attrs={'class':'series_id'}) - if book['series']: - if self.opts.generate_series: - aTag['href'] = 
"%s.html#%s_series" % ('BySeries', - re.sub('\W','',book['series']).lower()) - else: - aTag.extract() + if aTag: + if book['series']: + if self.opts.generate_series: + aTag['href'] = "%s.html#%s_series" % ('BySeries', + re.sub('\W','',book['series']).lower()) + else: + aTag.extract() - # Insert the author link (always) + # Insert the author link aTag = body.find('a', attrs={'class':'author'}) - if self.opts.generate_authors: + if self.opts.generate_authors and aTag: aTag['href'] = "%s.html#%s" % ("ByAlphaAuthor", self.generateAuthorAnchor(book['author'])) if publisher == ' ': - try: - publisherTag = body.find('td', attrs={'class':'publisher'}) + publisherTag = body.find('td', attrs={'class':'publisher'}) + if publisherTag: publisherTag.contents[0].replaceWith(' ') - except: - pass if not genres: - try: - genresTag = body.find('p',attrs={'class':'genres'}) + genresTag = body.find('p',attrs={'class':'genres'}) + if genresTag: genresTag.extract() - except: - pass if not formats: - try: - formatsTag = body.find('p',attrs={'class':'formats'}) + formatsTag = body.find('p',attrs={'class':'formats'}) + if formatsTag: formatsTag.extract() - except: - pass if note_content == '': - try: - tdTag = body.find('td', attrs={'class':'notes'}) + tdTag = body.find('td', attrs={'class':'notes'}) + if tdTag: tdTag.contents[0].replaceWith(' ') - except: - pass emptyTags = body.findAll('td', attrs={'class':'empty'}) for mt in emptyTags: diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 792081732c..5702b75317 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -414,7 +414,6 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): row = self.data._data[index] if index_is_id else self.data[index] return row[self.FIELD_MAP['path']].replace('/', os.sep) - def abspath(self, index, index_is_id=False, create_dirs=True): 'Return the absolute path to the directory containing this books files as a unicode 
string.' path = os.path.join(self.library_path, self.path(index, index_is_id=index_is_id)) @@ -422,7 +421,6 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): os.makedirs(path) return path - def construct_path_name(self, id): ''' Construct the directory name for this book based on its metadata. @@ -432,7 +430,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): authors = _('Unknown') author = ascii_filename(authors.split(',')[0])[:self.PATH_LIMIT].decode(filesystem_encoding, 'replace') title = ascii_filename(self.title(id, index_is_id=True))[:self.PATH_LIMIT].decode(filesystem_encoding, 'replace') - path = author + '/' + title + ' (%d)'%id + while author[-1] in (' ', '.'): + author = author[:-1] + if not author: + author = ascii_filename(_('Unknown')).decode(filesystem_encoding, 'replace') + path = author + '/' + title + ' (%d)'%id return path def construct_file_name(self, id): diff --git a/src/calibre/library/server/content.py b/src/calibre/library/server/content.py index 8af70d5675..11ea2b951e 100644 --- a/src/calibre/library/server/content.py +++ b/src/calibre/library/server/content.py @@ -124,8 +124,7 @@ class ContentServer(object): cherrypy.request.headers.get('Want-OPDS-Catalog', 919) != 919 or \ ua.startswith('Stanza') - # A better search would be great - want_mobile = self.MOBILE_UA.search(ua) is not None + want_mobile = self.is_mobile_browser(ua) if self.opts.develop and not want_mobile: cherrypy.log('User agent: '+ua) diff --git a/src/calibre/library/server/mobile.py b/src/calibre/library/server/mobile.py index 0992e6c30b..1bf9f549bc 100644 --- a/src/calibre/library/server/mobile.py +++ b/src/calibre/library/server/mobile.py @@ -169,6 +169,10 @@ class MobileServer(object): MOBILE_UA = re.compile('(?i)(?:iPhone|Opera Mini|NetFront|webOS|Mobile|Android|imode|DoCoMo|Minimo|Blackberry|MIDP|Symbian|HD2|Kindle)') + def is_mobile_browser(self, ua): + match = self.MOBILE_UA.search(ua) + return match is not None and 
'iPad' not in ua + def add_routes(self, connect): connect('mobile', '/mobile', self.mobile) connect('mobile_css', '/mobile/style.css', self.mobile_css) diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index 18c53ade5d..cdae20ea3b 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -316,6 +316,27 @@ When you first run |app|, it will ask you for a folder in which to store your bo Metadata about the books is stored in the file ``metadata.db`` at the top level of the library folder This file is is a sqlite database. When backing up your library make sure you copy the entire folder and all its sub-folders. +How does |app| manage author names and sorting? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Author names are complex, especially across cultures. |app| has a very flexible strategy for managing author names. The first thing to understand is that books and authors are separate entities in |app|. A book can have more than one author, and an author can have more than one book. You can manage the authors of a book by the edit metadata dialog. You can manage individual authors by right clicking on the author in the Tag Browser on the left of the main |app| screen and selecting :guilabel:`Manage authors`. Using this dialog you can change the name of an author and also how that name is sorted. This will automatically change the name of the author in all the books of that author. When a book has multiple authors, separate their names using the & character. + +Now coming to author name sorting: + + * When a new author is added to |app| (this happens whenever a book by a new author is added), |app| automatically computes a sort string for both the book and the author. + * Authors in the Tag Browser are sorted by the sort value for the **authors**. Remember that this is different from the Author sort field for a book. 
+ * By default, this sort algorithm assumes that the author name is in ``First name Last name`` format and generates a ``Last name, First name`` sort value. + * You can change this algorithm by going to Preferences->Tweaks and setting the :guilabel:`author_sort_copy_method` tweak. + * You can force |app| to recalculate the author sort values for every author by right clicking on any author and selecting :guilabel:`Manage authors`, then pushing the `Recalculate all author sort values` button. Do this after you have set the author_sort_copy_method tweak to what you want. + * You can force |app| to recalculate the author sort values for all books by using the bulk metadata edit dialog (select all books and click edit metadata, check the `Automatically set author sort` checkbox, then press OK.) + * When recalculating the author sort values for books, |app| uses the author sort values for each individual author. Therefore, ensure that the individual author sort values are correct before recalculating the books' author sort values. + * You can control whether the Tag Browser displays authors using their names or their sort values by setting the :guilabel:`categories_use_field_for_author_name` tweak in Preferences->Tweaks. + +With all this flexibility, it is possible to have |app| manage your author names however you like. For example, one common request is to have |app| display author names LN, FN. To do this, first set the ``author_sort_copy_method`` to ``copy``. Then change all author names to LN, FN via the Manage authors dialog. Then have |app| recalculate author sort values for both authors and books as described above. + +Note that you can set an individual author's sort value to whatever you want using :guilabel:`Manage authors`. This is useful when dealing with names that |app| will not get right, such as complex multi-part names like Miguel de Cervantes Saavedra or when dealing with Asian names like Sun Tzu.
+ + Why doesn't |app| let me store books in my own directory structure? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/calibre/utils/config.py b/src/calibre/utils/config.py index 11c58f7769..a2ceaced68 100644 --- a/src/calibre/utils/config.py +++ b/src/calibre/utils/config.py @@ -728,6 +728,17 @@ def _prefs(): c.add_opt('user_categories', default={}, help=_('User-created tag browser categories')) c.add_opt('manage_device_metadata', default='manual', help=_('How and when calibre updates metadata on the device.')) + c.add_opt('limit_search_columns', default=False, + help=_('When searching for text without using lookup ' + 'prefixes, as for example, Red instead of title:Red, ' + 'limit the columns searched to those named below.')) + c.add_opt('limit_search_columns_to', + default=['title', 'authors', 'tags', 'series', 'publisher'], + help=_('Choose columns to be searched when not using prefixes, ' + 'as for example, when searching for Redd instead of ' + 'title:Red. Enter a list of search/lookup names ' + 'separated by commas. Only takes effect if you set the option ' + 'to limit search columns above.')) c.add_opt('migrated', default=False, help='For Internal use. 
Don\'t modify.') return c diff --git a/src/calibre/utils/html2textile.py b/src/calibre/utils/html2textile.py index 82797a81ad..786e912e36 100644 --- a/src/calibre/utils/html2textile.py +++ b/src/calibre/utils/html2textile.py @@ -77,7 +77,7 @@ class EchoTarget: new_tag = '~' newline = '' elif tag == 'span': - new_tag = '%' + new_tag = '' newline = '' elif tag == 'a': self.block = True @@ -147,7 +147,7 @@ class EchoTarget: elif tag == 'sub': self.final_output.append('~') elif tag == 'span': - self.final_output.append('%') + self.final_output.append('') elif tag == 'a': if self.a_part['title']: textilized = ' "%s (%s)":%s ' % ( diff --git a/src/calibre/utils/localization.py b/src/calibre/utils/localization.py index 037a147e28..97356df081 100644 --- a/src/calibre/utils/localization.py +++ b/src/calibre/utils/localization.py @@ -104,6 +104,7 @@ _extra_lang_codes = { 'en_IN' : _('English (India)'), 'en_TH' : _('English (Thailand)'), 'en_CY' : _('English (Cyprus)'), + 'en_CZ' : _('English (Czechoslovakia)'), 'en_PK' : _('English (Pakistan)'), 'en_HR' : _('English (Croatia)'), 'en_IL' : _('English (Israel)'),