diff --git a/resources/recipes/abc_au.recipe b/resources/recipes/abc_au.recipe new file mode 100644 index 0000000000..1330f8e4b5 --- /dev/null +++ b/resources/recipes/abc_au.recipe @@ -0,0 +1,54 @@ +__license__ = 'GPL v3' +__copyright__ = '2010, Dean Cording' +''' +abc.net.au/news +''' +import re +from calibre.web.feeds.recipes import BasicNewsRecipe + +class ABCNews(BasicNewsRecipe): + title = 'ABC News' + __author__ = 'Dean Cording' + description = 'News from Australia' + masthead_url = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png' + cover_url = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png' + + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = False + #delay = 1 + use_embedded_content = False + encoding = 'utf8' + publisher = 'ABC News' + category = 'News, Australia, World' + language = 'en_AU' + publication_type = 'newsportal' + preprocess_regexps = [(re.compile(r'', re.DOTALL), lambda m: '')] + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + ,'linearize_tables': False + } + + keep_only_tags = dict(id='article') + + remove_tags = [dict(attrs={'class':['related', 'tags']}), + dict(id='statepromo') + ] + + remove_attributes = ['width','height'] + + feeds = [ + ('Top Stories', 'http://www.abc.net.au/news/syndicate/topstoriesrss.xml'), + ('Canberra', 'http://www.abc.net.au/news/indexes/idx-act/rss.xml'), + ('Sydney', 'http://www.abc.net.au/news/indexes/sydney/rss.xml'), + ('Melbourne', 'http://www.abc.net.au/news/indexes/melbourne/rss.xml'), + ('Brisbane', 'http://www.abc.net.au/news/indexes/brisbane/rss.xml'), + ('Perth', 'http://www.abc.net.au/news/indexes/perth/rss.xml'), + ('Australia', 'http://www.abc.net.au/news/indexes/idx-australia/rss.xml'), + ('World', 'http://www.abc.net.au/news/indexes/world/rss.xml'), + ('Business', 'http://www.abc.net.au/news/indexes/business/rss.xml'), + ('Science and Technology', 'http://www.abc.net.au/news/tag/science-and-technology/rss.xml'), + ] diff --git a/resources/recipes/business_spectator.recipe b/resources/recipes/business_spectator.recipe new file mode 100644 index 0000000000..ef58424c6c --- /dev/null +++ b/resources/recipes/business_spectator.recipe @@ -0,0 +1,48 @@ +__license__ = 'GPL v3' +__copyright__ = '2010, Dean Cording' +''' +abc.net.au/news +''' +import re +from calibre.web.feeds.recipes import BasicNewsRecipe + +class BusinessSpectator(BasicNewsRecipe): + title = 'Business Spectator' + __author__ = 'Dean Cording' + description = 'Australian Business News & commentary delivered the way you want it.' + masthead_url = 'http://www.businessspectator.com.au/bs.nsf/logo-business-spectator.gif' + cover_url = masthead_url + + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + #delay = 1 + use_embedded_content = False + encoding = 'utf8' + publisher = 'Business Spectator' + category = 'News, Australia, Business' + language = 'en_AU' + publication_type = 'newsportal' + preprocess_regexps = [(re.compile(r'', re.DOTALL), lambda m: '')] + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + ,'linearize_tables': False + } + + keep_only_tags = [dict(id='storyHeader'), dict(id='body-html')] + + remove_tags = [dict(attrs={'class':'hql'})] + + remove_attributes = ['width','height','style'] + + feeds = [ + ('Top Stories', 'http://www.businessspectator.com.au/top-stories.rss'), + ('Alan Kohler', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=spectators&cat=Alan%20Kohler'), + ('Robert Gottliebsen', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=spectators&cat=Robert%20Gottliebsen'), + ('Stephen Bartholomeusz', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=spectators&cat=Stephen%20Bartholomeusz'), + ('Daily Dossier', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=kgb&cat=dossier'), + ('Australia', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=region&cat=australia'), + ] diff --git a/resources/recipes/esenja.recipe b/resources/recipes/esenja.recipe new file mode 100644 index 0000000000..b8b94ad66e --- /dev/null +++ b/resources/recipes/esenja.recipe @@ -0,0 +1,87 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2010, matek09, matek09@gmail.com' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class Esensja(BasicNewsRecipe): + + title = u'Esensja' + __author__ = 'matek09' + description = 'Monthly magazine' + encoding = 'utf-8' + no_stylesheets = True + language = 'pl' + remove_javascript = True + HREF = '0' + + #keep_only_tags =[] + #keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'article'}) + remove_tags_before = dict(dict(name = 'div', attrs = {'class' : 't-title'})) + remove_tags_after = dict(dict(name = 'img', attrs = {'src' : '../../../2000/01/img/tab_bot.gif'})) + + remove_tags =[] + remove_tags.append(dict(name = 'img', attrs = {'src' : '../../../2000/01/img/tab_top.gif'})) + remove_tags.append(dict(name = 'img', attrs = {'src' : '../../../2000/01/img/tab_bot.gif'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 't-title2 nextpage'})) + + extra_css = ''' + .t-title {font-size: x-large; font-weight: bold; text-align: left} + .t-author {font-size: x-small; text-align: left} + .t-title2 {font-size: x-small; font-style: italic; text-align: left} + .text {font-size: small; text-align: left} + .annot-ref {font-style: italic; text-align: left} + ''' + + preprocess_regexps = [(re.compile(r'alt="[^"]*"'), + lambda match: '')] + + def parse_index(self): + soup = self.index_to_soup('http://www.esensja.pl/magazyn/') + a = soup.find('a', attrs={'href' : re.compile('.*/index.html')}) + year = a['href'].split('/')[0] + month = a['href'].split('/')[1] + self.HREF = 'http://www.esensja.pl/magazyn/' + year + '/' + month + '/iso/' + soup = self.index_to_soup(self.HREF + '01.html') + self.cover_url = 'http://www.esensja.pl/magazyn/' + year + '/' + month + '/img/ilustr/cover_b.jpg' + feeds = [] + intro = soup.find('div', attrs={'class' : 'n-title'}) + introduction = {'title' : self.tag_to_string(intro.a), + 'url' : self.HREF + intro.a['href'], + 'date' : '', + 'description' : ''} + chapter = 'Wprowadzenie' + subchapter = '' + articles = [] + articles.append(introduction) + for tag in intro.findAllNext(attrs={'class': ['chapter', 'subchapter', 'n-title']}): + if tag.name in 'td': + if len(articles) > 0: + section = chapter + if len(subchapter) > 0: + section += ' - ' + subchapter + feeds.append((section, articles)) + articles = [] + if tag['class'] == 'chapter': + chapter = self.tag_to_string(tag).capitalize() + subchapter = '' + else: + subchapter = self.tag_to_string(tag) + subchapter = self.tag_to_string(tag) + continue + articles.append({'title' : self.tag_to_string(tag.a), 'url' : self.HREF + tag.a['href'], 'date' : '', 'description' : ''}) + + a = self.index_to_soup(self.HREF + tag.a['href']) + i = 1 + while True: + div = a.find('div', attrs={'class' : 't-title2 nextpage'}) + if div is not None: + a = self.index_to_soup(self.HREF + div.a['href']) + articles.append({'title' : self.tag_to_string(tag.a) + ' c. d. ' + str(i), 'url' : self.HREF + div.a['href'], 'date' : '', 'description' : ''}) + i = i + 1 + else: + break + + return feeds diff --git a/resources/recipes/fr_online.recipe b/resources/recipes/fr_online.recipe index e4a817d0d6..b3448c17dc 100644 --- a/resources/recipes/fr_online.recipe +++ b/resources/recipes/fr_online.recipe @@ -1,67 +1,61 @@ -__license__ = 'GPL v3' -__copyright__ = '2009, Justus Bisser ' +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2010, Christian Schmitt' + ''' fr-online.de ''' -import re -from calibre.web.feeds.news import BasicNewsRecipe +from calibre.web.feeds.recipes import BasicNewsRecipe -class Spiegel_ger(BasicNewsRecipe): - title = 'Frankfurter Rundschau' - __author__ = 'Justus Bisser' - description = "Dies ist die Online-Ausgabe der Frankfurter Rundschau. Um die abgerufenen individuell einzustellen bearbeiten sie die Liste im erweiterten Modus. Die Feeds findet man auf http://www.fr-online.de/verlagsservice/fr_newsreader/?em_cnt=574255" - publisher = 'Druck- und Verlagshaus Frankfurt am Main GmbH' - category = 'FR Online, Frankfurter Rundschau, Nachrichten, News,Dienste, RSS, RSS, Feedreader, Newsfeed, iGoogle, Netvibes, Widget' - oldest_article = 7 - max_articles_per_feed = 100 - language = 'de' - lang = 'de-DE' - no_stylesheets = True - use_embedded_content = False - #encoding = 'cp1252' +class FROnlineRecipe(BasicNewsRecipe): + title = 'Frankfurter Rundschau' + __author__ = 'maccs' + description = 'Nachrichten aus D und aller Welt' + encoding = 'utf-8' + masthead_url = 'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png' + publisher = 'Druck- und Verlagshaus Frankfurt am Main GmbH' + category = 'news, germany, world' + language = 'de' + publication_type = 'newspaper' + use_embedded_content = False + remove_javascript = True + no_stylesheets = True + oldest_article = 1 # Increase this number if you're interested in older articles + max_articles_per_feed = 50 # Seems a reasonable number to me + extra_css = ''' + body { font-family: "arial", "verdana", "geneva", sans-serif; font-size: 12px; margin: 0px; background-color: #ffffff;} + .imgSubline{background-color: #f4f4f4; font-size: 0.8em;} + .p--heading-1 {font-weight: bold;} + .calibre_navbar {font-size: 0.8em; font-family: "arial", "verdana", "geneva", sans-serif;} + ''' + remove_tags = [dict(name='div', attrs={'id':'Logo'})] + cover_url = 'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png' + cover_margins = (100, 150, '#ffffff') - conversion_options = { - 'comment' : description - , 'tags' : category - , 'publisher' : publisher - , 'language' : lang - } - recursions = 0 - max_articles_per_feed = 100 - #keep_only_tags = [dict(name='div', attrs={'class':'text'})] - #tags_remove = [dict(name='div', attrs={'style':'text-align: left; margin: 4px 0px 0px 4px; width: 200px; float: right;'})] - remove_attributes = ['style'] - feeds = [] - #remove_tags_before = [dict(name='div', attrs={'style':'padding-left: 0px;'})] - #remove_tags_after = [dict(name='div', attrs={'class':'box_head_text'})] + feeds = [] + feeds.append(('Startseite', u'http://www.fr-online.de/home/-/1472778/1472778/-/view/asFeed/-/index.xml')) + feeds.append(('Politik', u'http://www.fr-online.de/politik/-/1472596/1472596/-/view/asFeed/-/index.xml')) + feeds.append(('Meinung', u'http://www.fr-online.de/politik/meinung/-/1472602/1472602/-/view/asFeed/-/index.xml')) + feeds.append(('Wirtschaft', u'http://www.fr-online.de/wirtschaft/-/1472780/1472780/-/view/asFeed/-/index.xml')) + feeds.append(('Sport', u'http://www.fr-online.de/sport/-/1472784/1472784/-/view/asFeed/-/index.xml')) + feeds.append(('Eintracht Frankfurt', u'http://www.fr-online.de/sport/eintracht-frankfurt/-/1473446/1473446/-/view/asFeed/-/index.xml')) + feeds.append(('Kultur und Medien', u'http://www.fr-online.de/kultur/-/1472786/1472786/-/view/asFeed/-/index.xml')) + feeds.append(('Panorama', u'http://www.fr-online.de/panorama/-/1472782/1472782/-/view/asFeed/-/index.xml')) + feeds.append(('Frankfurt', u'http://www.fr-online.de/frankfurt/-/1472798/1472798/-/view/asFeed/-/index.xml')) + feeds.append(('Rhein-Main', u'http://www.fr-online.de/rhein-main/-/1472796/1472796/-/view/asFeed/-/index.xml')) + feeds.append(('Hanau', u'http://www.fr-online.de/rhein-main/hanau/-/1472866/1472866/-/view/asFeed/-/index.xml')) + feeds.append(('Darmstadt', u'http://www.fr-online.de/rhein-main/darmstadt/-/1472858/1472858/-/view/asFeed/-/index.xml')) + feeds.append(('Wiesbaden', u'http://www.fr-online.de/rhein-main/wiesbaden/-/1472860/1472860/-/view/asFeed/-/index.xml')) + feeds.append(('Offenbach', u'http://www.fr-online.de/rhein-main/offenbach/-/1472856/1472856/-/view/asFeed/-/index.xml')) + feeds.append(('Bad Homburg', u'http://www.fr-online.de/rhein-main/bad-homburg/-/1472864/1472864/-/view/asFeed/-/index.xml')) + feeds.append(('Digital', u'http://www.fr-online.de/digital/-/1472406/1472406/-/view/asFeed/-/index.xml')) + feeds.append(('Wissenschaft', u'http://www.fr-online.de/wissenschaft/-/1472788/1472788/-/view/asFeed/-/index.xml')) - # enable for all news - allNews = 0 - if allNews: - feeds = [(u'Frankfurter Rundschau', u'http://www.fr-online.de/rss/sport/index.xml')] - else: - #select the feeds you like - feeds = [(u'Nachrichten', u'http://www.fr-online.de/rss/politik/index.xml')] - feeds.append((u'Kommentare und Analysen', u'http://www.fr-online.de/rss/meinung/index.xml')) - feeds.append((u'Dokumentationen', u'http://www.fr-online.de/rss/dokumentation/index.xml')) - feeds.append((u'Deutschlandtrend', u'http://www.fr-online.de/rss/deutschlandtrend/index.xml')) - feeds.append((u'Wirtschaft', u'http://www.fr-online.de/rss/wirtschaft/index.xml')) - feeds.append((u'Sport', u'http://www.fr-online.de/rss/sport/index.xml')) - feeds.append((u'Feuilleton', u'http://www.fr-online.de/rss/feuilleton/index.xml')) - feeds.append((u'Panorama', u'http://www.fr-online.de/rss/panorama/index.xml')) - feeds.append((u'Rhein Main und Hessen', u'http://www.fr-online.de/rss/hessen/index.xml')) - feeds.append((u'Fitness und Gesundheit', u'http://www.fr-online.de/rss/fit/index.xml')) - feeds.append((u'Multimedia', u'http://www.fr-online.de/rss/multimedia/index.xml')) - feeds.append((u'Wissen und Bildung', u'http://www.fr-online.de/rss/wissen/index.xml')) - def get_article_url(self, article): - url = article.link - regex = re.compile("0C[0-9]{6,8}0A?") + def print_version(self, url): + return url.replace('index.html', 'view/printVersion/-/index.html') - liste = regex.findall(url) - string = liste.pop(0) - string = string[2:len(string)-1] - return "http://www.fr-online.de/_em_cms/_globals/print.php?em_cnt=" + string diff --git a/resources/recipes/histmag.recipe b/resources/recipes/histmag.recipe new file mode 100644 index 0000000000..38956e7995 --- /dev/null +++ b/resources/recipes/histmag.recipe @@ -0,0 +1,59 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2010, matek09, matek09@gmail.com' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class Histmag(BasicNewsRecipe): + + title = u'Histmag' + __author__ = 'matek09' + description = u"Artykuly historyczne i publicystyczne" + encoding = 'utf-8' + no_stylesheets = True + language = 'pl' + remove_javascript = True + #max_articles_per_feed = 1 + remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'article'})) + remove_tags_after = dict(dict(name = 'h2', attrs = {'class' : 'komentarze'})) + #keep_only_tags =[] + #keep_only_tags.append(dict(name = 'h2')) + #keep_only_tags.append(dict(name = 'p')) + + remove_tags =[] + remove_tags.append(dict(name = 'p', attrs = {'class' : 'podpis'})) + remove_tags.append(dict(name = 'h2', attrs = {'class' : 'komentarze'})) + remove_tags.append(dict(name = 'img', attrs = {'src' : 'style/buttons/wesprzyjnas-1.jpg'})) + + preprocess_regexps = [(re.compile(r''), lambda match: '

'), + (re.compile(r''), lambda match: '

')] + extra_css = ''' + .left {font-size: x-small} + .right {font-size: x-small} + ''' + + def find_articles(self, soup): + articles = [] + for div in soup.findAll('div', attrs={'class' : 'text'}): + articles.append({ + 'title' : self.tag_to_string(div.h3.a), + 'url' : 'http://www.histmag.org/' + div.h3.a['href'], + 'date' : self.tag_to_string(div.next('p')).split('|')[0], + 'description' : self.tag_to_string(div.next('p', podpis=False)), + }) + return articles + + def parse_index(self): + soup = self.index_to_soup('http://histmag.org/?arc=4&dx=0') + feeds = [] + feeds.append((u"Artykuly historyczne", self.find_articles(soup))) + soup = self.index_to_soup('http://histmag.org/?arc=5&dx=0') + feeds.append((u"Artykuly publicystyczne", self.find_articles(soup))) + soup = self.index_to_soup('http://histmag.org/?arc=1&dx=0') + feeds.append((u"Wydarzenia", self.find_articles(soup))) + + return feeds + + diff --git a/resources/recipes/newsweek_polska.recipe b/resources/recipes/newsweek_polska.recipe index 31dd8ccddd..4227a88026 100644 --- a/resources/recipes/newsweek_polska.recipe +++ b/resources/recipes/newsweek_polska.recipe @@ -1,19 +1,22 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com' +__copyright__ = '2010, matek09, matek09@gmail.com' from calibre.web.feeds.news import BasicNewsRecipe class Newsweek(BasicNewsRecipe): - EDITION = 0 + FIND_LAST_FULL_ISSUE = True + EDITION = '0' + EXCLUDE_LOCKED = True + LOCKED_ICO = 'http://www.newsweek.pl/bins/media/static/newsweek/img/ico_locked.gif' title = u'Newsweek Polska' - __author__ = 'Mateusz Kielar' + __author__ = 'matek09' description = 'Weekly magazine' encoding = 'utf-8' no_stylesheets = True - language = 'en' + language = 'pl' remove_javascript = True keep_only_tags =[] @@ -33,34 +36,54 @@ class Newsweek(BasicNewsRecipe): def print_version(self, url): return url.replace("http://www.newsweek.pl/artykuly/wydanie/" + str(self.EDITION), "http://www.newsweek.pl/artykuly") + '/print' + def is_locked(self, a): + if a.findNext('img')['src'] == 'http://www.newsweek.pl/bins/media/static/newsweek/img/ico_locked.gif': + return True + else: + return False + + def is_full(self, issue_soup): + if len(issue_soup.findAll('img', attrs={'src' : 'http://www.newsweek.pl/bins/media/static/newsweek/img/ico_locked.gif'})) > 1: + return False + else: + return True + def find_last_full_issue(self): - page = self.index_to_soup('http://www.newsweek.pl/Frames/IssueCover.aspx') - issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href'] - page = self.index_to_soup(issue) - issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href'] - page = self.index_to_soup(issue) - self.EDITION = page.find('a', attrs={'target' : '_parent'})['href'].replace('/wydania/','') + frame_url = 'http://www.newsweek.pl/Frames/IssueCover.aspx' + while True: + frame_soup = self.index_to_soup(frame_url) + self.EDITION = frame_soup.find('a', attrs={'target' : '_parent'})['href'].replace('/wydania/','') + issue_soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + self.EDITION) + if self.is_full(issue_soup): + break + frame_url = 'http://www.newsweek.pl/Frames/' + frame_soup.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href'] + + def parse_index(self): - self.find_last_full_issue() - soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + str(self.EDITION)) + if self.FIND_LAST_FULL_ISSUE: + self.find_last_full_issue() + soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + self.EDITION) img = soup.find('img', id="ctl00_C1_PaperIsssueView_IssueImage", src=True) self.cover_url = img['src'] feeds = [] parent = soup.find(id='content-left-big') for txt in parent.findAll(attrs={'class':'txt_normal_red strong'}): - section = self.tag_to_string(txt).capitalize() articles = list(self.find_articles(txt)) - feeds.append((section, articles)) + if len(articles) > 0: + section = self.tag_to_string(txt).capitalize() + feeds.append((section, articles)) return feeds def find_articles(self, txt): for a in txt.findAllNext( attrs={'class':['strong','hr']}): if a.name in "div": break + if (not self.FIND_LAST_FULL_ISSUE) & self.EXCLUDE_LOCKED & self.is_locked(a): + continue yield { 'title' : self.tag_to_string(a), - 'url' : 'http://www.newsweek.pl'+a['href'], + 'url' : 'http://www.newsweek.pl' + a['href'], 'date' : '', 'description' : '' } diff --git a/resources/recipes/nin.recipe b/resources/recipes/nin.recipe index 70fd998a09..27942f7d43 100644 --- a/resources/recipes/nin.recipe +++ b/resources/recipes/nin.recipe @@ -8,12 +8,15 @@ www.nin.co.rs import re from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe +from contextlib import nested, closing +from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag +from calibre import entity_to_unicode class Nin(BasicNewsRecipe): title = 'NIN online' __author__ = 'Darko Miletic' description = 'Nedeljne Informativne Novine' - publisher = 'NIN d.o.o.' + publisher = 'NIN d.o.o. - Ringier d.o.o.' category = 'news, politics, Serbia' no_stylesheets = True delay = 1 @@ -26,18 +29,29 @@ class Nin(BasicNewsRecipe): use_embedded_content = False language = 'sr' publication_type = 'magazine' - extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Verdana, Lucida, sans1, sans-serif} .article_description{font-family: Verdana, Lucida, sans1, sans-serif} .artTitle{font-size: x-large; font-weight: bold; color: #900} .izjava{font-size: x-large; font-weight: bold} .columnhead{font-size: small; font-weight: bold;} img{margin-top:0.5em; margin-bottom: 0.7em} b{margin-top: 1em} ' + extra_css = """ + @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} + body{font-family: Verdana, Lucida, sans1, sans-serif} + .article_description{font-family: Verdana, Lucida, sans1, sans-serif} + .artTitle{font-size: x-large; font-weight: bold; color: #900} + .izjava{font-size: x-large; font-weight: bold} + .columnhead{font-size: small; font-weight: bold;} + img{margin-top:0.5em; margin-bottom: 0.7em; display: block} + b{margin-top: 1em} + """ conversion_options = { - 'comment' : description - , 'tags' : category - , 'publisher' : publisher - , 'language' : language - , 'linearize_tables' : True + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language } - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] - remove_attributes = ['height','width'] + preprocess_regexps = [ + (re.compile(r'.*?', re.DOTALL|re.IGNORECASE),lambda match: '') + ,(re.compile(r'.*?', re.DOTALL|re.IGNORECASE),lambda match: '') + ,(re.compile(u'\u0110'), lambda match: u'\u00D0') + ] def get_browser(self): br = BasicNewsRecipe.get_browser() @@ -50,7 +64,10 @@ class Nin(BasicNewsRecipe): return br keep_only_tags =[dict(name='td', attrs={'width':'520'})] + remove_tags_before =dict(name='span', attrs={'class':'izjava'}) remove_tags_after =dict(name='html') + remove_tags = [dict(name=['object','link','iframe','meta','base'])] + remove_attributes=['border','background','height','width','align','valign'] def get_cover_url(self): cover_url = None @@ -63,7 +80,7 @@ class Nin(BasicNewsRecipe): def parse_index(self): articles = [] count = 0 - soup = self.index_to_soup(self.PREFIX) + soup = self.index_to_soup(self.INDEX) for item in soup.findAll('a',attrs={'class':'lmeninavFont'}): count = count +1 if self.test and count > 2: @@ -90,3 +107,45 @@ class Nin(BasicNewsRecipe): articles.append((section,inarts)) return articles + def index_to_soup(self, url_or_raw, raw=False): + if re.match(r'\w+://', url_or_raw): + open_func = getattr(self.browser, 'open_novisit', self.browser.open) + with closing(open_func(url_or_raw)) as f: + _raw = f.read() + if not _raw: + raise RuntimeError('Could not fetch index from %s'%url_or_raw) + else: + _raw = url_or_raw + if raw: + return _raw + if not isinstance(_raw, unicode) and self.encoding: + if callable(self.encoding): + _raw = self.encoding(_raw) + else: + _raw = _raw.decode(self.encoding, 'replace') + massage = list(BeautifulSoup.MARKUP_MASSAGE) + enc = 'cp1252' if callable(self.encoding) or self.encoding is None else self.encoding + massage.append((re.compile(r'&(\S+?);'), lambda match: + entity_to_unicode(match, encoding=enc))) + massage.append((re.compile(r'[\x00-\x08]+'), lambda match: + '')) + return BeautifulSoup(_raw, markupMassage=massage) + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + for item in soup.findAll('div'): + if len(item.contents) == 0: + item.extract() + for item in soup.findAll(['td','tr']): + item.name='div' + for item in soup.findAll('img'): + if not item.has_key('alt'): + item['alt'] = 'image' + for tbl in soup.findAll('table'): + img = tbl.find('img') + if img: + img.extract() + tbl.replaceWith(img) + return soup + \ No newline at end of file diff --git a/resources/recipes/polityka.recipe b/resources/recipes/polityka.recipe index ab31e148aa..16ccae6085 100644 --- a/resources/recipes/polityka.recipe +++ b/resources/recipes/polityka.recipe @@ -1,18 +1,18 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com' +__copyright__ = '2010, matek09, matek09@gmail.com' from calibre.web.feeds.news import BasicNewsRecipe class Polityka(BasicNewsRecipe): title = u'Polityka' - __author__ = 'Mateusz Kielar' + __author__ = 'matek09' description = 'Weekly magazine. Last archive issue' encoding = 'utf-8' no_stylesheets = True - language = 'en' + language = 'pl' remove_javascript = True remove_tags_before = dict(dict(name = 'h2', attrs = {'class' : 'box_nag'})) @@ -48,7 +48,6 @@ class Polityka(BasicNewsRecipe): for div in box.findAll('div', attrs={'class': 'list_tresc'}): article_page = self.index_to_soup('http://archiwum.polityka.pl' + div.a['href'],) section = self.tag_to_string(article_page.find('h2', attrs = {'class' : 'box_nag'})).split('/')[0].lstrip().rstrip() - print section if not articles.has_key(section): articles[section] = [] articles[section].append( { diff --git a/resources/recipes/wprost.recipe b/resources/recipes/wprost.recipe new file mode 100644 index 0000000000..b317571981 --- /dev/null +++ b/resources/recipes/wprost.recipe @@ -0,0 +1,91 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2010, matek09, matek09@gmail.com' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class Wprost(BasicNewsRecipe): + EDITION = 0 + FIND_LAST_FULL_ISSUE = True + EXCLUDE_LOCKED = True + ICO_BLOCKED = 'http://www.wprost.pl/G/icons/ico_blocked.gif' + + title = u'Wprost' + __author__ = 'matek09' + description = 'Weekly magazine' + encoding = 'ISO-8859-2' + no_stylesheets = True + language = 'pl' + remove_javascript = True + + remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'})) + remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'})) + + '''keep_only_tags =[] + keep_only_tags.append(dict(name = 'table', attrs = {'id' : 'title-table'})) + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-header'})) + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-content'})) + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'def element-autor'}))''' + + preprocess_regexps = [(re.compile(r'style="display: none;"'), lambda match: ''), + (re.compile(r'display: block;'), lambda match: '')] + + + remove_tags =[] + remove_tags.append(dict(name = 'div', attrs = {'class' : 'def element-date'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'def silver'})) + remove_tags.append(dict(name = 'div', attrs = {'id' : 'content-main-column-right'})) + + + extra_css = ''' + .div-header {font-size: x-small; font-weight: bold} + ''' +#h2 {font-size: x-large; font-weight: bold} + def is_blocked(self, a): + if a.findNextSibling('img') is None: + return False + else: + return True + + + + def find_last_issue(self): + soup = self.index_to_soup('http://www.wprost.pl/archiwum/') + a = 0 + if self.FIND_LAST_FULL_ISSUE: + ico_blocked = soup.findAll('img', attrs={'src' : self.ICO_BLOCKED}) + a = ico_blocked[-1].findNext('a', attrs={'title' : re.compile('Zobacz spis tre.ci')}) + else: + a = soup.find('a', attrs={'title' : re.compile('Zobacz spis tre.ci')}) + self.EDITION = a['href'].replace('/tygodnik/?I=', '') + self.cover_url = a.img['src'] + + + + def parse_index(self): + self.find_last_issue() + soup = self.index_to_soup('http://www.wprost.pl/tygodnik/?I=' + self.EDITION) + feeds = [] + for main_block in soup.findAll(attrs={'class':'main-block-s3 s3-head head-red3'}): + articles = list(self.find_articles(main_block)) + if len(articles) > 0: + section = self.tag_to_string(main_block) + feeds.append((section, articles)) + return feeds + + def find_articles(self, main_block): + for a in main_block.findAllNext( attrs={'style':['','padding-top: 15px;']}): + if a.name in "td": + break + if self.EXCLUDE_LOCKED & self.is_blocked(a): + continue + yield { + 'title' : self.tag_to_string(a), + 'url' : 'http://www.wprost.pl' + a['href'], + 'date' : '', + 'description' : '' + } + + diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index 811acbe55b..0deef5eb92 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -38,7 +38,7 @@ class ANDROID(USBMS): 0x227]}, # Samsung - 0x04e8 : { 0x681d : [0x0222, 0x0224, 0x0400], + 0x04e8 : { 0x681d : [0x0222, 0x0223, 0x0224, 0x0400], 0x681c : [0x0222, 0x0224, 0x0400], 0x6640 : [0x0100], }, @@ -62,7 +62,8 @@ class ANDROID(USBMS): 'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX'] WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE', '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897', - 'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID'] + 'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', + 'SCH-I500_CARD'] WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID'] diff --git a/src/calibre/devices/cybook/driver.py b/src/calibre/devices/cybook/driver.py index becb7912a9..c1a62fdbf3 100644 --- a/src/calibre/devices/cybook/driver.py +++ b/src/calibre/devices/cybook/driver.py @@ -65,8 +65,8 @@ class ORIZON(CYBOOK): BCD = [0x319] - WINDOWS_MAIN_MEM = re.compile(r'CYBOOK_ORIZON__-FD') - WINDOWS_CARD_A_MEM = re.compile('CYBOOK_ORIZON__-SD') + WINDOWS_MAIN_MEM = re.compile(r'(CYBOOK_ORIZON__-FD)|(FILE-STOR_GADGET)') + WINDOWS_CARD_A_MEM = re.compile('(CYBOOK_ORIZON__-SD)|(FILE-STOR_GADGET)') EBOOK_DIR_MAIN = EBOOK_DIR_CARD_A = 'Digital Editions' diff --git a/src/calibre/devices/eb600/driver.py b/src/calibre/devices/eb600/driver.py index 4b3d4498c4..54d73d9c1d 100644 --- a/src/calibre/devices/eb600/driver.py +++ b/src/calibre/devices/eb600/driver.py @@ -229,7 +229,7 @@ class POCKETBOOK301(USBMS): class POCKETBOOK602(USBMS): - name = 'PocketBook Pro 602 Device Interface' + name = 'PocketBook Pro 602/902 Device Interface' description = _('Communicate with the PocketBook 602 reader.') author = 'Kovid Goyal' supported_platforms = ['windows', 'osx', 'linux'] @@ -244,5 +244,5 @@ class POCKETBOOK602(USBMS): BCD = [0x0324] VENDOR_NAME = '' - WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'PB602' + WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['PB602', 'PB902'] diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index 3987ffa1b8..2a9a92612e 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -91,6 +91,10 @@ class FB2MLizer(object): return u'\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True) def clean_text(self, text): + text = re.sub(r'(?miu)
\s*
', '', text) + text = re.sub(r'(?miu)\s+', '', text) + text = re.sub(r'(?miu)
', '
\n\n
', text) + text = re.sub(r'(?miu)

\s*

', '', text) text = re.sub(r'(?miu)\s+

', '

', text) text = re.sub(r'(?miu)

', '

\n\n

', text) @@ -166,11 +170,15 @@ class FB2MLizer(object): def get_text(self): text = [] - for item in self.oeb_book.spine: + for i, item in enumerate(self.oeb_book.spine): + if self.opts.sectionize_chapters_using_file_structure and i is not 0: + text.append('

') self.log.debug('Converting %s to FictionBook2 XML' % item.href) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile) text.append(self.add_page_anchor(item)) text += self.dump_text(item.data.find(XHTML('body')), stylizer, item) + if self.opts.sectionize_chapters_using_file_structure and i is not len(self.oeb_book.spine) - 1: + text.append('
') return ''.join(text) def fb2_body_footer(self): @@ -258,6 +266,10 @@ class FB2MLizer(object): if id_name: fb2_text.append(self.get_anchor(page, id_name)) + if tag == 'h1' and self.opts.h1_to_title or tag == 'h2' and self.opts.h2_to_title or tag == 'h3' and self.opts.h3_to_title: + fb2_text.append('') + tags.append('title') + fb2_tag = TAG_MAP.get(tag, None) if fb2_tag == 'p': if 'p' in tag_stack+tags: diff --git a/src/calibre/ebooks/fb2/output.py b/src/calibre/ebooks/fb2/output.py index d6c7a25a90..bacaf0da91 100644 --- a/src/calibre/ebooks/fb2/output.py +++ b/src/calibre/ebooks/fb2/output.py @@ -25,6 +25,20 @@ class FB2Output(OutputFormatPlugin): 'WARNING: ' \ 'This option is experimental. It can cause conversion ' \ 'to fail. It can also produce unexpected output.')), + OptionRecommendation(name='sectionize_chapters_using_file_structure', + recommended_value=False, level=OptionRecommendation.LOW, + help=_('Try to turn chapters into individual sections using the ' \ + 'internal structure of the ebook. This works well for EPUB ' \ + 'books that have been internally split by chapter.')), + OptionRecommendation(name='h1_to_title', + recommended_value=False, level=OptionRecommendation.LOW, + help=_('Wrap all h1 tags with fb2 title elements.')), + OptionRecommendation(name='h2_to_title', + recommended_value=False, level=OptionRecommendation.LOW, + help=_('Wrap all h2 tags with fb2 title elements.')), + OptionRecommendation(name='h3_to_title', + recommended_value=False, level=OptionRecommendation.LOW, + help=_('Wrap all h3 tags with fb2 title elements.')), ]) def convert(self, oeb_book, output_path, input_plugin, opts, log): diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index f80d15359c..48ece79f45 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -504,6 +504,9 @@ class MobiReader(object): 'x-large': '5', 'xx-large': '6', } + def barename(x): + return x.rpartition(':')[-1] + mobi_version = self.book_header.mobi_version for x in root.xpath('//ncx'): x.getparent().remove(x) @@ -512,8 +515,9 @@ class MobiReader(object): for x in tag.attrib: if ':' in x: del tag.attrib[x] - if tag.tag in ('country-region', 'place', 'placetype', 'placename', - 'state', 'city', 'street', 'address', 'content', 'form'): + if tag.tag and barename(tag.tag.lower()) in \ + ('country-region', 'place', 'placetype', 'placename', + 'state', 'city', 'street', 'address', 'content', 'form'): tag.tag = 'div' if tag.tag in ('content', 'form') else 'span' for key in tag.attrib.keys(): tag.attrib.pop(key) diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py index 560a132ce1..ceb7f36124 100644 --- a/src/calibre/ebooks/pml/pmlml.py +++ b/src/calibre/ebooks/pml/pmlml.py @@ -216,7 +216,9 @@ class PMLMLizer(object): w = '\\w' width = elem.get('width') if width: - w += '="%s%%"' % width + if not width.endswith('%'): + width += '%' + w += '="%s"' % width else: w += '="50%"' text.append(w) @@ -252,8 +254,8 @@ class PMLMLizer(object): if href not in self.link_hrefs.keys(): self.link_hrefs[href] = 'calibre_link-%s' % len(self.link_hrefs.keys()) href = '#%s' % self.link_hrefs[href] - text.append('\\q="%s"' % href) - tags.append('q') + text.append('\\q="%s"' % href) + tags.append('q') # Anchor ids id_name = elem.get('id') diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py index 8b57b4b455..559ea4a6f7 100644 --- a/src/calibre/gui2/actions/edit_metadata.py +++ b/src/calibre/gui2/actions/edit_metadata.py @@ -164,12 +164,15 @@ class EditMetadataAction(InterfaceAction): self.gui.tags_view.blockSignals(True) changed = False try: + current_tab = 0 while True: - dialog = MetadataBulkDialog(self.gui, rows, self.gui.library_view.model()) + dialog = MetadataBulkDialog(self.gui, rows, + self.gui.library_view.model(), current_tab) if dialog.changed: changed = True if not dialog.do_again: break + current_tab = dialog.central_widget.currentIndex() finally: self.gui.tags_view.blockSignals(False) if changed: diff --git a/src/calibre/gui2/actions/similar_books.py b/src/calibre/gui2/actions/similar_books.py index 644cd3160a..b1ee04a4d4 100644 --- a/src/calibre/gui2/actions/similar_books.py +++ b/src/calibre/gui2/actions/similar_books.py @@ -58,6 +58,7 @@ class SimilarBooksAction(InterfaceAction): for a in authors.split(',')] join = ' or ' if search: - self.gui.search.set_search_string(join.join(search)) + self.gui.search.set_search_string(join.join(search), + store_in_history=True) diff --git a/src/calibre/gui2/actions/view.py b/src/calibre/gui2/actions/view.py index 5f4f7ce428..0a26653771 100644 --- a/src/calibre/gui2/actions/view.py +++ b/src/calibre/gui2/actions/view.py @@ -12,7 +12,7 @@ from PyQt4.Qt import Qt, QMenu from calibre.constants import isosx from calibre.gui2 import error_dialog, Dispatcher, question_dialog, config, \ - open_local_file + open_local_file, info_dialog from calibre.gui2.dialogs.choose_format import ChooseFormatDialog from calibre.utils.config import prefs from calibre.ptempfile import PersistentTemporaryFile @@ -89,18 +89,34 @@ class ViewAction(InterfaceAction): self._launch_viewer(name, viewer, internal) def view_specific_format(self, triggered): - rows = self.gui.library_view.selectionModel().selectedRows() + rows = list(self.gui.library_view.selectionModel().selectedRows()) if not rows or len(rows) == 0: d = error_dialog(self.gui, _('Cannot view'), _('No book selected')) d.exec_() return - row = rows[0].row() - formats = self.gui.library_view.model().db.formats(row).upper().split(',') - d = ChooseFormatDialog(self.gui, _('Choose the format to view'), formats) + db = self.gui.library_view.model().db + rows = [r.row() for r in rows] + formats = [db.formats(row) for row in rows] + formats = [list(f.upper().split(',')) if f else None for f in formats] + all_fmts = set([]) + for x in formats: + for f in x: all_fmts.add(f) + d = ChooseFormatDialog(self.gui, _('Choose the format to view'), + list(sorted(all_fmts))) if d.exec_() == d.Accepted: - format = d.format() - self.view_format(row, format) + fmt = d.format() + orig_num = len(rows) + rows = [rows[i] for i in range(len(rows)) if formats[i] and fmt in + formats[i]] + if self._view_check(len(rows)): + for row in rows: + self.view_format(row, fmt) + if len(rows) < orig_num: + info_dialog(self.gui, _('Format unavailable'), + _('Not all the selected books were available in' + ' the %s format. You should convert' + ' them first.')%fmt, show=True) def _view_check(self, num, max_=3): if num <= max_: diff --git a/src/calibre/gui2/book_details.py b/src/calibre/gui2/book_details.py index 4ffc8da650..b101d4c44f 100644 --- a/src/calibre/gui2/book_details.py +++ b/src/calibre/gui2/book_details.py @@ -208,8 +208,9 @@ class BookInfo(QWebView): rows = u'\n'.join([u'<tr><td valign="top"><b>%s:</b></td><td valign="top">%s</td></tr>'%(k,t) for k, t in rows]) comments = data.get(_('Comments'), '') - if comments and comments != u'None': - self.renderer.queue.put((rows, comments)) + if not comments or comments == u'None': + comments = '' + self.renderer.queue.put((rows, comments)) self._show_data(rows, '') diff --git a/src/calibre/gui2/convert/fb2_output.py b/src/calibre/gui2/convert/fb2_output.py index a3cbe0e647..5d927146a5 100644 --- a/src/calibre/gui2/convert/fb2_output.py +++ b/src/calibre/gui2/convert/fb2_output.py @@ -17,6 +17,8 @@ class PluginWidget(Widget, Ui_Form): ICON = I('mimetypes/fb2.png') def __init__(self, parent, get_option, get_help, db=None, book_id=None): - Widget.__init__(self, parent, ['inline_toc', 'sectionize_chapters']) + Widget.__init__(self, parent, ['inline_toc', 'sectionize_chapters', + 'sectionize_chapters_using_file_structure', 'h1_to_title', + 'h2_to_title', 'h3_to_title']) self.db, self.book_id = db, book_id self.initialize_options(get_option, get_help, db, book_id) diff --git a/src/calibre/gui2/convert/fb2_output.ui b/src/calibre/gui2/convert/fb2_output.ui index a43a8b72ea..a90ecd615e 100644 --- a/src/calibre/gui2/convert/fb2_output.ui +++ b/src/calibre/gui2/convert/fb2_output.ui @@ -14,7 +14,7 @@ <string>Form</string> </property> <layout class="QGridLayout" name="gridLayout"> - <item row="2" column="0"> + <item row="6" column="0"> <spacer name="verticalSpacer"> <property name="orientation"> <enum>Qt::Vertical</enum> @@ -41,6 +41,34 @@ </property> </widget> </item> + <item row="2" column="0"> + <widget class="QCheckBox" name="opt_sectionize_chapters_using_file_structure"> + <property name="text"> + <string>Sectionize Chapters using file structure</string> + </property> + </widget> + </item> + <item row="3" column="0"> + <widget class="QCheckBox" name="opt_h1_to_title"> + <property name="text"> + <string>Wrap h1 tags with <title> elements</string> + </property> + </widget> + </item> + <item row="4" column="0"> + <widget class="QCheckBox" name="opt_h2_to_title"> + <property name="text"> + <string>Wrap h2 tags with <title> elements</string> + </property> + </widget> + </item> + <item row="5" column="0"> + <widget class="QCheckBox" name="opt_h3_to_title"> + <property name="text"> + <string>Wrap h3 tags with <title> elements</string> + </property> + </widget> + </item> </layout> </widget> <resources/> diff --git a/src/calibre/gui2/dialogs/metadata_bulk.py b/src/calibre/gui2/dialogs/metadata_bulk.py index 4fd34e4c4c..e30e0e16e1 100644 --- a/src/calibre/gui2/dialogs/metadata_bulk.py +++ b/src/calibre/gui2/dialogs/metadata_bulk.py @@ -197,7 +197,7 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog): _('Append to field'), ] - def __init__(self, window, rows, model): + def __init__(self, window, rows, model, tab): QDialog.__init__(self, window) Ui_MetadataBulkDialog.__init__(self) self.setupUi(self) @@ -238,6 +238,7 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog): 'Immediately make all changes without closing the dialog. ' 'This operation cannot be canceled or undone')) self.do_again = False + self.central_widget.setCurrentIndex(tab) self.exec_() def button_clicked(self, which): diff --git a/src/calibre/gui2/init.py b/src/calibre/gui2/init.py index 223efcf95b..27a6a2352a 100644 --- a/src/calibre/gui2/init.py +++ b/src/calibre/gui2/init.py @@ -86,6 +86,10 @@ class LibraryViewMixin(object): # {{{ if view is self.current_view(): self.search.search_done(ok) self.set_number_of_books_shown() + if ok: + v = self.current_view() + if hasattr(v, 'set_current_row'): + v.set_current_row(0) # }}} diff --git a/src/calibre/gui2/layout.py b/src/calibre/gui2/layout.py index fd42bfe671..aaaf1b0267 100644 --- a/src/calibre/gui2/layout.py +++ b/src/calibre/gui2/layout.py @@ -182,7 +182,7 @@ class SearchBar(QWidget): # {{{ l.addWidget(self.search_button) self.search_button.setSizePolicy(QSizePolicy.Minimum, QSizePolicy.Minimum) - self.search_button.clicked.connect(parent.search.do_search) + self.search_button.clicked.connect(parent.do_search_button) self.search_button.setToolTip( _('Do Quick Search (you can also press the Enter key)')) diff --git a/src/calibre/gui2/search_box.py b/src/calibre/gui2/search_box.py index 4d598a3bbb..1cdf622537 100644 --- a/src/calibre/gui2/search_box.py +++ b/src/calibre/gui2/search_box.py @@ -9,7 +9,8 @@ __docformat__ = 'restructuredtext en' import re from PyQt4.Qt import QComboBox, Qt, QLineEdit, QStringList, pyqtSlot, QDialog, \ - pyqtSignal, QCompleter, QAction, QKeySequence, QTimer + pyqtSignal, QCompleter, QAction, QKeySequence, QTimer, \ + QString from calibre.gui2 import config from calibre.gui2.dialogs.confirm_delete import confirm @@ -17,21 +18,13 @@ from calibre.gui2.dialogs.saved_search_editor import SavedSearchEditor from calibre.gui2.dialogs.search import SearchDialog from calibre.utils.search_query_parser import saved_searches -class SearchLineEdit(QLineEdit): +class SearchLineEdit(QLineEdit): # {{{ key_pressed = pyqtSignal(object) def keyPressEvent(self, event): self.key_pressed.emit(event) QLineEdit.keyPressEvent(self, event) - def mouseReleaseEvent(self, event): - QLineEdit.mouseReleaseEvent(self, event) - QLineEdit.selectAll(self) - - def focusInEvent(self, event): - QLineEdit.focusInEvent(self, event) - QLineEdit.selectAll(self) - def dropEvent(self, ev): self.parent().normalize_state() return QLineEdit.dropEvent(self, ev) @@ -44,17 +37,23 @@ class SearchLineEdit(QLineEdit): def paste(self, *args): self.parent().normalize_state() return QLineEdit.paste(self) +# }}} -class SearchBox2(QComboBox): +class SearchBox2(QComboBox): # {{{ ''' To use this class: * Call initialize() * Connect to the search() and cleared() signals from this widget. - * Connect to the cleared() signal to know when the box content changes - * Connect to focus_to_library signal to be told to manually change focus + * Connect to the changed() signal to know when the box content changes + * Connect to focus_to_library() signal to be told to manually change focus * Call search_done() after every search is complete + * Call set_search_string() to perform a search programmatically + * You can use the current_text property to get the current search text + Be aware that if you are using it in a slot connected to the + changed() signal, if the connection is not queued it will not be + accurate. ''' INTERVAL = 1500 #: Time to wait before emitting search signal @@ -70,8 +69,12 @@ class SearchBox2(QComboBox): self.normal_background = 'rgb(255, 255, 255, 0%)' self.line_edit = SearchLineEdit(self) self.setLineEdit(self.line_edit) + c = self.line_edit.completer() c.setCompletionMode(c.PopupCompletion) + c.highlighted[QString].connect(self.completer_used) + c.activated[QString].connect(self.history_selected) + self.line_edit.key_pressed.connect(self.key_pressed, type=Qt.DirectConnection) self.activated.connect(self.history_selected) self.setEditable(True) @@ -89,7 +92,11 @@ class SearchBox2(QComboBox): def initialize(self, opt_name, colorize=False, help_text=_('Search')): self.as_you_type = config['search_as_you_type'] self.opt_name = opt_name - self.addItems(QStringList(list(set(config[opt_name])))) + items = [] + for item in config[opt_name]: + if item not in items: + items.append(item) + self.addItems(QStringList(items)) try: self.line_edit.setPlaceholderText(help_text) except: @@ -130,6 +137,7 @@ class SearchBox2(QComboBox): col = self.normal_background self.line_edit.setStyleSheet('QLineEdit{color:black;background-color:%s;}' % col) + # Comes from the lineEdit control def key_pressed(self, event): k = event.key() if k in (Qt.Key_Left, Qt.Key_Right, Qt.Key_Up, Qt.Key_Down, @@ -146,6 +154,21 @@ class SearchBox2(QComboBox): elif self.as_you_type and unicode(event.text()): self.timer.start(1500) + # Comes from the combobox itself + def keyPressEvent(self, event): + k = event.key() + if k not in (Qt.Key_Up, Qt.Key_Down): + QComboBox.keyPressEvent(self, event) + else: + self.blockSignals(True) + self.normalize_state() + QComboBox.keyPressEvent(self, event) + self.blockSignals(False) + + def completer_used(self, text): + self.timer.stop() + self.normalize_state() + def timer_event(self): self.do_search() @@ -153,48 +176,45 @@ class SearchBox2(QComboBox): self.changed.emit() self.do_search() - def do_search(self, *args): + def _do_search(self, store_in_history=True): text = unicode(self.currentText()).strip() if not text: return self.clear() self.search.emit(text) - idx = self.findText(text, Qt.MatchFixedString) - self.block_signals(True) - if idx < 0: - self.insertItem(0, text) - else: - t = self.itemText(idx) - self.removeItem(idx) - self.insertItem(0, t) + if store_in_history: + idx = self.findText(text, Qt.MatchFixedString) + self.block_signals(True) + if idx < 0: + self.insertItem(0, text) + else: + t = self.itemText(idx) + self.removeItem(idx) + self.insertItem(0, t) self.setCurrentIndex(0) - self.block_signals(False) - config[self.opt_name] = [unicode(self.itemText(i)) for i in - range(self.count())] + self.block_signals(False) + history = [unicode(self.itemText(i)) for i in + range(self.count())] + config[self.opt_name] = history + + def do_search(self, *args): + self._do_search() def block_signals(self, yes): self.blockSignals(yes) self.line_edit.blockSignals(yes) - def search_from_tokens(self, tokens, all): - ans = u' '.join([u'%s:%s'%x for x in tokens]) - if not all: - ans = '[' + ans + ']' - self.set_search_string(ans) - - def search_from_tags(self, tags, all): - joiner = ' and ' if all else ' or ' - self.set_search_string(joiner.join(tags)) - - def set_search_string(self, txt): + def set_search_string(self, txt, store_in_history=False): + self.setFocus(Qt.OtherFocusReason) if not txt: self.clear() - return - self.normalize_state() - self.setEditText(txt) - self.search.emit(txt) - self.line_edit.end(False) - self.initial_state = False + else: + self.normalize_state() + self.setEditText(txt) + self.line_edit.end(False) + self.changed.emit() + self._do_search(store_in_history=store_in_history) + self.focus_to_library.emit() def search_as_you_type(self, enabled): self.as_you_type = enabled @@ -202,7 +222,13 @@ class SearchBox2(QComboBox): def in_a_search(self): return self._in_a_search -class SavedSearchBox(QComboBox): + @property + def current_text(self): + return unicode(self.lineEdit().text()) + + # }}} + +class SavedSearchBox(QComboBox): # {{{ ''' To use this class: @@ -212,7 +238,6 @@ class SavedSearchBox(QComboBox): ''' changed = pyqtSignal() - focus_to_library = pyqtSignal() def __init__(self, parent=None): QComboBox.__init__(self, parent) @@ -236,7 +261,11 @@ class SavedSearchBox(QComboBox): def initialize(self, _search_box, colorize=False, help_text=_('Search')): self.search_box = _search_box - self.line_edit.setPlaceholderText(help_text) + try: + self.line_edit.setPlaceholderText(help_text) + except: + # Using Qt < 4.7 + pass self.colorize = colorize self.clear() @@ -253,7 +282,6 @@ class SavedSearchBox(QComboBox): def key_pressed(self, event): if event.key() in (Qt.Key_Return, Qt.Key_Enter): self.saved_search_selected(self.currentText()) - self.focus_to_library.emit() def saved_search_selected(self, qname): qname = unicode(qname) @@ -267,7 +295,6 @@ class SavedSearchBox(QComboBox): self.search_box.set_search_string(u'search:"%s"' % qname) self.setEditText(qname) self.setToolTip(saved_searches().lookup(qname)) - self.focus_to_library.emit() def initialize_saved_search_names(self): qnames = saved_searches().names() @@ -313,13 +340,17 @@ class SavedSearchBox(QComboBox): return self.search_box.set_search_string(saved_searches().lookup(unicode(self.currentText()))) -class SearchBoxMixin(object): + # }}} + +class SearchBoxMixin(object): # {{{ def __init__(self): self.search.initialize('main_search_history', colorize=True, help_text=_('Search (For Advanced Search click the button to the left)')) self.search.cleared.connect(self.search_box_cleared) - self.search.changed.connect(self.search_box_changed) + # Queued so that search.current_text will be correct + self.search.changed.connect(self.search_box_changed, + type=Qt.QueuedConnection) self.search.focus_to_library.connect(self.focus_to_library) self.clear_button.clicked.connect(self.search.clear_clicked) self.advanced_search_button.clicked[bool].connect(self.do_advanced_search) @@ -330,14 +361,17 @@ class SearchBoxMixin(object): shortcuts = QKeySequence.keyBindings(QKeySequence.Find) shortcuts = list(shortcuts) + [QKeySequence('/'), QKeySequence('Alt+S')] self.action_focus_search.setShortcuts(shortcuts) - self.action_focus_search.triggered.connect(lambda x: - self.search.setFocus(Qt.OtherFocusReason)) + self.action_focus_search.triggered.connect(self.focus_search_box) self.addAction(self.action_focus_search) self.search.setStatusTip(re.sub(r'<\w+>', ' ', unicode(self.search.toolTip()))) self.advanced_search_button.setStatusTip(self.advanced_search_button.toolTip()) self.clear_button.setStatusTip(self.clear_button.toolTip()) + def focus_search_box(self, *args): + self.search.setFocus(Qt.OtherFocusReason) + self.search.lineEdit().selectAll() + def search_box_cleared(self): self.tags_view.clear() self.saved_search.clear() @@ -345,22 +379,27 @@ class SearchBoxMixin(object): def search_box_changed(self): self.saved_search.clear() - self.tags_view.clear() + self.tags_view.conditional_clear(self.search.current_text) def do_advanced_search(self, *args): d = SearchDialog(self, self.library_view.model().db) if d.exec_() == QDialog.Accepted: self.search.set_search_string(d.search_string()) + def do_search_button(self): + self.search.do_search() + self.focus_to_library() + def focus_to_library(self): self.current_view().setFocus(Qt.OtherFocusReason) -class SavedSearchBoxMixin(object): + # }}} + +class SavedSearchBoxMixin(object): # {{{ def __init__(self): self.saved_search.changed.connect(self.saved_searches_changed) self.clear_button.clicked.connect(self.saved_search.clear) - self.saved_search.focus_to_library.connect(self.focus_to_library) self.save_search_button.clicked.connect( self.saved_search.save_search_button_clicked) self.delete_search_button.clicked.connect( @@ -396,6 +435,5 @@ class SavedSearchBoxMixin(object): self.saved_searches_changed() self.saved_search.clear() - def focus_to_library(self): - self.current_view().setFocus(Qt.OtherFocusReason) + # }}} diff --git a/src/calibre/gui2/search_restriction_mixin.py b/src/calibre/gui2/search_restriction_mixin.py index 6373e452e5..74132ae610 100644 --- a/src/calibre/gui2/search_restriction_mixin.py +++ b/src/calibre/gui2/search_restriction_mixin.py @@ -4,6 +4,8 @@ Created on 10 Jun 2010 @author: charles ''' +from PyQt4.Qt import Qt + class SearchRestrictionMixin(object): def __init__(self): @@ -53,6 +55,7 @@ class SearchRestrictionMixin(object): self.saved_search.clear() self.tags_view.set_search_restriction(restriction) self.set_number_of_books_shown() + self.current_view().setFocus(Qt.OtherFocusReason) def set_number_of_books_shown(self): if self.current_view() == self.library_view and self.restriction_in_effect: diff --git a/src/calibre/gui2/tag_view.py b/src/calibre/gui2/tag_view.py index b841706439..972a1eeba3 100644 --- a/src/calibre/gui2/tag_view.py +++ b/src/calibre/gui2/tag_view.py @@ -60,7 +60,7 @@ class TagDelegate(QItemDelegate): # {{{ class TagsView(QTreeView): # {{{ refresh_required = pyqtSignal() - tags_marked = pyqtSignal(object, object) + tags_marked = pyqtSignal(object) user_category_edit = pyqtSignal(object) tag_list_edit = pyqtSignal(object, object) saved_search_edit = pyqtSignal(object) @@ -135,11 +135,21 @@ class TagsView(QTreeView): # {{{ # swallow these to avoid toggling and editing at the same time pass + @property + def search_string(self): + tokens = self._model.tokens() + joiner = ' and ' if self.match_all else ' or ' + return joiner.join(tokens) + def toggle(self, index): modifiers = int(QApplication.keyboardModifiers()) exclusive = modifiers not in (Qt.CTRL, Qt.SHIFT) if self._model.toggle(index, exclusive): - self.tags_marked.emit(self._model.tokens(), self.match_all) + self.tags_marked.emit(self.search_string) + + def conditional_clear(self, search_string): + if search_string != self.search_string: + self.clear() def context_menu_handler(self, action=None, category=None, key=None, index=None): @@ -842,8 +852,7 @@ class TagBrowserMixin(object): # {{{ self.library_view.model().count_changed_signal.connect(self.tags_view.recount) self.tags_view.set_database(self.library_view.model().db, self.tag_match, self.sort_by) - self.tags_view.tags_marked.connect(self.search.search_from_tags) - self.tags_view.tags_marked.connect(self.saved_search.clear) + self.tags_view.tags_marked.connect(self.search.set_search_string) self.tags_view.tag_list_edit.connect(self.do_tags_list_edit) self.tags_view.user_category_edit.connect(self.do_user_categories_edit) self.tags_view.saved_search_edit.connect(self.do_saved_search_edit) diff --git a/src/calibre/gui2/viewer/main.py b/src/calibre/gui2/viewer/main.py index 70fa99b4b6..1ea1aca733 100644 --- a/src/calibre/gui2/viewer/main.py +++ b/src/calibre/gui2/viewer/main.py @@ -17,7 +17,7 @@ from calibre.gui2.viewer.bookmarkmanager import BookmarkManager from calibre.gui2.widgets import ProgressIndicator from calibre.gui2.main_window import MainWindow from calibre.gui2 import Application, ORG_NAME, APP_UID, choose_files, \ - info_dialog, error_dialog, open_url + info_dialog, error_dialog, open_url, available_height from calibre.ebooks.oeb.iterator import EbookIterator from calibre.ebooks import DRMError from calibre.constants import islinux, isfreebsd, isosx @@ -253,6 +253,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer): self.connect(self.vertical_scrollbar, SIGNAL('valueChanged(int)'), lambda x: self.goto_page(x/100.)) self.search.search.connect(self.find) + self.search.focus_to_library.connect(lambda: self.view.setFocus(Qt.OtherFocusReason)) self.connect(self.toc, SIGNAL('clicked(QModelIndex)'), self.toc_clicked) self.connect(self.reference, SIGNAL('goto(PyQt_PyObject)'), self.goto) @@ -693,6 +694,9 @@ class EbookViewer(MainWindow, Ui_EbookViewer): if ss is not None: self.splitter.restoreState(ss) self.show_toc_on_open = dynamic.get('viewer_toc_isvisible', False) + av = available_height() - 30 + if self.height() > av: + self.resize(self.width(), av) def config(defaults=None): desc = _('Options to control the ebook viewer') diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index b84ec99bed..eed258a6b0 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -1402,7 +1402,6 @@ class EPUB_MOBI(CatalogPlugin): if record['cover']: this_title['cover'] = re.sub('&', '&', record['cover']) - # This may be updated in self.processSpecialTags() this_title['read'] = self.discoverReadStatus(record) if record['tags']: @@ -2676,14 +2675,7 @@ class EPUB_MOBI(CatalogPlugin): pBookTag = Tag(soup, "p") ptc = 0 - # THIS SHOULDN'T BE NECESSARY - # book with read/reading/unread symbol -# for tag in book['tags']: -# if tag == self.opts.read_tag: -# book['read'] = True -# break -# else: -# book['read'] = False + book['read'] = self.discoverReadStatus(book) # book with read|reading|unread symbol or wishlist item if self.opts.wishlist_tag in book.get('tags', []): @@ -4057,7 +4049,6 @@ class EPUB_MOBI(CatalogPlugin): return False - def filterDbTags(self, tags): # Remove the special marker tags from the database's tag list, # return sorted list of normalized genre tags @@ -4550,7 +4541,6 @@ class EPUB_MOBI(CatalogPlugin): markerTags = [] markerTags.extend(self.opts.exclude_tags.split(',')) markerTags.extend(self.opts.note_tag.split(',')) - # Process read_book_marker if field is tag return markerTags def letter_or_symbol(self,char):