Merge from trunk

2025-12-11 07:35:14 -05:00 · 2010-12-01 23:26:29 +01:00 · 2010-12-01 23:26:29 +01:00 · 697df6950f
commit 697df6950f
parent 68b6ef55bd 0c7df44df5
30 changed files with 732 additions and 185 deletions
--- a/resources/recipes/abc_au.recipe
+++ b/resources/recipes/abc_au.recipe
@ -0,0 +1,54 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Dean Cording'
+'''
+abc.net.au/news
+'''
+import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class ABCNews(BasicNewsRecipe):
+    '''Recipe building an ABC News (Australia) edition from the RSS feeds listed in feeds.'''
+    title                  = 'ABC News'
+    __author__             = 'Dean Cording'
+    description            = 'News from Australia'
+    masthead_url           = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png'
+    cover_url              = masthead_url  # cover and masthead share one image
+
+    oldest_article         = 2
+    max_articles_per_feed  = 100
+    no_stylesheets         = False
+    use_embedded_content   = False
+    encoding               = 'utf8'
+    publisher              = 'ABC News'
+    category               = 'News, Australia, World'
+    language               = 'en_AU'
+    publication_type       = 'newsportal'
+    # Regex rule that deletes HTML comments (<!-- ... -->).
+    preprocess_regexps     = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
+    conversion_options = {
+                             'comments'        : description
+                            ,'tags'            : category
+                            ,'language'        : language
+                            ,'publisher'       : publisher
+                            ,'linearize_tables': False
+                         }
+
+    # keep_only_tags is a list of tag specs, the same form as remove_tags.
+    keep_only_tags    = [dict(id='article')]
+
+    remove_tags = [dict(attrs={'class':['related', 'tags']}),
+                   dict(id='statepromo')]
+    remove_attributes = ['width','height']
+
+    feeds          = [
+                      ('Top Stories', 'http://www.abc.net.au/news/syndicate/topstoriesrss.xml'),
+                      ('Canberra', 'http://www.abc.net.au/news/indexes/idx-act/rss.xml'),
+                      ('Sydney', 'http://www.abc.net.au/news/indexes/sydney/rss.xml'),
+                      ('Melbourne', 'http://www.abc.net.au/news/indexes/melbourne/rss.xml'),
+                      ('Brisbane', 'http://www.abc.net.au/news/indexes/brisbane/rss.xml'),
+                      ('Perth', 'http://www.abc.net.au/news/indexes/perth/rss.xml'),
+                      ('Australia', 'http://www.abc.net.au/news/indexes/idx-australia/rss.xml'),
+                      ('World', 'http://www.abc.net.au/news/indexes/world/rss.xml'),
+                      ('Business', 'http://www.abc.net.au/news/indexes/business/rss.xml'),
+                      ('Science and Technology', 'http://www.abc.net.au/news/tag/science-and-technology/rss.xml'),
+                    ]
--- a/resources/recipes/business_spectator.recipe
+++ b/resources/recipes/business_spectator.recipe
@ -0,0 +1,48 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Dean Cording'
+'''
+businessspectator.com.au
+'''
+import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class BusinessSpectator(BasicNewsRecipe):
+    '''Recipe collecting Business Spectator articles from the RSS feeds listed in feeds.'''
+    title = 'Business Spectator'
+    __author__ = 'Dean Cording'
+    description = 'Australian Business News & commentary delivered the way you want it.'
+    publisher = 'Business Spectator'
+    category = 'News, Australia, Business'
+    language = 'en_AU'
+    publication_type = 'newsportal'
+    masthead_url = 'http://www.businessspectator.com.au/bs.nsf/logo-business-spectator.gif'
+    cover_url = masthead_url  # the masthead image is reused as the cover
+
+    encoding = 'utf8'
+    oldest_article = 2
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+
+    # Regex rule that deletes HTML comments (<!-- ... -->).
+    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
+    keep_only_tags = [dict(id='storyHeader'), dict(id='body-html')]
+    remove_tags = [dict(attrs={'class': 'hql'})]
+    remove_attributes = ['width', 'height', 'style']
+
+    conversion_options = {
+        'comments': description,
+        'tags': category,
+        'language': language,
+        'publisher': publisher,
+        'linearize_tables': False,
+    }
+
+    feeds = [
+        ('Top Stories', 'http://www.businessspectator.com.au/top-stories.rss'),
+        ('Alan Kohler', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=spectators&cat=Alan%20Kohler'),
+        ('Robert Gottliebsen', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=spectators&cat=Robert%20Gottliebsen'),
+        ('Stephen Bartholomeusz', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=spectators&cat=Stephen%20Bartholomeusz'),
+        ('Daily Dossier', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=kgb&cat=dossier'),
+        ('Australia', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=region&cat=australia'),
+    ]
--- a/resources/recipes/esenja.recipe
+++ b/resources/recipes/esenja.recipe
@ -0,0 +1,87 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, matek09, matek09@gmail.com'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class Esensja(BasicNewsRecipe):
+	'''Recipe for the Polish monthly Esensja; the issue is located and indexed in parse_index().'''
+	title = u'Esensja'
+	__author__ = 'matek09'
+	description = 'Monthly magazine'
+	encoding = 'utf-8'
+	no_stylesheets = True
+	language = 'pl'
+	remove_javascript = True
+	HREF = '0'  # placeholder; parse_index() overwrites it with the base URL of the issue pages
+
+	remove_tags_before = dict(dict(name = 'div', attrs = {'class' : 't-title'}))
+	remove_tags_after = dict(dict(name = 'img', attrs = {'src' : '../../../2000/01/img/tab_bot.gif'}))
+
+	remove_tags =[]
+	remove_tags.append(dict(name = 'img', attrs = {'src' : '../../../2000/01/img/tab_top.gif'}))
+	remove_tags.append(dict(name = 'img', attrs = {'src' : '../../../2000/01/img/tab_bot.gif'}))
+	remove_tags.append(dict(name = 'div', attrs = {'class' : 't-title2 nextpage'}))
+
+	extra_css = '''
+					.t-title {font-size: x-large; font-weight: bold; text-align: left}
+					.t-author {font-size: x-small; text-align: left}
+					.t-title2 {font-size: x-small; font-style: italic; text-align: left}
+					.text {font-size: small; text-align: left}
+					.annot-ref {font-style: italic; text-align: left}
+				'''
+
+	preprocess_regexps = [(re.compile(r'alt="[^"]*"'),
+						lambda match: '')]
+
+	def parse_index(self):
+		'''Return [(section title, [article dict, ...]), ...] for the issue linked from the magazine index.
+
+		Every page after the first of a multi-page article becomes its own entry ("<title> c. d. <n>").
+		'''
+		soup = self.index_to_soup('http://www.esensja.pl/magazyn/')
+		a = soup.find('a', attrs={'href' : re.compile('.*/index.html')})
+		year = a['href'].split('/')[0]
+		month = a['href'].split('/')[1]
+		self.HREF = 'http://www.esensja.pl/magazyn/' + year + '/' + month + '/iso/'
+		soup = self.index_to_soup(self.HREF + '01.html')
+		self.cover_url = 'http://www.esensja.pl/magazyn/' + year + '/' + month + '/img/ilustr/cover_b.jpg'
+		feeds = []
+		intro = soup.find('div', attrs={'class' : 'n-title'})
+		chapter = 'Wprowadzenie'
+		subchapter = ''
+		articles = [{'title' : self.tag_to_string(intro.a), 'url' : self.HREF + intro.a['href'], 'date' : '', 'description' : ''}]
+		def section_name(chapter, subchapter):
+			return chapter + ' - ' + subchapter if subchapter else chapter
+
+		for tag in intro.findAllNext(attrs={'class': ['chapter', 'subchapter', 'n-title']}):
+			if tag.name == 'td':
+				# A chapter/subchapter heading closes the section gathered so far.
+				if articles:
+					feeds.append((section_name(chapter, subchapter), articles))
+					articles = []
+				if tag['class'] == 'chapter':
+					chapter = self.tag_to_string(tag).capitalize()
+					subchapter = ''
+				else:
+					subchapter = self.tag_to_string(tag)
+				continue
+			articles.append({'title' : self.tag_to_string(tag.a), 'url' : self.HREF + tag.a['href'], 'date' : '', 'description' : ''})
+
+			# Follow the "next page" links and list every continuation page.
+			a = self.index_to_soup(self.HREF + tag.a['href'])
+			i = 1
+			while True:
+				div = a.find('div', attrs={'class' : 't-title2 nextpage'})
+				if div is None:
+					break
+				a = self.index_to_soup(self.HREF + div.a['href'])
+				articles.append({'title' : self.tag_to_string(tag.a) + ' c. d. ' + str(i), 'url' : self.HREF + div.a['href'], 'date' : '', 'description' : ''})
+				i = i + 1
+
+		# No heading follows the last section, so it has to be flushed here.
+		if articles:
+			feeds.append((section_name(chapter, subchapter), articles))
+		return feeds
--- a/resources/recipes/fr_online.recipe
+++ b/resources/recipes/fr_online.recipe
@ -1,67 +1,61 @@
-__license__   = 'GPL v3'
-__copyright__ = '2009, Justus Bisser <justus.bisser at gmail.com>'
+#!/usr/bin/env  python
+
+__license__            = 'GPL v3'
+__copyright__          = '2010, Christian Schmitt'
+
 '''
 fr-online.de
 '''
-import re

-from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.web.feeds.recipes import BasicNewsRecipe

-class Spiegel_ger(BasicNewsRecipe):
-    title                 = 'Frankfurter Rundschau'
-    __author__            = 'Justus Bisser'
-    description           = "Dies ist die Online-Ausgabe der Frankfurter Rundschau. Um die abgerufenen individuell einzustellen bearbeiten sie die Liste im erweiterten Modus. Die Feeds findet man auf http://www.fr-online.de/verlagsservice/fr_newsreader/?em_cnt=574255"
-    publisher             = 'Druck- und Verlagshaus Frankfurt am Main GmbH'
-    category              = 'FR Online, Frankfurter Rundschau, Nachrichten, News,Dienste, RSS, RSS, Feedreader, Newsfeed, iGoogle, Netvibes, Widget'
-    oldest_article        = 7
-    max_articles_per_feed = 100
-    language              = 'de'
-    lang                  = 'de-DE'
-    no_stylesheets        = True
-    use_embedded_content  = False
-    #encoding              = 'cp1252'
+class FROnlineRecipe(BasicNewsRecipe):
+  title                  = 'Frankfurter Rundschau'
+  __author__             = 'maccs'
+  description            = 'Nachrichten aus D und aller Welt'
+  encoding               = 'utf-8'
+  masthead_url =  'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png'
+  publisher              = 'Druck- und Verlagshaus Frankfurt am Main GmbH'
+  category               = 'news, germany, world'
+  language               = 'de'
+  publication_type       = 'newspaper'
+  use_embedded_content   = False
+  remove_javascript      = True
+  no_stylesheets         = True
+  oldest_article         = 1   # Increase this number if you're interested in older articles
+  max_articles_per_feed  = 50  # Seems a reasonable number to me
+  extra_css              = '''
+                            body { font-family: "arial", "verdana", "geneva", sans-serif; font-size: 12px; margin: 0px; background-color: #ffffff;}
+                            .imgSubline{background-color: #f4f4f4; font-size: 0.8em;}
+                            .p--heading-1 {font-weight: bold;}
+                            .calibre_navbar {font-size: 0.8em; font-family: "arial", "verdana", "geneva", sans-serif;}
+                            '''
+  remove_tags            = [dict(name='div', attrs={'id':'Logo'})]
+  cover_url              = 'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png'
+  cover_margins          = (100, 150, '#ffffff')

-    conversion_options = {
-                          'comment'          : description
-                        , 'tags'             : category
-                        , 'publisher'        : publisher
-                        , 'language'         : lang
-                        }

-    recursions = 0
-    max_articles_per_feed = 100
-    #keep_only_tags = [dict(name='div', attrs={'class':'text'})]
-    #tags_remove = [dict(name='div', attrs={'style':'text-align: left; margin: 4px 0px 0px 4px; width: 200px; float: right;'})]
-    remove_attributes = ['style']
-    feeds = []
-    #remove_tags_before = [dict(name='div', attrs={'style':'padding-left: 0px;'})]
-    #remove_tags_after = [dict(name='div', attrs={'class':'box_head_text'})]
+  feeds = []
+  feeds.append(('Startseite', u'http://www.fr-online.de/home/-/1472778/1472778/-/view/asFeed/-/index.xml'))
+  feeds.append(('Politik', u'http://www.fr-online.de/politik/-/1472596/1472596/-/view/asFeed/-/index.xml'))
+  feeds.append(('Meinung', u'http://www.fr-online.de/politik/meinung/-/1472602/1472602/-/view/asFeed/-/index.xml'))
+  feeds.append(('Wirtschaft', u'http://www.fr-online.de/wirtschaft/-/1472780/1472780/-/view/asFeed/-/index.xml'))
+  feeds.append(('Sport', u'http://www.fr-online.de/sport/-/1472784/1472784/-/view/asFeed/-/index.xml'))
+  feeds.append(('Eintracht Frankfurt', u'http://www.fr-online.de/sport/eintracht-frankfurt/-/1473446/1473446/-/view/asFeed/-/index.xml'))
+  feeds.append(('Kultur und Medien', u'http://www.fr-online.de/kultur/-/1472786/1472786/-/view/asFeed/-/index.xml'))
+  feeds.append(('Panorama', u'http://www.fr-online.de/panorama/-/1472782/1472782/-/view/asFeed/-/index.xml'))
+  feeds.append(('Frankfurt', u'http://www.fr-online.de/frankfurt/-/1472798/1472798/-/view/asFeed/-/index.xml'))
+  feeds.append(('Rhein-Main', u'http://www.fr-online.de/rhein-main/-/1472796/1472796/-/view/asFeed/-/index.xml'))
+  feeds.append(('Hanau', u'http://www.fr-online.de/rhein-main/hanau/-/1472866/1472866/-/view/asFeed/-/index.xml'))
+  feeds.append(('Darmstadt', u'http://www.fr-online.de/rhein-main/darmstadt/-/1472858/1472858/-/view/asFeed/-/index.xml'))
+  feeds.append(('Wiesbaden', u'http://www.fr-online.de/rhein-main/wiesbaden/-/1472860/1472860/-/view/asFeed/-/index.xml'))
+  feeds.append(('Offenbach', u'http://www.fr-online.de/rhein-main/offenbach/-/1472856/1472856/-/view/asFeed/-/index.xml'))
+  feeds.append(('Bad Homburg', u'http://www.fr-online.de/rhein-main/bad-homburg/-/1472864/1472864/-/view/asFeed/-/index.xml'))
+  feeds.append(('Digital', u'http://www.fr-online.de/digital/-/1472406/1472406/-/view/asFeed/-/index.xml'))
+  feeds.append(('Wissenschaft', u'http://www.fr-online.de/wissenschaft/-/1472788/1472788/-/view/asFeed/-/index.xml'))

-    # enable for all news
-    allNews = 0
-    if allNews:
-        feeds = [(u'Frankfurter Rundschau', u'http://www.fr-online.de/rss/sport/index.xml')]
-    else:
-        #select the feeds you like
-        feeds = [(u'Nachrichten', u'http://www.fr-online.de/rss/politik/index.xml')]
-        feeds.append((u'Kommentare und Analysen', u'http://www.fr-online.de/rss/meinung/index.xml'))
-        feeds.append((u'Dokumentationen', u'http://www.fr-online.de/rss/dokumentation/index.xml'))
-        feeds.append((u'Deutschlandtrend', u'http://www.fr-online.de/rss/deutschlandtrend/index.xml'))
-        feeds.append((u'Wirtschaft', u'http://www.fr-online.de/rss/wirtschaft/index.xml'))
-        feeds.append((u'Sport', u'http://www.fr-online.de/rss/sport/index.xml'))
-        feeds.append((u'Feuilleton', u'http://www.fr-online.de/rss/feuilleton/index.xml'))
-        feeds.append((u'Panorama', u'http://www.fr-online.de/rss/panorama/index.xml'))
-        feeds.append((u'Rhein Main und Hessen', u'http://www.fr-online.de/rss/hessen/index.xml'))
-        feeds.append((u'Fitness und Gesundheit', u'http://www.fr-online.de/rss/fit/index.xml'))
-        feeds.append((u'Multimedia', u'http://www.fr-online.de/rss/multimedia/index.xml'))
-        feeds.append((u'Wissen und Bildung', u'http://www.fr-online.de/rss/wissen/index.xml'))

-    def get_article_url(self, article):
-        url = article.link
-        regex = re.compile("0C[0-9]{6,8}0A?")
+  def print_version(self, url):  # returns url with 'index.html' swapped for the print-view path
+    return url.replace('index.html', 'view/printVersion/-/index.html')

-        liste = regex.findall(url)
-        string = liste.pop(0)
-        string = string[2:len(string)-1]
-        return "http://www.fr-online.de/_em_cms/_globals/print.php?em_cnt=" + string

--- a/resources/recipes/histmag.recipe
+++ b/resources/recipes/histmag.recipe
@ -0,0 +1,59 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, matek09, matek09@gmail.com'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class Histmag(BasicNewsRecipe):
+	'''Recipe for histmag.org, built from three listing pages (see parse_index).'''
+
+	title = u'Histmag'
+	__author__ = 'matek09'
+	description = u"Artykuly historyczne i publicystyczne"
+	encoding = 'utf-8'
+	no_stylesheets = True
+	language = 'pl'
+	remove_javascript = True
+
+	remove_tags_before = dict(name = 'div', attrs = {'id' : 'article'})
+	remove_tags_after = dict(name = 'h2', attrs = {'class' : 'komentarze'})
+	remove_tags = [
+		dict(name = 'p', attrs = {'class' : 'podpis'}),
+		dict(name = 'h2', attrs = {'class' : 'komentarze'}),
+		dict(name = 'img', attrs = {'src' : 'style/buttons/wesprzyjnas-1.jpg'}),
+	]
+
+	# Put two <br> after every </span> and two before every attribute-less <span>.
+	preprocess_regexps = [
+		(re.compile(r'</span>'), lambda match: '</span><br><br>'),
+		(re.compile(r'<span>'), lambda match: '<br><br><span>'),
+	]
+	extra_css = '''
+					.left {font-size: x-small}
+					.right {font-size: x-small}
+				'''
+
+	def find_articles(self, soup):
+		'''Return one article dict for each <div class="text"> found in soup.'''
+		def describe(div):
+			link = div.h3.a
+			return {
+				'title' : self.tag_to_string(link),
+				'url'   : 'http://www.histmag.org/' + link['href'],
+				'date'  : self.tag_to_string(div.next('p')).split('|')[0],
+				'description' : self.tag_to_string(div.next('p', podpis=False)),
+				}
+		return [describe(div) for div in soup.findAll('div', attrs={'class' : 'text'})]
+
+	def parse_index(self):
+		'''Return (section title, articles) pairs, one per listing page, in the order given below.'''
+		sections = (
+			(u"Artykuly historyczne", 'http://histmag.org/?arc=4&dx=0'),
+			(u"Artykuly publicystyczne", 'http://histmag.org/?arc=5&dx=0'),
+			(u"Wydarzenia", 'http://histmag.org/?arc=1&dx=0'),
+		)
+		return [(name, self.find_articles(self.index_to_soup(url))) for name, url in sections]
+
+
--- a/resources/recipes/newsweek_polska.recipe
+++ b/resources/recipes/newsweek_polska.recipe
@ -1,19 +1,22 @@
 #!/usr/bin/env  python

 __license__   = 'GPL v3'
-__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com'
+__copyright__ = '2010, matek09, matek09@gmail.com'

 from calibre.web.feeds.news import BasicNewsRecipe

 class Newsweek(BasicNewsRecipe):
-	EDITION = 0
+	FIND_LAST_FULL_ISSUE = True
+	EDITION = '0'
+	EXCLUDE_LOCKED = True
+	LOCKED_ICO = 'http://www.newsweek.pl/bins/media/static/newsweek/img/ico_locked.gif'

 	title = u'Newsweek Polska'
-	__author__ = 'Mateusz Kielar'
+	__author__ = 'matek09'
 	description = 'Weekly magazine'
 	encoding = 'utf-8'
 	no_stylesheets = True
-	language = 'en'
+	language = 'pl'
 	remove_javascript = True

 	keep_only_tags =[]
@ -33,34 +36,54 @@ class Newsweek(BasicNewsRecipe):
 	def print_version(self, url):
 		return url.replace("http://www.newsweek.pl/artykuly/wydanie/" + str(self.EDITION), "http://www.newsweek.pl/artykuly") + '/print'

+	def is_locked(self, a):
+		'''Return True if the first <img> found after link a is the lock icon (LOCKED_ICO).'''
+		img = a.findNext('img')
+		# findNext() yields None when no image follows the link; treat that as unlocked.
+		return img is not None and img['src'] == self.LOCKED_ICO
+
+	def is_full(self, issue_soup):
+		'''Return True unless the issue page shows more than one lock icon (LOCKED_ICO).'''
+		# A single lock icon is tolerated; two or more make the issue count as not full.
+		locked = issue_soup.findAll('img', attrs={'src' : self.LOCKED_ICO})
+		return len(locked) <= 1
+
 	def find_last_full_issue(self):
-		page = self.index_to_soup('http://www.newsweek.pl/Frames/IssueCover.aspx')
-		issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
-		page = self.index_to_soup(issue)
-		issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
-		page = self.index_to_soup(issue)
-		self.EDITION = page.find('a', attrs={'target' : '_parent'})['href'].replace('/wydania/','')
+		frame_url = 'http://www.newsweek.pl/Frames/IssueCover.aspx'
+		while True:
+			frame_soup = self.index_to_soup(frame_url)
+			self.EDITION = frame_soup.find('a', attrs={'target' : '_parent'})['href'].replace('/wydania/','')
+			issue_soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + self.EDITION)
+			if self.is_full(issue_soup):
+				break
+			frame_url = 'http://www.newsweek.pl/Frames/' + frame_soup.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
+
+

 	def parse_index(self):
-		self.find_last_full_issue()
-		soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + str(self.EDITION))
+		if self.FIND_LAST_FULL_ISSUE:
+			self.find_last_full_issue()
+		soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + self.EDITION)
 		img = soup.find('img', id="ctl00_C1_PaperIsssueView_IssueImage", src=True)
 		self.cover_url = img['src']
 		feeds = []
 		parent = soup.find(id='content-left-big')
 		for txt in parent.findAll(attrs={'class':'txt_normal_red strong'}):
-			section = self.tag_to_string(txt).capitalize()
 			articles = list(self.find_articles(txt))
-			feeds.append((section, articles))
+			if len(articles) > 0:
+				section = self.tag_to_string(txt).capitalize()
+				feeds.append((section, articles))
 		return feeds

 	def find_articles(self, txt):
 		for a in txt.findAllNext( attrs={'class':['strong','hr']}):
 			if a.name in "div":
 				break
+			if (not self.FIND_LAST_FULL_ISSUE) & self.EXCLUDE_LOCKED & self.is_locked(a):
+				continue
 			yield {
 				'title' : self.tag_to_string(a),
-				'url'   : 'http://www.newsweek.pl'+a['href'],
+				'url'   : 'http://www.newsweek.pl' + a['href'],
 				'date'  : '',
 				'description' : ''
 				}
--- a/resources/recipes/nin.recipe
+++ b/resources/recipes/nin.recipe
@ -8,12 +8,15 @@ www.nin.co.rs
 import re
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
+from contextlib import nested, closing
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
+from calibre import entity_to_unicode

 class Nin(BasicNewsRecipe):
    title                  = 'NIN online'
    __author__             = 'Darko Miletic'
    description            = 'Nedeljne Informativne Novine'
-    publisher              = 'NIN d.o.o.'
+    publisher              = 'NIN d.o.o. - Ringier d.o.o.'
    category               = 'news, politics, Serbia'
    no_stylesheets         = True
    delay                  = 1
@ -26,18 +29,29 @@ class Nin(BasicNewsRecipe):
    use_embedded_content   = False
    language               = 'sr'
    publication_type       = 'magazine'
-    extra_css              = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Verdana, Lucida, sans1, sans-serif} .article_description{font-family: Verdana, Lucida, sans1, sans-serif} .artTitle{font-size: x-large; font-weight: bold; color: #900} .izjava{font-size: x-large; font-weight: bold} .columnhead{font-size: small; font-weight: bold;} img{margin-top:0.5em; margin-bottom: 0.7em} b{margin-top: 1em} '
+    extra_css              = """ 
+                                 @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
+                                 body{font-family: Verdana, Lucida, sans1, sans-serif} 
+                                 .article_description{font-family: Verdana, Lucida, sans1, sans-serif} 
+                                 .artTitle{font-size: x-large; font-weight: bold; color: #900} 
+                                 .izjava{font-size: x-large; font-weight: bold} 
+                                 .columnhead{font-size: small; font-weight: bold;} 
+                                 img{margin-top:0.5em; margin-bottom: 0.7em; display: block} 
+                                 b{margin-top: 1em}
+                             """

    conversion_options = {
-                          'comment'          : description
-                        , 'tags'             : category
-                        , 'publisher'        : publisher
-                        , 'language'         : language
-                        , 'linearize_tables' : True
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
                        }

-    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
-    remove_attributes = ['height','width']
+    preprocess_regexps = [
+                           (re.compile(r'</body>.*?<html>', re.DOTALL|re.IGNORECASE),lambda match: '</body>')
+                          ,(re.compile(r'</html>.*?</html>', re.DOTALL|re.IGNORECASE),lambda match: '</html>')
+                          ,(re.compile(u'\u0110'), lambda match: u'\u00D0')
+                         ]

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
@ -50,7 +64,10 @@ class Nin(BasicNewsRecipe):
        return br

    keep_only_tags    =[dict(name='td', attrs={'width':'520'})]
+    remove_tags_before =dict(name='span', attrs={'class':'izjava'})
    remove_tags_after =dict(name='html')
+    remove_tags = [dict(name=['object','link','iframe','meta','base'])]
+    remove_attributes=['border','background','height','width','align','valign']

    def get_cover_url(self):
        cover_url = None
@ -63,7 +80,7 @@ class Nin(BasicNewsRecipe):
    def parse_index(self):
        articles = []
        count = 0
-        soup = self.index_to_soup(self.PREFIX)
+        soup = self.index_to_soup(self.INDEX)
        for item in soup.findAll('a',attrs={'class':'lmeninavFont'}):
            count = count +1
            if self.test and count > 2:
@ -90,3 +107,45 @@ class Nin(BasicNewsRecipe):
            articles.append((section,inarts))
        return articles

+    def index_to_soup(self, url_or_raw, raw=False):  # returns a BeautifulSoup tree, or the raw data when raw=True
+        if re.match(r'\w+://', url_or_raw):  # argument starts with a scheme, so fetch it
+            open_func = getattr(self.browser, 'open_novisit', self.browser.open)  # use open_novisit when the browser has it
+            with closing(open_func(url_or_raw)) as f:
+                _raw = f.read()
+            if not _raw:
+                raise RuntimeError('Could not fetch index from %s'%url_or_raw)
+        else:
+            _raw = url_or_raw  # otherwise the argument is taken to be the markup itself
+        if raw:
+            return _raw  # returned before any decoding or parsing
+        if not isinstance(_raw, unicode) and self.encoding:
+            if callable(self.encoding):  # encoding may be a function that decodes the data
+                _raw = self.encoding(_raw)
+            else:
+                _raw = _raw.decode(self.encoding, 'replace')
+        massage = list(BeautifulSoup.MARKUP_MASSAGE)  # copy, so the class-level list is not modified
+        enc = 'cp1252' if callable(self.encoding) or self.encoding is None else self.encoding
+        massage.append((re.compile(r'&(\S+?);'), lambda match:  # rewrite &...; entities via entity_to_unicode
+            entity_to_unicode(match, encoding=enc)))
+        massage.append((re.compile(r'[\x00-\x08]+'), lambda match:  # delete control characters \x00-\x08
+            ''))
+        return BeautifulSoup(_raw, markupMassage=massage)
+
+    def preprocess_html(self, soup):  # cleans the parsed page in place and returns the same soup
+        for item in soup.findAll(style=True):  # drop inline style attributes
+            del item['style']
+        for item in soup.findAll('div'):  # remove divs that have no contents
+            if len(item.contents) == 0:
+               item.extract()
+        for item in soup.findAll(['td','tr']):  # turn table cells and rows into plain divs
+            item.name='div'
+        for item in soup.findAll('img'):  # give every image lacking an alt attribute a default one
+            if not item.has_key('alt'):
+               item['alt'] = 'image'
+        for tbl in soup.findAll('table'):  # replace each table holding an image with its first image
+            img = tbl.find('img')
+            if img:
+               img.extract()
+               tbl.replaceWith(img)
+        return soup
+        
--- a/resources/recipes/polityka.recipe
+++ b/resources/recipes/polityka.recipe
@ -1,18 +1,18 @@
 #!/usr/bin/env  python

 __license__   = 'GPL v3'
-__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com'
+__copyright__ = '2010, matek09, matek09@gmail.com'

 from calibre.web.feeds.news import BasicNewsRecipe

 class Polityka(BasicNewsRecipe):

 	title = u'Polityka'
-	__author__ = 'Mateusz Kielar'
+	__author__ = 'matek09'
 	description = 'Weekly magazine. Last archive issue'
 	encoding = 'utf-8'
 	no_stylesheets = True
-	language = 'en'
+	language = 'pl'
 	remove_javascript = True

 	remove_tags_before = dict(dict(name = 'h2', attrs = {'class' : 'box_nag'}))
@ -48,7 +48,6 @@ class Polityka(BasicNewsRecipe):
 				for div in box.findAll('div', attrs={'class': 'list_tresc'}):
 					article_page = self.index_to_soup('http://archiwum.polityka.pl' + div.a['href'],)
 					section = self.tag_to_string(article_page.find('h2', attrs = {'class' : 'box_nag'})).split('/')[0].lstrip().rstrip()
-					print section
 					if not articles.has_key(section):
 						articles[section] = []
 					articles[section].append( {
--- a/resources/recipes/wprost.recipe
+++ b/resources/recipes/wprost.recipe
@ -0,0 +1,91 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, matek09, matek09@gmail.com'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class Wprost(BasicNewsRecipe):
+	'''Recipe for the Polish weekly Wprost, scraped from the wprost.pl issue archive.
+
+	FIND_LAST_FULL_ISSUE -- pick the issue linked after the last "blocked" icon on the
+	archive page instead of the first issue listed there.
+	EXCLUDE_LOCKED -- skip articles whose link is followed by a sibling <img>.
+	'''
+	EDITION = '0'  # placeholder; find_last_issue() stores the issue id (a string) here
+	FIND_LAST_FULL_ISSUE = True
+	EXCLUDE_LOCKED = True
+	ICO_BLOCKED = 'http://www.wprost.pl/G/icons/ico_blocked.gif'
+
+	title = u'Wprost'
+	__author__ = 'matek09'
+	description = 'Weekly magazine'
+	encoding = 'ISO-8859-2'
+	no_stylesheets = True
+	language = 'pl'
+	remove_javascript = True
+
+	remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
+	remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
+
+	preprocess_regexps = [(re.compile(r'style="display: none;"'), lambda match: ''),
+						(re.compile(r'display: block;'), lambda match: '')]
+
+	remove_tags =[]
+	remove_tags.append(dict(name = 'div', attrs = {'class' : 'def element-date'}))
+	remove_tags.append(dict(name = 'div', attrs = {'class' : 'def silver'}))
+	remove_tags.append(dict(name = 'div', attrs = {'id' : 'content-main-column-right'}))
+
+	extra_css = '''
+					.div-header {font-size: x-small; font-weight: bold}
+					'''
+
+	def is_blocked(self, a):
+		'''Return True if the link a has a following sibling <img>.'''
+		return a.findNextSibling('img') is not None
+
+	def find_last_issue(self):
+		'''Set self.EDITION and self.cover_url from the archive page.
+
+		With FIND_LAST_FULL_ISSUE the issue linked after the last blocked icon is used; if no
+		such link exists (e.g. no blocked icon on the page) or the flag is off, the first issue is.
+		'''
+		soup = self.index_to_soup('http://www.wprost.pl/archiwum/')
+		toc_link = {'title' : re.compile('Zobacz spis tre.ci')}
+		a = None
+		if self.FIND_LAST_FULL_ISSUE:
+			ico_blocked = soup.findAll('img', attrs={'src' : self.ICO_BLOCKED})
+			if ico_blocked:  # indexing [-1] on an empty result would raise IndexError
+				a = ico_blocked[-1].findNext('a', attrs=toc_link)
+		if a is None:
+			a = soup.find('a', attrs=toc_link)
+		self.EDITION = a['href'].replace('/tygodnik/?I=', '')
+		self.cover_url = a.img['src']
+
+	def parse_index(self):
+		'''Return (section title, articles) pairs for the selected issue, skipping empty sections.'''
+		self.find_last_issue()
+		soup = self.index_to_soup('http://www.wprost.pl/tygodnik/?I=' + self.EDITION)
+		feeds = []
+		for main_block in soup.findAll(attrs={'class':'main-block-s3 s3-head head-red3'}):
+			articles = list(self.find_articles(main_block))
+			if articles:
+				feeds.append((self.tag_to_string(main_block), articles))
+		return feeds
+
+	def find_articles(self, main_block):
+		'''Yield article dicts for the elements that follow main_block, stopping at the next <td>.'''
+		for a in main_block.findAllNext( attrs={'style':['','padding-top: 15px;']}):
+			if a.name == 'td':
+				break
+			if self.EXCLUDE_LOCKED and self.is_blocked(a):
+				continue
+			yield {
+				'title' : self.tag_to_string(a),
+				'url'   : 'http://www.wprost.pl' + a['href'],
+				'date'  : '',
+				'description' : ''
+				}
+
+
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@ -38,7 +38,7 @@ class ANDROID(USBMS):
                0x227]},

            # Samsung
-            0x04e8 : { 0x681d : [0x0222, 0x0224, 0x0400],
+            0x04e8 : { 0x681d : [0x0222, 0x0223, 0x0224, 0x0400],
                       0x681c : [0x0222, 0x0224, 0x0400],
                       0x6640 : [0x0100],
                     },
@ -62,7 +62,8 @@ class ANDROID(USBMS):
            'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX']
    WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
            '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
-            'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID']
+            'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
+            'SCH-I500_CARD']
    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
            'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID']

--- a/src/calibre/devices/cybook/driver.py
+++ b/src/calibre/devices/cybook/driver.py
@ -65,8 +65,8 @@ class ORIZON(CYBOOK):

    BCD         = [0x319]

-    WINDOWS_MAIN_MEM = re.compile(r'CYBOOK_ORIZON__-FD')
-    WINDOWS_CARD_A_MEM = re.compile('CYBOOK_ORIZON__-SD')
+    WINDOWS_MAIN_MEM = re.compile(r'(CYBOOK_ORIZON__-FD)|(FILE-STOR_GADGET)')
+    WINDOWS_CARD_A_MEM = re.compile('(CYBOOK_ORIZON__-SD)|(FILE-STOR_GADGET)')

    EBOOK_DIR_MAIN = EBOOK_DIR_CARD_A = 'Digital Editions'

--- a/src/calibre/devices/eb600/driver.py
+++ b/src/calibre/devices/eb600/driver.py
@ -229,7 +229,7 @@ class POCKETBOOK301(USBMS):

 class POCKETBOOK602(USBMS):

-    name = 'PocketBook Pro 602 Device Interface'
+    name = 'PocketBook Pro 602/902 Device Interface'
    description    = _('Communicate with the PocketBook 602 reader.')
    author         = 'Kovid Goyal'
    supported_platforms = ['windows', 'osx', 'linux']
@ -244,5 +244,5 @@ class POCKETBOOK602(USBMS):
    BCD         = [0x0324]

    VENDOR_NAME = ''
-    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'PB602'
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['PB602', 'PB902']

--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@ -91,6 +91,10 @@ class FB2MLizer(object):
        return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True)

    def clean_text(self, text):
+        text = re.sub(r'(?miu)<section>\s*</section>', '', text)
+        text = re.sub(r'(?miu)\s+</section>', '</section>', text)
+        text = re.sub(r'(?miu)</section><section>', '</section>\n\n<section>', text)
+
        text = re.sub(r'(?miu)<p>\s*</p>', '', text)
        text = re.sub(r'(?miu)\s+</p>', '</p>', text)
        text = re.sub(r'(?miu)</p><p>', '</p>\n\n<p>', text)
@ -166,11 +170,15 @@ class FB2MLizer(object):

    def get_text(self):
+        # When sectionize_chapters_using_file_structure is set, a section
+        # break is emitted between consecutive spine items: no opening tag
+        # before the first item and no closing tag after the last one.
        text = []
-        for item in self.oeb_book.spine:
+        for i, item in enumerate(self.oeb_book.spine):
+            # Indices are compared by value; `is` on ints is an identity test
+            # and is unreliable outside the interpreter's small-int cache.
+            if self.opts.sectionize_chapters_using_file_structure and i != 0:
+                text.append('<section>')
            self.log.debug('Converting %s to FictionBook2 XML' % item.href)
            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
            text.append(self.add_page_anchor(item))
            text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
+            if self.opts.sectionize_chapters_using_file_structure and i != len(self.oeb_book.spine) - 1:
+                text.append('</section>')
        return ''.join(text)

    def fb2_body_footer(self):
@ -258,6 +266,10 @@ class FB2MLizer(object):
        if id_name:
            fb2_text.append(self.get_anchor(page, id_name))

+        if tag == 'h1' and self.opts.h1_to_title or tag == 'h2' and self.opts.h2_to_title or tag == 'h3' and self.opts.h3_to_title:
+            fb2_text.append('<title>')
+            tags.append('title')
+
        fb2_tag = TAG_MAP.get(tag, None)
        if fb2_tag == 'p':
            if 'p' in tag_stack+tags:
--- a/src/calibre/ebooks/fb2/output.py
+++ b/src/calibre/ebooks/fb2/output.py
@ -25,6 +25,20 @@ class FB2Output(OutputFormatPlugin):
                   'WARNING: ' \
                   'This option is experimental. It can cause conversion ' \
                   'to fail. It can also produce unexpected output.')),
+        OptionRecommendation(name='sectionize_chapters_using_file_structure',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Try to turn chapters into individual sections using the ' \
+                   'internal structure of the ebook. This works well for EPUB ' \
+                   'books that have been internally split by chapter.')),
+        OptionRecommendation(name='h1_to_title',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Wrap all h1 tags with fb2 title elements.')),
+        OptionRecommendation(name='h2_to_title',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Wrap all h2 tags with fb2 title elements.')),
+        OptionRecommendation(name='h3_to_title',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Wrap all h3 tags with fb2 title elements.')),
    ])

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -504,6 +504,9 @@ class MobiReader(object):
            'x-large': '5',
            'xx-large': '6',
            }
+        def barename(x):
+            return x.rpartition(':')[-1]
+
        mobi_version = self.book_header.mobi_version
        for x in root.xpath('//ncx'):
            x.getparent().remove(x)
@ -512,8 +515,9 @@ class MobiReader(object):
            for x in tag.attrib:
                if ':' in x:
                    del tag.attrib[x]
-            if tag.tag in ('country-region', 'place', 'placetype', 'placename',
-                'state', 'city', 'street', 'address', 'content', 'form'):
+            if tag.tag and barename(tag.tag.lower()) in \
+                ('country-region', 'place', 'placetype', 'placename',
+                    'state', 'city', 'street', 'address', 'content', 'form'):
                tag.tag = 'div' if tag.tag in ('content', 'form') else 'span'
                for key in tag.attrib.keys():
                    tag.attrib.pop(key)
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@ -216,7 +216,9 @@ class PMLMLizer(object):
            w = '\\w'
            width = elem.get('width')
            if width:
-                w += '="%s%%"' % width
+                if not width.endswith('%'):
+                    width += '%'
+                w += '="%s"' % width
            else:
                w += '="50%"'
            text.append(w)
@ -252,8 +254,8 @@ class PMLMLizer(object):
                    if href not in self.link_hrefs.keys():
                        self.link_hrefs[href] = 'calibre_link-%s' % len(self.link_hrefs.keys())
                    href = '#%s' % self.link_hrefs[href]
-                text.append('\\q="%s"' % href)
-                tags.append('q')
+                    text.append('\\q="%s"' % href)
+                    tags.append('q')

        # Anchor ids
        id_name = elem.get('id')
--- a/src/calibre/gui2/actions/edit_metadata.py
+++ b/src/calibre/gui2/actions/edit_metadata.py
@ -164,12 +164,15 @@ class EditMetadataAction(InterfaceAction):
        self.gui.tags_view.blockSignals(True)
        changed = False
        try:
+            current_tab = 0
            while True:
-                dialog = MetadataBulkDialog(self.gui, rows, self.gui.library_view.model())
+                dialog = MetadataBulkDialog(self.gui, rows,
+                                self.gui.library_view.model(), current_tab)
                if dialog.changed:
                    changed = True
                if not dialog.do_again:
                    break
+                current_tab = dialog.central_widget.currentIndex()
        finally:
            self.gui.tags_view.blockSignals(False)
        if changed:
--- a/src/calibre/gui2/actions/similar_books.py
+++ b/src/calibre/gui2/actions/similar_books.py
@ -58,6 +58,7 @@ class SimilarBooksAction(InterfaceAction):
                                for a in authors.split(',')]
                join = ' or '
        if search:
-            self.gui.search.set_search_string(join.join(search))
+            self.gui.search.set_search_string(join.join(search),
+                    store_in_history=True)


--- a/src/calibre/gui2/actions/view.py
+++ b/src/calibre/gui2/actions/view.py
@ -12,7 +12,7 @@ from PyQt4.Qt import Qt, QMenu

 from calibre.constants import isosx
 from calibre.gui2 import error_dialog, Dispatcher, question_dialog, config, \
-        open_local_file
+        open_local_file, info_dialog
 from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
 from calibre.utils.config import prefs
 from calibre.ptempfile import PersistentTemporaryFile
@ -89,18 +89,34 @@ class ViewAction(InterfaceAction):
        self._launch_viewer(name, viewer, internal)

    def view_specific_format(self, triggered):
+        # Offers the union of the formats of all selected books, then opens
+        # every selected book that has the chosen format.
-        rows = self.gui.library_view.selectionModel().selectedRows()
+        rows = list(self.gui.library_view.selectionModel().selectedRows())
        if not rows or len(rows) == 0:
            d = error_dialog(self.gui, _('Cannot view'), _('No book selected'))
            d.exec_()
            return

-        row = rows[0].row()
-        formats = self.gui.library_view.model().db.formats(row).upper().split(',')
-        d = ChooseFormatDialog(self.gui, _('Choose the format to view'), formats)
+        db = self.gui.library_view.model().db
+        rows = [r.row() for r in rows]
+        formats = [db.formats(row) for row in rows]
+        # A book with no formats gets an empty list (not None), so collecting
+        # and filtering below never iterate over None.
+        formats = [f.upper().split(',') if f else [] for f in formats]
+        all_fmts = set([])
+        for x in formats:
+            all_fmts.update(x)
+        d = ChooseFormatDialog(self.gui, _('Choose the format to view'),
+                list(sorted(all_fmts)))
        if d.exec_() == d.Accepted:
-            format = d.format()
-            self.view_format(row, format)
+            fmt = d.format()
+            orig_num = len(rows)
+            rows = [rows[i] for i in range(len(rows)) if formats[i] and fmt in
+                    formats[i]]
+            if self._view_check(len(rows)):
+                for row in rows:
+                    self.view_format(row, fmt)
+                if len(rows) < orig_num:
+                    info_dialog(self.gui, _('Format unavailable'),
+                            _('Not all the selected books were available in'
+                                ' the %s format. You should convert'
+                                ' them first.')%fmt, show=True)

    def _view_check(self, num, max_=3):
        if num <= max_:
--- a/src/calibre/gui2/book_details.py
+++ b/src/calibre/gui2/book_details.py
@ -208,8 +208,9 @@ class BookInfo(QWebView):
        rows = u'\n'.join([u'<tr><td valign="top"><b>%s:</b></td><td valign="top">%s</td></tr>'%(k,t) for
            k, t in rows])
        comments = data.get(_('Comments'), '')
-        if comments and comments != u'None':
-            self.renderer.queue.put((rows, comments))
+        if not comments or comments == u'None':
+            comments = ''
+        self.renderer.queue.put((rows, comments))
        self._show_data(rows, '')


--- a/src/calibre/gui2/convert/fb2_output.py
+++ b/src/calibre/gui2/convert/fb2_output.py
@ -17,6 +17,8 @@ class PluginWidget(Widget, Ui_Form):
    ICON = I('mimetypes/fb2.png')

    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
-        Widget.__init__(self, parent, ['inline_toc', 'sectionize_chapters'])
+        Widget.__init__(self, parent, ['inline_toc', 'sectionize_chapters', 
+            'sectionize_chapters_using_file_structure', 'h1_to_title', 
+            'h2_to_title', 'h3_to_title'])
        self.db, self.book_id = db, book_id
        self.initialize_options(get_option, get_help, db, book_id)
--- a/src/calibre/gui2/convert/fb2_output.ui
+++ b/src/calibre/gui2/convert/fb2_output.ui
@ -14,7 +14,7 @@
   <string>Form</string>
  </property>
  <layout class="QGridLayout" name="gridLayout">
-   <item row="2" column="0">
+   <item row="6" column="0">
    <spacer name="verticalSpacer">
     <property name="orientation">
      <enum>Qt::Vertical</enum>
@ -41,6 +41,34 @@
     </property>
    </widget>
   </item>
+   <item row="2" column="0">
+    <widget class="QCheckBox" name="opt_sectionize_chapters_using_file_structure">
+     <property name="text">
+      <string>Sectionize Chapters using file structure</string>
+     </property>
+    </widget>
+   </item>
+   <item row="3" column="0">
+    <widget class="QCheckBox" name="opt_h1_to_title">
+     <property name="text">
+      <string>Wrap h1 tags with &lt;title&gt; elements</string>
+     </property>
+    </widget>
+   </item>
+   <item row="4" column="0">
+    <widget class="QCheckBox" name="opt_h2_to_title">
+     <property name="text">
+      <string>Wrap h2 tags with &lt;title&gt; elements</string>
+     </property>
+    </widget>
+   </item>
+   <item row="5" column="0">
+    <widget class="QCheckBox" name="opt_h3_to_title">
+     <property name="text">
+      <string>Wrap h3 tags with &lt;title&gt; elements</string>
+     </property>
+    </widget>
+   </item>
  </layout>
 </widget>
 <resources/>
--- a/src/calibre/gui2/dialogs/metadata_bulk.py
+++ b/src/calibre/gui2/dialogs/metadata_bulk.py
@ -197,7 +197,7 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
                            _('Append to field'),
                        ]

-    def __init__(self, window, rows, model):
+    def __init__(self, window, rows, model, tab):
        QDialog.__init__(self, window)
        Ui_MetadataBulkDialog.__init__(self)
        self.setupUi(self)
@ -238,6 +238,7 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
            'Immediately make all changes without closing the dialog. '
            'This operation cannot be canceled or undone'))
        self.do_again = False
+        self.central_widget.setCurrentIndex(tab)
        self.exec_()

    def button_clicked(self, which):
--- a/src/calibre/gui2/init.py
+++ b/src/calibre/gui2/init.py
@ -86,6 +86,10 @@ class LibraryViewMixin(object): # {{{
        if view is self.current_view():
            self.search.search_done(ok)
            self.set_number_of_books_shown()
+            if ok:
+                v = self.current_view()
+                if hasattr(v, 'set_current_row'):
+                    v.set_current_row(0)

    # }}}

--- a/src/calibre/gui2/layout.py
+++ b/src/calibre/gui2/layout.py
@ -182,7 +182,7 @@ class SearchBar(QWidget): # {{{
        l.addWidget(self.search_button)
        self.search_button.setSizePolicy(QSizePolicy.Minimum,
                QSizePolicy.Minimum)
-        self.search_button.clicked.connect(parent.search.do_search)
+        self.search_button.clicked.connect(parent.do_search_button)
        self.search_button.setToolTip(
            _('Do Quick Search (you can also press the Enter key)'))

--- a/src/calibre/gui2/search_box.py
+++ b/src/calibre/gui2/search_box.py
@ -9,7 +9,8 @@ __docformat__ = 'restructuredtext en'
 import re

 from PyQt4.Qt import QComboBox, Qt, QLineEdit, QStringList, pyqtSlot, QDialog, \
-                     pyqtSignal, QCompleter, QAction, QKeySequence, QTimer
+                     pyqtSignal, QCompleter, QAction, QKeySequence, QTimer, \
+                     QString

 from calibre.gui2 import config
 from calibre.gui2.dialogs.confirm_delete import confirm
@ -17,21 +18,13 @@ from calibre.gui2.dialogs.saved_search_editor import SavedSearchEditor
 from calibre.gui2.dialogs.search import SearchDialog
 from calibre.utils.search_query_parser import saved_searches

-class SearchLineEdit(QLineEdit):
+class SearchLineEdit(QLineEdit): # {{{
    key_pressed = pyqtSignal(object)

    def keyPressEvent(self, event):
        self.key_pressed.emit(event)
        QLineEdit.keyPressEvent(self, event)

-    def mouseReleaseEvent(self, event):
-        QLineEdit.mouseReleaseEvent(self, event)
-        QLineEdit.selectAll(self)
-
-    def focusInEvent(self, event):
-        QLineEdit.focusInEvent(self, event)
-        QLineEdit.selectAll(self)
-
    def dropEvent(self, ev):
        self.parent().normalize_state()
        return QLineEdit.dropEvent(self, ev)
@ -44,17 +37,23 @@ class SearchLineEdit(QLineEdit):
    def paste(self, *args):
        self.parent().normalize_state()
        return QLineEdit.paste(self)
+# }}}

-class SearchBox2(QComboBox):
+class SearchBox2(QComboBox): # {{{

    '''
    To use this class:

        * Call initialize()
        * Connect to the search() and cleared() signals from this widget.
-        * Connect to the cleared() signal to know when the box content changes
-        * Connect to focus_to_library signal to be told to manually change focus
+        * Connect to the changed() signal to know when the box content changes
+        * Connect to focus_to_library() signal to be told to manually change focus
        * Call search_done() after every search is complete
+        * Call set_search_string() to perform a search programmatically
+        * You can use the current_text property to get the current search text
+          Be aware that if you are using it in a slot connected to the
+          changed() signal, if the connection is not queued it will not be
+          accurate.
    '''

    INTERVAL = 1500 #: Time to wait before emitting search signal
@ -70,8 +69,12 @@ class SearchBox2(QComboBox):
        self.normal_background = 'rgb(255, 255, 255, 0%)'
        self.line_edit = SearchLineEdit(self)
        self.setLineEdit(self.line_edit)
+
        c = self.line_edit.completer()
        c.setCompletionMode(c.PopupCompletion)
+        c.highlighted[QString].connect(self.completer_used)
+        c.activated[QString].connect(self.history_selected)
+
        self.line_edit.key_pressed.connect(self.key_pressed, type=Qt.DirectConnection)
        self.activated.connect(self.history_selected)
        self.setEditable(True)
@ -89,7 +92,11 @@ class SearchBox2(QComboBox):
    def initialize(self, opt_name, colorize=False, help_text=_('Search')):
        self.as_you_type = config['search_as_you_type']
        self.opt_name = opt_name
-        self.addItems(QStringList(list(set(config[opt_name]))))
+        items = []
+        for item in config[opt_name]:
+            if item not in items:
+                items.append(item)
+        self.addItems(QStringList(items))
        try:
            self.line_edit.setPlaceholderText(help_text)
        except:
@ -130,6 +137,7 @@ class SearchBox2(QComboBox):
            col = self.normal_background
        self.line_edit.setStyleSheet('QLineEdit{color:black;background-color:%s;}' % col)

+    # Comes from the lineEdit control
    def key_pressed(self, event):
        k = event.key()
        if k in (Qt.Key_Left, Qt.Key_Right, Qt.Key_Up, Qt.Key_Down,
@ -146,6 +154,21 @@ class SearchBox2(QComboBox):
        elif self.as_you_type and unicode(event.text()):
            self.timer.start(1500)

+    # Comes from the combobox itself
+    def keyPressEvent(self, event):
+        '''Handle key presses delivered to the combobox itself.
+
+        Up/Down are forwarded to QComboBox after normalize_state() and with
+        this widget's signals blocked; every other key is forwarded as is.
+        '''
+        k = event.key()
+        if k not in (Qt.Key_Up, Qt.Key_Down):
+            QComboBox.keyPressEvent(self, event)
+            return
+        self.blockSignals(True)
+        try:
+            self.normalize_state()
+            QComboBox.keyPressEvent(self, event)
+        finally:
+            # Always unblock, even if a handler above raises; otherwise the
+            # box would stop emitting signals for good.
+            self.blockSignals(False)
+
+    def completer_used(self, text):
+        self.timer.stop()
+        self.normalize_state()
+
    def timer_event(self):
        self.do_search()

@ -153,48 +176,45 @@ class SearchBox2(QComboBox):
        self.changed.emit()
        self.do_search()

-    def do_search(self, *args):
+    def _do_search(self, store_in_history=True):
        text = unicode(self.currentText()).strip()
        if not text:
            return self.clear()
        self.search.emit(text)

-        idx = self.findText(text, Qt.MatchFixedString)
-        self.block_signals(True)
-        if idx < 0:
-            self.insertItem(0, text)
-        else:
-            t = self.itemText(idx)
-            self.removeItem(idx)
-            self.insertItem(0, t)
+        if store_in_history:
+            idx = self.findText(text, Qt.MatchFixedString)
+            self.block_signals(True)
+            if idx < 0:
+                self.insertItem(0, text)
+            else:
+                t = self.itemText(idx)
+                self.removeItem(idx)
+                self.insertItem(0, t)
            self.setCurrentIndex(0)
-        self.block_signals(False)
-        config[self.opt_name] = [unicode(self.itemText(i)) for i in
-                range(self.count())]
+            self.block_signals(False)
+            history = [unicode(self.itemText(i)) for i in
+                    range(self.count())]
+            config[self.opt_name] = history
+
+    def do_search(self, *args):
+        self._do_search()

    def block_signals(self, yes):
        self.blockSignals(yes)
        self.line_edit.blockSignals(yes)

-    def search_from_tokens(self, tokens, all):
-        ans = u' '.join([u'%s:%s'%x for x in tokens])
-        if not all:
-            ans = '[' + ans + ']'
-        self.set_search_string(ans)
-
-    def search_from_tags(self, tags, all):
-        joiner = ' and ' if all else ' or '
-        self.set_search_string(joiner.join(tags))
-
-    def set_search_string(self, txt):
+    def set_search_string(self, txt, store_in_history=False):
+        self.setFocus(Qt.OtherFocusReason)
        if not txt:
            self.clear()
-            return
-        self.normalize_state()
-        self.setEditText(txt)
-        self.search.emit(txt)
-        self.line_edit.end(False)
-        self.initial_state = False
+        else:
+            self.normalize_state()
+            self.setEditText(txt)
+            self.line_edit.end(False)
+            self.changed.emit()
+            self._do_search(store_in_history=store_in_history)
+        self.focus_to_library.emit()

    def search_as_you_type(self, enabled):
        self.as_you_type = enabled
@ -202,7 +222,13 @@ class SearchBox2(QComboBox):
    def in_a_search(self):
        return self._in_a_search

-class SavedSearchBox(QComboBox):
+    @property
+    def current_text(self):
+        return unicode(self.lineEdit().text())
+
+    # }}}
+
+class SavedSearchBox(QComboBox): # {{{

    '''
    To use this class:
@ -212,7 +238,6 @@ class SavedSearchBox(QComboBox):
    '''

    changed = pyqtSignal()
-    focus_to_library = pyqtSignal()

    def __init__(self, parent=None):
        QComboBox.__init__(self, parent)
@ -236,7 +261,11 @@ class SavedSearchBox(QComboBox):

    def initialize(self, _search_box, colorize=False, help_text=_('Search')):
        self.search_box = _search_box
-        self.line_edit.setPlaceholderText(help_text)
+        try:
+            self.line_edit.setPlaceholderText(help_text)
+        except AttributeError:
+            # Using Qt < 4.7: the line edit has no setPlaceholderText()
+            pass
        self.colorize = colorize
        self.clear()

@ -253,7 +282,6 @@ class SavedSearchBox(QComboBox):
    def key_pressed(self, event):
        if event.key() in (Qt.Key_Return, Qt.Key_Enter):
            self.saved_search_selected(self.currentText())
-            self.focus_to_library.emit()

    def saved_search_selected(self, qname):
        qname = unicode(qname)
@ -267,7 +295,6 @@ class SavedSearchBox(QComboBox):
        self.search_box.set_search_string(u'search:"%s"' % qname)
        self.setEditText(qname)
        self.setToolTip(saved_searches().lookup(qname))
-        self.focus_to_library.emit()

    def initialize_saved_search_names(self):
        qnames = saved_searches().names()
@ -313,13 +340,17 @@ class SavedSearchBox(QComboBox):
            return
        self.search_box.set_search_string(saved_searches().lookup(unicode(self.currentText())))

-class SearchBoxMixin(object):
+    # }}}
+
+class SearchBoxMixin(object): # {{{

    def __init__(self):
        self.search.initialize('main_search_history', colorize=True,
                help_text=_('Search (For Advanced Search click the button to the left)'))
        self.search.cleared.connect(self.search_box_cleared)
-        self.search.changed.connect(self.search_box_changed)
+        # Queued so that search.current_text will be correct
+        self.search.changed.connect(self.search_box_changed,
+                type=Qt.QueuedConnection)
        self.search.focus_to_library.connect(self.focus_to_library)
        self.clear_button.clicked.connect(self.search.clear_clicked)
        self.advanced_search_button.clicked[bool].connect(self.do_advanced_search)
@ -330,14 +361,17 @@ class SearchBoxMixin(object):
        shortcuts = QKeySequence.keyBindings(QKeySequence.Find)
        shortcuts = list(shortcuts) + [QKeySequence('/'), QKeySequence('Alt+S')]
        self.action_focus_search.setShortcuts(shortcuts)
-        self.action_focus_search.triggered.connect(lambda x:
-                self.search.setFocus(Qt.OtherFocusReason))
+        self.action_focus_search.triggered.connect(self.focus_search_box)
        self.addAction(self.action_focus_search)
        self.search.setStatusTip(re.sub(r'<\w+>', ' ',
            unicode(self.search.toolTip())))
        self.advanced_search_button.setStatusTip(self.advanced_search_button.toolTip())
        self.clear_button.setStatusTip(self.clear_button.toolTip())

+    def focus_search_box(self, *args):
+        self.search.setFocus(Qt.OtherFocusReason)
+        self.search.lineEdit().selectAll()
+
    def search_box_cleared(self):
        self.tags_view.clear()
        self.saved_search.clear()
@ -345,22 +379,27 @@ class SearchBoxMixin(object):

    def search_box_changed(self):
        self.saved_search.clear()
-        self.tags_view.clear()
+        self.tags_view.conditional_clear(self.search.current_text)

    def do_advanced_search(self, *args):
        d = SearchDialog(self, self.library_view.model().db)
        if d.exec_() == QDialog.Accepted:
            self.search.set_search_string(d.search_string())

+    def do_search_button(self):
+        self.search.do_search()
+        self.focus_to_library()
+
    def focus_to_library(self):
        self.current_view().setFocus(Qt.OtherFocusReason)

-class SavedSearchBoxMixin(object):
+    # }}}
+
+class SavedSearchBoxMixin(object): # {{{

    def __init__(self):
        self.saved_search.changed.connect(self.saved_searches_changed)
        self.clear_button.clicked.connect(self.saved_search.clear)
-        self.saved_search.focus_to_library.connect(self.focus_to_library)
        self.save_search_button.clicked.connect(
                                self.saved_search.save_search_button_clicked)
        self.delete_search_button.clicked.connect(
@ -396,6 +435,5 @@ class SavedSearchBoxMixin(object):
            self.saved_searches_changed()
            self.saved_search.clear()

-    def focus_to_library(self):
-        self.current_view().setFocus(Qt.OtherFocusReason)
+    # }}}

--- a/src/calibre/gui2/search_restriction_mixin.py
+++ b/src/calibre/gui2/search_restriction_mixin.py
@ -4,6 +4,8 @@ Created on 10 Jun 2010
@author: charles
 '''

+from PyQt4.Qt import Qt
+
 class SearchRestrictionMixin(object):

    def __init__(self):
@ -53,6 +55,7 @@ class SearchRestrictionMixin(object):
        self.saved_search.clear()
        self.tags_view.set_search_restriction(restriction)
        self.set_number_of_books_shown()
+        self.current_view().setFocus(Qt.OtherFocusReason)

    def set_number_of_books_shown(self):
        if self.current_view() == self.library_view and self.restriction_in_effect:
--- a/src/calibre/gui2/tag_view.py
+++ b/src/calibre/gui2/tag_view.py
@ -60,7 +60,7 @@ class TagDelegate(QItemDelegate): # {{{
 class TagsView(QTreeView): # {{{

    refresh_required    = pyqtSignal()
-    tags_marked         = pyqtSignal(object, object)
+    tags_marked         = pyqtSignal(object)
    user_category_edit  = pyqtSignal(object)
    tag_list_edit       = pyqtSignal(object, object)
    saved_search_edit   = pyqtSignal(object)
@ -135,11 +135,21 @@ class TagsView(QTreeView): # {{{
        # swallow these to avoid toggling and editing at the same time
        pass

+    @property
+    def search_string(self):
+        '''The model's tokens joined with ' and ' (match_all) or ' or '.'''
+        toks = self._model.tokens()
+        if self.match_all:
+            return ' and '.join(toks)
+        return ' or '.join(toks)
+
    def toggle(self, index):
        modifiers = int(QApplication.keyboardModifiers())
        exclusive = modifiers not in (Qt.CTRL, Qt.SHIFT)
        if self._model.toggle(index, exclusive):
-            self.tags_marked.emit(self._model.tokens(), self.match_all)
+            self.tags_marked.emit(self.search_string)
+
+    def conditional_clear(self, search_string):
+        '''Clear the view unless ``search_string`` equals self.search_string.'''
+        if search_string == self.search_string:
+            return
+        self.clear()

    def context_menu_handler(self, action=None, category=None,
                             key=None, index=None):
@ -842,8 +852,7 @@ class TagBrowserMixin(object): # {{{
        self.library_view.model().count_changed_signal.connect(self.tags_view.recount)
        self.tags_view.set_database(self.library_view.model().db,
                self.tag_match, self.sort_by)
-        self.tags_view.tags_marked.connect(self.search.search_from_tags)
-        self.tags_view.tags_marked.connect(self.saved_search.clear)
+        self.tags_view.tags_marked.connect(self.search.set_search_string)
        self.tags_view.tag_list_edit.connect(self.do_tags_list_edit)
        self.tags_view.user_category_edit.connect(self.do_user_categories_edit)
        self.tags_view.saved_search_edit.connect(self.do_saved_search_edit)
--- a/src/calibre/gui2/viewer/main.py
+++ b/src/calibre/gui2/viewer/main.py
@ -17,7 +17,7 @@ from calibre.gui2.viewer.bookmarkmanager import BookmarkManager
 from calibre.gui2.widgets import ProgressIndicator
 from calibre.gui2.main_window import MainWindow
 from calibre.gui2 import Application, ORG_NAME, APP_UID, choose_files, \
-                         info_dialog, error_dialog, open_url
+                         info_dialog, error_dialog, open_url, available_height
 from calibre.ebooks.oeb.iterator import EbookIterator
 from calibre.ebooks import DRMError
 from calibre.constants import islinux, isfreebsd, isosx
@ -253,6 +253,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
        self.connect(self.vertical_scrollbar, SIGNAL('valueChanged(int)'),
                     lambda x: self.goto_page(x/100.))
        self.search.search.connect(self.find)
+        self.search.focus_to_library.connect(lambda: self.view.setFocus(Qt.OtherFocusReason))
        self.connect(self.toc, SIGNAL('clicked(QModelIndex)'), self.toc_clicked)
        self.connect(self.reference, SIGNAL('goto(PyQt_PyObject)'), self.goto)

@ -693,6 +694,9 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
            if ss is not None:
                self.splitter.restoreState(ss)
            self.show_toc_on_open = dynamic.get('viewer_toc_isvisible', False)
+        av = available_height() - 30
+        if self.height() > av:
+            self.resize(self.width(), av)

 def config(defaults=None):
    desc = _('Options to control the ebook viewer')
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@ -1402,7 +1402,6 @@ class EPUB_MOBI(CatalogPlugin):
                if record['cover']:
                    this_title['cover'] = re.sub('&amp;', '&', record['cover'])

-                # This may be updated in self.processSpecialTags()
                this_title['read'] = self.discoverReadStatus(record)

                if record['tags']:
@ -2676,14 +2675,7 @@ class EPUB_MOBI(CatalogPlugin):
                pBookTag = Tag(soup, "p")
                ptc = 0

-                # THIS SHOULDN'T BE NECESSARY
-                #  book with read/reading/unread symbol
-#                 for tag in book['tags']:
-#                     if tag == self.opts.read_tag:
-#                         book['read'] = True
-#                         break
-#                 else:
-#                     book['read'] = False
+                book['read'] = self.discoverReadStatus(book)

                #  book with read|reading|unread symbol or wishlist item
                if self.opts.wishlist_tag in book.get('tags', []):
@ -4057,7 +4049,6 @@ class EPUB_MOBI(CatalogPlugin):

            return False

-
        def filterDbTags(self, tags):
            # Remove the special marker tags from the database's tag list,
            # return sorted list of normalized genre tags
@ -4550,7 +4541,6 @@ class EPUB_MOBI(CatalogPlugin):
            markerTags = []
            markerTags.extend(self.opts.exclude_tags.split(','))
            markerTags.extend(self.opts.note_tag.split(','))
-            # Process read_book_marker if field is tag
            return markerTags

        def letter_or_symbol(self,char):