Merge from trunk

2025-11-21 14:03:03 -05:00 · 2010-12-01 08:58:00 +00:00 · 2010-12-01 08:58:00 +00:00 · c7bb32dc40
commit c7bb32dc40
parent 6132fa1385 c14726e5bb
35 changed files with 2267 additions and 232 deletions
--- a/resources/jacket/stylesheet.css
+++ b/resources/jacket/stylesheet.css
@ -36,22 +36,37 @@
 /*
 **	Title
 */
-.cbj_title {
+table.cbj_header td.cbj_title {
 	font-size: x-large;
+	font-style: italic;
+	text-align: center;
+}
+
+/*
+**	Series
+*/
+table.cbj_header td.cbj_series {
+	font-size: medium;
 	text-align: center;
 }

 /*
 **	Author
 */
-.cbj_author {
+table.cbj_header td.cbj_author {
 	font-size: medium;
 	text-align: center;
-    margin-bottom: 1ex;
 }

 /*
-**	Table containing Series, Publication Year, Rating and Tags
+**	Publisher/published
+*/
+table.cbj_header td.cbj_pubdata {
+	text-align: center;
+}
+
+/*
+**	Table containing Rating and Tags
 */
 table.cbj_header {
 	width: 100%;
@ -62,9 +77,8 @@ table.cbj_header {
 */
 table.cbj_header td.cbj_label {
 	font-family: sans-serif;
-	font-weight: bold;
 	text-align: right;
-	width: 40%;
+	width: 33%;
 	}

 /*
@ -73,9 +87,23 @@ table.cbj_header td.cbj_label {
 table.cbj_header td.cbj_content {
 	font-family: sans-serif;
 	text-align: left;
-	width:60%;
+	width:67%;
 	}

+/*
+** Metadata divider
+*/
+hr.metadata_divider {
+	width:90%;
+	margin-left:5%;
+	border-top: solid white 0px;
+	border-right: solid white 0px;
+    border-bottom: solid black 1px;
+    border-left: solid white 0px;
+	}
+
+
+
 /*
 **	To skip a banner item (Series|Published|Rating|Tags),
 **	edit the appropriate CSS rule below.
--- a/resources/jacket/template.xhtml
+++ b/resources/jacket/template.xhtml
@ -6,17 +6,24 @@
    </head>
    <body>
        <div class="cbj_banner">
-            <div class="cbj_title">{title}</div>
-            <div class="cbj_author">{author}</div>
            <table class="cbj_header">
-                <tr class="cbj_series">
-                    <td class="cbj_label">{series_label}:</td>
-                    <td class="cbj_content">{series}</td>
+            	<tr>
+            		<td class="cbj_title" colspan="2">{title}</td>
+            	</tr>
+                <tr>
+                    <td  class="cbj_series" colspan="2">{series}</td>
                </tr>
-                <tr class="cbj_pubdate">
-                    <td class="cbj_label">{pubdate_label}:</td>
-                    <td class="cbj_content">{pubdate}</td>
+            	<tr>
+            		<td class="cbj_author" colspan="2">{author}</td>
+            	</tr>
+                <tr>
+                	<td class="cbj_pubdata" colspan="2">{publisher} ({pubdate})</td>
                </tr>
+
+            	<tr>
+            		<td class="cbj_author" colspan="2"><hr class="metadata_divider" /></td>
+            	</tr>
+
                <tr class="cbj_rating">
                    <td class="cbj_label">{rating_label}:</td>
                    <td class="cbj_content">{rating}</td>
--- a/resources/mime.types
+++ b/resources/mime.types
--- a/resources/recipes/abc_au.recipe
+++ b/resources/recipes/abc_au.recipe
@ -0,0 +1,54 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Dean Cording'
+'''
+abc.net.au/news
+'''
+import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class ABCNews(BasicNewsRecipe):
+    '''Calibre news recipe that downloads the ABC News (Australia) RSS feeds.'''
+
+    title                  = 'ABC News'
+    __author__             = 'Dean Cording'
+    description            = 'News from Australia'
+    masthead_url           = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png'
+    cover_url              = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png'
+
+    oldest_article         = 2
+    max_articles_per_feed  = 100
+    no_stylesheets         = False
+    #delay                  = 1
+    use_embedded_content   = False
+    encoding               = 'utf8'
+    publisher              = 'ABC News'
+    category               = 'News, Australia, World'
+    language               = 'en_AU'
+    publication_type       = 'newsportal'
+    # Strip HTML comments from the downloaded pages before they are parsed.
+    preprocess_regexps     = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
+    conversion_options = {
+                             'comments'        : description
+                            ,'tags'            : category
+                            ,'language'        : language
+                            ,'publisher'       : publisher
+                            ,'linearize_tables': False
+                         }
+
+    # keep_only_tags takes a list of tag specifications, like remove_tags below.
+    keep_only_tags    =  [dict(id='article')]
+
+    remove_tags = [dict(attrs={'class':['related', 'tags']}),
+                     dict(id='statepromo')
+                        ]
+
+    remove_attributes = ['width','height']
+
+    # (feed title, feed URL) pairs.
+    feeds          = [
+                      ('Top Stories', 'http://www.abc.net.au/news/syndicate/topstoriesrss.xml'),
+                      ('Canberra', 'http://www.abc.net.au/news/indexes/idx-act/rss.xml'),
+                      ('Sydney', 'http://www.abc.net.au/news/indexes/sydney/rss.xml'),
+                      ('Melbourne', 'http://www.abc.net.au/news/indexes/melbourne/rss.xml'),
+                      ('Brisbane', 'http://www.abc.net.au/news/indexes/brisbane/rss.xml'),
+                      ('Perth', 'http://www.abc.net.au/news/indexes/perth/rss.xml'),
+                      ('Australia', 'http://www.abc.net.au/news/indexes/idx-australia/rss.xml'),
+                      ('World', 'http://www.abc.net.au/news/indexes/world/rss.xml'),
+                      ('Business', 'http://www.abc.net.au/news/indexes/business/rss.xml'),
+                      ('Science and Technology', 'http://www.abc.net.au/news/tag/science-and-technology/rss.xml'),
+                    ]
--- a/resources/recipes/business_spectator.recipe
+++ b/resources/recipes/business_spectator.recipe
@ -0,0 +1,48 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Dean Cording'
+'''
+businessspectator.com.au
+'''
+import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class BusinessSpectator(BasicNewsRecipe):
+    '''Calibre news recipe that downloads the Business Spectator RSS feeds.'''
+
+    # --- Publication metadata ---
+    title = 'Business Spectator'
+    __author__ = 'Dean Cording'
+    description = 'Australian Business News & commentary delivered the way you want it.'
+    publisher = 'Business Spectator'
+    category = 'News, Australia, Business'
+    language = 'en_AU'
+    publication_type = 'newsportal'
+    masthead_url = 'http://www.businessspectator.com.au/bs.nsf/logo-business-spectator.gif'
+    cover_url = masthead_url
+
+    # --- Download settings ---
+    oldest_article = 2
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    encoding = 'utf8'
+
+    # Strip HTML comments from the downloaded pages before they are parsed.
+    preprocess_regexps = [
+        (re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
+    ]
+
+    conversion_options = {
+        'comments': description,
+        'tags': category,
+        'language': language,
+        'publisher': publisher,
+        'linearize_tables': False,
+    }
+
+    # --- Article clean-up ---
+    keep_only_tags = [dict(id='storyHeader'), dict(id='body-html')]
+    remove_tags = [dict(attrs={'class': 'hql'})]
+    remove_attributes = ['width', 'height', 'style']
+
+    # (feed title, feed URL) pairs.
+    feeds = [
+        ('Top Stories', 'http://www.businessspectator.com.au/top-stories.rss'),
+        ('Alan Kohler', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=spectators&cat=Alan%20Kohler'),
+        ('Robert Gottliebsen', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=spectators&cat=Robert%20Gottliebsen'),
+        ('Stephen Bartholomeusz', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=spectators&cat=Stephen%20Bartholomeusz'),
+        ('Daily Dossier', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=kgb&cat=dossier'),
+        ('Australia', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=region&cat=australia'),
+    ]
--- a/resources/recipes/esenja.recipe
+++ b/resources/recipes/esenja.recipe
@ -0,0 +1,87 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, matek09, matek09@gmail.com'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class Esensja(BasicNewsRecipe):
+	'''Calibre news recipe for the current issue of the Esensja magazine.'''
+
+	title = u'Esensja'
+	__author__ = 'matek09'
+	description = 'Monthly magazine'
+	encoding = 'utf-8'
+	no_stylesheets = True
+	language = 'pl'
+	remove_javascript = True
+	# Base URL of the article pages of the issue; replaced by parse_index().
+	HREF = '0'
+
+	remove_tags_before = dict(name = 'div', attrs = {'class' : 't-title'})
+	remove_tags_after = dict(name = 'img', attrs = {'src' : '../../../2000/01/img/tab_bot.gif'})
+
+	remove_tags =[]
+	remove_tags.append(dict(name = 'img', attrs = {'src' : '../../../2000/01/img/tab_top.gif'}))
+	remove_tags.append(dict(name = 'img', attrs = {'src' : '../../../2000/01/img/tab_bot.gif'}))
+	remove_tags.append(dict(name = 'div', attrs = {'class' : 't-title2 nextpage'}))
+
+	extra_css = '''
+					.t-title {font-size: x-large; font-weight: bold; text-align: left}
+					.t-author {font-size: x-small; text-align: left}
+					.t-title2 {font-size: x-small; font-style: italic; text-align: left}
+					.text {font-size: small; text-align: left}
+					.annot-ref {font-style: italic; text-align: left}
+				'''
+
+	# Drop every alt="..." attribute from the downloaded markup.
+	preprocess_regexps = [(re.compile(r'alt="[^"]*"'),
+						lambda match: '')]
+
+	def _section_name(self, chapter, subchapter):
+		'''Return the feed title for a chapter and its optional subchapter.'''
+		if subchapter:
+			return chapter + ' - ' + subchapter
+		return chapter
+
+	def parse_index(self):
+		'''Build the list of (section title, articles) for the issue.
+
+		The issue is located through the first link on the magazine page
+		whose href matches ``.*/index.html``; the first two path components
+		of that href are used as year and month. Sets ``self.HREF`` and
+		``self.cover_url`` as a side effect. Each article is a dict with the
+		keys 'title', 'url', 'date' and 'description'.
+		'''
+		soup = self.index_to_soup('http://www.esensja.pl/magazyn/')
+		a = soup.find('a', attrs={'href' : re.compile('.*/index.html')})
+		href_parts = a['href'].split('/')
+		year = href_parts[0]
+		month = href_parts[1]
+		self.HREF = 'http://www.esensja.pl/magazyn/' + year + '/' + month + '/iso/'
+		soup = self.index_to_soup(self.HREF + '01.html')
+		self.cover_url = 'http://www.esensja.pl/magazyn/' + year + '/' + month + '/img/ilustr/cover_b.jpg'
+		feeds = []
+		intro = soup.find('div', attrs={'class' : 'n-title'})
+		introduction = {'title' : self.tag_to_string(intro.a),
+						'url' : self.HREF + intro.a['href'],
+						'date' : '',
+						'description' : ''}
+		chapter = 'Wprowadzenie'
+		subchapter = ''
+		articles = [introduction]
+		for tag in intro.findAllNext(attrs={'class': ['chapter', 'subchapter', 'n-title']}):
+			if tag.name == 'td':
+				# A <td> is a chapter/subchapter heading: it closes the
+				# section collected so far and starts a new one.
+				if articles:
+					feeds.append((self._section_name(chapter, subchapter), articles))
+					articles = []
+				if tag['class'] == 'chapter':
+					chapter = self.tag_to_string(tag).capitalize()
+					subchapter = ''
+				else:
+					subchapter = self.tag_to_string(tag)
+				continue
+
+			title = self.tag_to_string(tag.a)
+			url = self.HREF + tag.a['href']
+			articles.append({'title' : title, 'url' : url, 'date' : '', 'description' : ''})
+
+			# Follow the chain of "nextpage" links and add every
+			# continuation page as its own article.
+			page = self.index_to_soup(url)
+			part = 1
+			while True:
+				nextpage = page.find('div', attrs={'class' : 't-title2 nextpage'})
+				if nextpage is None:
+					break
+				next_url = self.HREF + nextpage.a['href']
+				page = self.index_to_soup(next_url)
+				articles.append({'title' : title + ' c. d. ' + str(part), 'url' : next_url, 'date' : '', 'description' : ''})
+				part += 1
+
+		# The articles after the last heading have no closing heading, so
+		# they must be flushed here or the final section would be lost.
+		if articles:
+			feeds.append((self._section_name(chapter, subchapter), articles))
+
+		return feeds
--- a/resources/recipes/fr_online.recipe
+++ b/resources/recipes/fr_online.recipe
@ -1,67 +1,61 @@
-__license__   = 'GPL v3'
-__copyright__ = '2009, Justus Bisser <justus.bisser at gmail.com>'
+#!/usr/bin/env  python
+
+__license__            = 'GPL v3'
+__copyright__          = '2010, Christian Schmitt'
+
 '''
 fr-online.de
 '''
-import re

-from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.web.feeds.recipes import BasicNewsRecipe

-class Spiegel_ger(BasicNewsRecipe):
-    title                 = 'Frankfurter Rundschau'
-    __author__            = 'Justus Bisser'
-    description           = "Dies ist die Online-Ausgabe der Frankfurter Rundschau. Um die abgerufenen individuell einzustellen bearbeiten sie die Liste im erweiterten Modus. Die Feeds findet man auf http://www.fr-online.de/verlagsservice/fr_newsreader/?em_cnt=574255"
-    publisher             = 'Druck- und Verlagshaus Frankfurt am Main GmbH'
-    category              = 'FR Online, Frankfurter Rundschau, Nachrichten, News,Dienste, RSS, RSS, Feedreader, Newsfeed, iGoogle, Netvibes, Widget'
-    oldest_article        = 7
-    max_articles_per_feed = 100
-    language              = 'de'
-    lang                  = 'de-DE'
-    no_stylesheets        = True
-    use_embedded_content  = False
-    #encoding              = 'cp1252'
+class FROnlineRecipe(BasicNewsRecipe):
+  title                  = 'Frankfurter Rundschau'
+  __author__             = 'maccs'
+  description            = 'Nachrichten aus D und aller Welt'
+  encoding               = 'utf-8'
+  masthead_url =  'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png'
+  publisher              = 'Druck- und Verlagshaus Frankfurt am Main GmbH'
+  category               = 'news, germany, world'
+  language               = 'de'
+  publication_type       = 'newspaper'
+  use_embedded_content   = False
+  remove_javascript      = True
+  no_stylesheets         = True
+  oldest_article         = 1   # Increase this number if you're interested in older articles
+  max_articles_per_feed  = 50  # Seems a reasonable number to me
+  extra_css              = '''
+                            body { font-family: "arial", "verdana", "geneva", sans-serif; font-size: 12px; margin: 0px; background-color: #ffffff;}
+                            .imgSubline{background-color: #f4f4f4; font-size: 0.8em;}
+                            .p--heading-1 {font-weight: bold;}
+                            .calibre_navbar {font-size: 0.8em; font-family: "arial", "verdana", "geneva", sans-serif;}
+                            '''
+  remove_tags            = [dict(name='div', attrs={'id':'Logo'})]
+  cover_url              = 'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png'
+  cover_margins          = (100, 150, '#ffffff')

-    conversion_options = {
-                          'comment'          : description
-                        , 'tags'             : category
-                        , 'publisher'        : publisher
-                        , 'language'         : lang
-                        }

-    recursions = 0
-    max_articles_per_feed = 100
-    #keep_only_tags = [dict(name='div', attrs={'class':'text'})]
-    #tags_remove = [dict(name='div', attrs={'style':'text-align: left; margin: 4px 0px 0px 4px; width: 200px; float: right;'})]
-    remove_attributes = ['style']
-    feeds = []
-    #remove_tags_before = [dict(name='div', attrs={'style':'padding-left: 0px;'})]
-    #remove_tags_after = [dict(name='div', attrs={'class':'box_head_text'})]
+  feeds = []
+  feeds.append(('Startseite', u'http://www.fr-online.de/home/-/1472778/1472778/-/view/asFeed/-/index.xml'))
+  feeds.append(('Politik', u'http://www.fr-online.de/politik/-/1472596/1472596/-/view/asFeed/-/index.xml'))
+  feeds.append(('Meinung', u'http://www.fr-online.de/politik/meinung/-/1472602/1472602/-/view/asFeed/-/index.xml'))
+  feeds.append(('Wirtschaft', u'http://www.fr-online.de/wirtschaft/-/1472780/1472780/-/view/asFeed/-/index.xml'))
+  feeds.append(('Sport', u'http://www.fr-online.de/sport/-/1472784/1472784/-/view/asFeed/-/index.xml'))
+  feeds.append(('Eintracht Frankfurt', u'http://www.fr-online.de/sport/eintracht-frankfurt/-/1473446/1473446/-/view/asFeed/-/index.xml'))
+  feeds.append(('Kultur und Medien', u'http://www.fr-online.de/kultur/-/1472786/1472786/-/view/asFeed/-/index.xml'))
+  feeds.append(('Panorama', u'http://www.fr-online.de/panorama/-/1472782/1472782/-/view/asFeed/-/index.xml'))
+  feeds.append(('Frankfurt', u'http://www.fr-online.de/frankfurt/-/1472798/1472798/-/view/asFeed/-/index.xml'))
+  feeds.append(('Rhein-Main', u'http://www.fr-online.de/rhein-main/-/1472796/1472796/-/view/asFeed/-/index.xml'))
+  feeds.append(('Hanau', u'http://www.fr-online.de/rhein-main/hanau/-/1472866/1472866/-/view/asFeed/-/index.xml'))
+  feeds.append(('Darmstadt', u'http://www.fr-online.de/rhein-main/darmstadt/-/1472858/1472858/-/view/asFeed/-/index.xml'))
+  feeds.append(('Wiesbaden', u'http://www.fr-online.de/rhein-main/wiesbaden/-/1472860/1472860/-/view/asFeed/-/index.xml'))
+  feeds.append(('Offenbach', u'http://www.fr-online.de/rhein-main/offenbach/-/1472856/1472856/-/view/asFeed/-/index.xml'))
+  feeds.append(('Bad Homburg', u'http://www.fr-online.de/rhein-main/bad-homburg/-/1472864/1472864/-/view/asFeed/-/index.xml'))
+  feeds.append(('Digital', u'http://www.fr-online.de/digital/-/1472406/1472406/-/view/asFeed/-/index.xml'))
+  feeds.append(('Wissenschaft', u'http://www.fr-online.de/wissenschaft/-/1472788/1472788/-/view/asFeed/-/index.xml'))

-    # enable for all news
-    allNews = 0
-    if allNews:
-        feeds = [(u'Frankfurter Rundschau', u'http://www.fr-online.de/rss/sport/index.xml')]
-    else:
-        #select the feeds you like
-        feeds = [(u'Nachrichten', u'http://www.fr-online.de/rss/politik/index.xml')]
-        feeds.append((u'Kommentare und Analysen', u'http://www.fr-online.de/rss/meinung/index.xml'))
-        feeds.append((u'Dokumentationen', u'http://www.fr-online.de/rss/dokumentation/index.xml'))
-        feeds.append((u'Deutschlandtrend', u'http://www.fr-online.de/rss/deutschlandtrend/index.xml'))
-        feeds.append((u'Wirtschaft', u'http://www.fr-online.de/rss/wirtschaft/index.xml'))
-        feeds.append((u'Sport', u'http://www.fr-online.de/rss/sport/index.xml'))
-        feeds.append((u'Feuilleton', u'http://www.fr-online.de/rss/feuilleton/index.xml'))
-        feeds.append((u'Panorama', u'http://www.fr-online.de/rss/panorama/index.xml'))
-        feeds.append((u'Rhein Main und Hessen', u'http://www.fr-online.de/rss/hessen/index.xml'))
-        feeds.append((u'Fitness und Gesundheit', u'http://www.fr-online.de/rss/fit/index.xml'))
-        feeds.append((u'Multimedia', u'http://www.fr-online.de/rss/multimedia/index.xml'))
-        feeds.append((u'Wissen und Bildung', u'http://www.fr-online.de/rss/wissen/index.xml'))

-    def get_article_url(self, article):
-        url = article.link
-        regex = re.compile("0C[0-9]{6,8}0A?")
+  def print_version(self, url):
+    '''Return the print-version URL for the article at ``url``.
+
+    Every occurrence of 'index.html' in ``url`` is replaced by
+    'view/printVersion/-/index.html'; a URL that does not contain
+    'index.html' is returned unchanged.
+    '''
+    return url.replace('index.html', 'view/printVersion/-/index.html')

-        liste = regex.findall(url)
-        string = liste.pop(0)
-        string = string[2:len(string)-1]
-        return "http://www.fr-online.de/_em_cms/_globals/print.php?em_cnt=" + string

--- a/resources/recipes/histmag.recipe
+++ b/resources/recipes/histmag.recipe
@ -0,0 +1,59 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, matek09, matek09@gmail.com'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class Histmag(BasicNewsRecipe):
+	'''Calibre news recipe that builds three feeds from histmag.org archive pages.'''
+
+	title = u'Histmag'
+	__author__ = 'matek09'
+	description = u"Artykuly historyczne i publicystyczne"
+	encoding = 'utf-8'
+	no_stylesheets = True
+	language = 'pl'
+	remove_javascript = True
+	#max_articles_per_feed = 1
+	remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'article'}))
+	remove_tags_after = dict(dict(name = 'h2', attrs = {'class' : 'komentarze'}))
+	#keep_only_tags =[]
+	#keep_only_tags.append(dict(name = 'h2'))
+	#keep_only_tags.append(dict(name = 'p'))
+
+	remove_tags =[]
+	remove_tags.append(dict(name = 'p', attrs = {'class' : 'podpis'}))
+	remove_tags.append(dict(name = 'h2', attrs = {'class' : 'komentarze'}))
+	remove_tags.append(dict(name = 'img', attrs = {'src' : 'style/buttons/wesprzyjnas-1.jpg'}))
+
+	# Surround every <span>...</span> with two <br> on each side.
+	preprocess_regexps = [(re.compile(r'</span>'), lambda match: '</span><br><br>'),
+						(re.compile(r'<span>'), lambda match: '<br><br><span>')]
+	extra_css = '''
+					.left {font-size: x-small}
+					.right {font-size: x-small}
+				'''
+
+	def find_articles(self, soup):
+		'''Return one article dict per <div class="text"> found in ``soup``.
+
+		Each dict has the keys 'title', 'url', 'date' and 'description';
+		title and url come from the link inside the div's <h3>.
+		'''
+		articles = []
+		for div in soup.findAll('div', attrs={'class' : 'text'}):
+			# NOTE(review): div.next is the object following the div in the
+			# parse tree and is called here with 'p'; presumably this is
+			# meant to look up the <p> after the div -- confirm that it
+			# yields something tag_to_string() accepts.
+			articles.append({
+				'title' : self.tag_to_string(div.h3.a),
+				'url'   : 'http://www.histmag.org/' + div.h3.a['href'],
+				'date'  : self.tag_to_string(div.next('p')).split('|')[0],
+				'description' : self.tag_to_string(div.next('p', podpis=False)),
+				})
+		return articles
+
+	def parse_index(self):
+		'''Return three (feed title, articles) pairs, one per archive page.
+
+		The pages differ only in the ``arc`` query parameter (4, 5 and 1).
+		'''
+		soup = self.index_to_soup('http://histmag.org/?arc=4&dx=0')
+		feeds = []
+		feeds.append((u"Artykuly historyczne", self.find_articles(soup)))
+		soup = self.index_to_soup('http://histmag.org/?arc=5&dx=0')
+		feeds.append((u"Artykuly publicystyczne", self.find_articles(soup)))
+		soup = self.index_to_soup('http://histmag.org/?arc=1&dx=0')
+		feeds.append((u"Wydarzenia", self.find_articles(soup)))
+
+		return feeds
+
+
--- a/resources/recipes/newsweek_polska.recipe
+++ b/resources/recipes/newsweek_polska.recipe
@ -1,19 +1,22 @@
 #!/usr/bin/env  python

 __license__   = 'GPL v3'
-__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com'
+__copyright__ = '2010, matek09, matek09@gmail.com'

 from calibre.web.feeds.news import BasicNewsRecipe

 class Newsweek(BasicNewsRecipe):
-	EDITION = 0
+	FIND_LAST_FULL_ISSUE = True
+	EDITION = '0'
+	EXCLUDE_LOCKED = True
+	LOCKED_ICO = 'http://www.newsweek.pl/bins/media/static/newsweek/img/ico_locked.gif'

 	title = u'Newsweek Polska'
-	__author__ = 'Mateusz Kielar'
+	__author__ = 'matek09'
 	description = 'Weekly magazine'
 	encoding = 'utf-8'
 	no_stylesheets = True
-	language = 'en'
+	language = 'pl'
 	remove_javascript = True

 	keep_only_tags =[]
@ -33,34 +36,54 @@ class Newsweek(BasicNewsRecipe):
 	def print_version(self, url):
 		return url.replace("http://www.newsweek.pl/artykuly/wydanie/" + str(self.EDITION), "http://www.newsweek.pl/artykuly") + '/print'

+	def is_locked(self, a):
+		'''Return True when the first <img> after ``a`` is the lock icon.
+
+		``a`` is a tag from the issue page. A tag that is not followed by
+		any image is reported as not locked.
+		'''
+		img = a.findNext('img')
+		# findNext() returns None when no image follows the tag.
+		return img is not None and img.get('src') == self.LOCKED_ICO
+
+	def is_full(self, issue_soup):
+		'''Return True when the issue page shows at most one lock icon.
+
+		``issue_soup`` is the parsed issue page; images whose ``src`` equals
+		``LOCKED_ICO`` are counted.
+		'''
+		locked_icons = issue_soup.findAll('img', attrs={'src' : self.LOCKED_ICO})
+		return len(locked_icons) <= 1
+
 	def find_last_full_issue(self):
-		page = self.index_to_soup('http://www.newsweek.pl/Frames/IssueCover.aspx')
-		issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
-		page = self.index_to_soup(issue)
-		issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
-		page = self.index_to_soup(issue)
-		self.EDITION = page.find('a', attrs={'target' : '_parent'})['href'].replace('/wydania/','')
+		'''Set ``self.EDITION`` to the first issue for which is_full() is true.
+
+		Starts at the IssueCover frame and, for every frame, reads the
+		edition id from the link with target="_parent", loads that issue
+		page and stops as soon as is_full() accepts it. The loop has no
+		other exit: it ends only on that break or on an exception.
+		'''
+		frame_url = 'http://www.newsweek.pl/Frames/IssueCover.aspx'
+		while True:
+			frame_soup = self.index_to_soup(frame_url)
+			self.EDITION = frame_soup.find('a', attrs={'target' : '_parent'})['href'].replace('/wydania/','')
+			issue_soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + self.EDITION)
+			if self.is_full(issue_soup):
+				break
+			# Follow the link inside the first attribute-less <span>;
+			# presumably this leads to the previous issue -- confirm.
+			frame_url = 'http://www.newsweek.pl/Frames/' + frame_soup.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
+
+

 	def parse_index(self):
-		self.find_last_full_issue()
-		soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + str(self.EDITION))
+		'''Return the (section title, articles) list for issue ``self.EDITION``.
+
+		When FIND_LAST_FULL_ISSUE is set, the edition is first chosen by
+		find_last_full_issue(). Also sets ``self.cover_url`` from the issue
+		image on the page.
+		'''
+		if self.FIND_LAST_FULL_ISSUE:
+			self.find_last_full_issue()
+		soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + self.EDITION)
 		img = soup.find('img', id="ctl00_C1_PaperIsssueView_IssueImage", src=True)
 		self.cover_url = img['src']
 		feeds = []
 		parent = soup.find(id='content-left-big')
 		for txt in parent.findAll(attrs={'class':'txt_normal_red strong'}):
-			section = self.tag_to_string(txt).capitalize()
 			articles = list(self.find_articles(txt))
-			feeds.append((section, articles))
+			# Sections that yielded no articles are left out of the result.
+			if len(articles) > 0:
+				section = self.tag_to_string(txt).capitalize()
+				feeds.append((section, articles))
 		return feeds

 	def find_articles(self, txt):
+		'''Yield an article dict for each link that follows section header ``txt``.
+
+		Iteration stops at the first following <div>. Links are skipped
+		only when FIND_LAST_FULL_ISSUE is off, EXCLUDE_LOCKED is on and
+		is_locked() reports the link as locked.
+		'''
 		for a in txt.findAllNext( attrs={'class':['strong','hr']}):
 			if a.name in "div":
 				break
+			# Boolean 'and' short-circuits, so is_locked() is consulted
+			# only when the two flags leave the decision open.
+			if (not self.FIND_LAST_FULL_ISSUE) and self.EXCLUDE_LOCKED and self.is_locked(a):
+				continue
 			yield {
 				'title' : self.tag_to_string(a),
-				'url'   : 'http://www.newsweek.pl'+a['href'],
+				'url'   : 'http://www.newsweek.pl' + a['href'],
 				'date'  : '',
 				'description' : ''
 				}
--- a/resources/recipes/nin.recipe
+++ b/resources/recipes/nin.recipe
@ -8,12 +8,15 @@ www.nin.co.rs
 import re
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
+from contextlib import nested, closing
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
+from calibre import entity_to_unicode

 class Nin(BasicNewsRecipe):
    title                  = 'NIN online'
    __author__             = 'Darko Miletic'
    description            = 'Nedeljne Informativne Novine'
-    publisher              = 'NIN d.o.o.'
+    publisher              = 'NIN d.o.o. - Ringier d.o.o.'
    category               = 'news, politics, Serbia'
    no_stylesheets         = True
    delay                  = 1
@ -26,18 +29,29 @@ class Nin(BasicNewsRecipe):
    use_embedded_content   = False
    language               = 'sr'
    publication_type       = 'magazine'
-    extra_css              = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Verdana, Lucida, sans1, sans-serif} .article_description{font-family: Verdana, Lucida, sans1, sans-serif} .artTitle{font-size: x-large; font-weight: bold; color: #900} .izjava{font-size: x-large; font-weight: bold} .columnhead{font-size: small; font-weight: bold;} img{margin-top:0.5em; margin-bottom: 0.7em} b{margin-top: 1em} '
+    extra_css              = """ 
+                                 @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
+                                 body{font-family: Verdana, Lucida, sans1, sans-serif} 
+                                 .article_description{font-family: Verdana, Lucida, sans1, sans-serif} 
+                                 .artTitle{font-size: x-large; font-weight: bold; color: #900} 
+                                 .izjava{font-size: x-large; font-weight: bold} 
+                                 .columnhead{font-size: small; font-weight: bold;} 
+                                 img{margin-top:0.5em; margin-bottom: 0.7em; display: block} 
+                                 b{margin-top: 1em}
+                             """

    conversion_options = {
-                          'comment'          : description
-                        , 'tags'             : category
-                        , 'publisher'        : publisher
-                        , 'language'         : language
-                        , 'linearize_tables' : True
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
                        }

-    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
-    remove_attributes = ['height','width']
+    preprocess_regexps = [
+                           (re.compile(r'</body>.*?<html>', re.DOTALL|re.IGNORECASE),lambda match: '</body>')
+                          ,(re.compile(r'</html>.*?</html>', re.DOTALL|re.IGNORECASE),lambda match: '</html>')
+                          ,(re.compile(u'\u0110'), lambda match: u'\u00D0')
+                         ]

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
@ -50,7 +64,10 @@ class Nin(BasicNewsRecipe):
        return br

    keep_only_tags    =[dict(name='td', attrs={'width':'520'})]
+    remove_tags_before =dict(name='span', attrs={'class':'izjava'})
    remove_tags_after =dict(name='html')
+    remove_tags = [dict(name=['object','link','iframe','meta','base'])]
+    remove_attributes=['border','background','height','width','align','valign']

    def get_cover_url(self):
        cover_url = None
@ -63,7 +80,7 @@ class Nin(BasicNewsRecipe):
    def parse_index(self):
        articles = []
        count = 0
-        soup = self.index_to_soup(self.PREFIX)
+        soup = self.index_to_soup(self.INDEX)
        for item in soup.findAll('a',attrs={'class':'lmeninavFont'}):
            count = count +1
            if self.test and count > 2:
@ -90,3 +107,45 @@ class Nin(BasicNewsRecipe):
            articles.append((section,inarts))
        return articles

+    def index_to_soup(self, url_or_raw, raw=False):
+        '''Fetch or accept markup and return it parsed as BeautifulSoup.
+
+        url_or_raw: a URL (anything matching ``\\w+://``), which is
+            downloaded through ``self.browser``, or the markup itself.
+        raw: when true, return the undecoded data instead of a soup.
+
+        Raises RuntimeError when a URL was given and the response is empty.
+        '''
+        if re.match(r'\w+://', url_or_raw):
+            # Use open_novisit when the browser provides it, else open.
+            open_func = getattr(self.browser, 'open_novisit', self.browser.open)
+            with closing(open_func(url_or_raw)) as f:
+                _raw = f.read()
+            if not _raw:
+                raise RuntimeError('Could not fetch index from %s'%url_or_raw)
+        else:
+            _raw = url_or_raw
+        if raw:
+            return _raw
+        # Decode byte strings: self.encoding is either a callable that does
+        # the decoding or a codec name (undecodable bytes are replaced).
+        if not isinstance(_raw, unicode) and self.encoding:
+            if callable(self.encoding):
+                _raw = self.encoding(_raw)
+            else:
+                _raw = _raw.decode(self.encoding, 'replace')
+        # Extend the default markup massage with entity conversion and
+        # removal of the control characters \x00-\x08.
+        massage = list(BeautifulSoup.MARKUP_MASSAGE)
+        enc = 'cp1252' if callable(self.encoding) or self.encoding is None else self.encoding
+        massage.append((re.compile(r'&(\S+?);'), lambda match:
+            entity_to_unicode(match, encoding=enc)))
+        massage.append((re.compile(r'[\x00-\x08]+'), lambda match:
+            ''))
+        return BeautifulSoup(_raw, markupMassage=massage)
+
+    def preprocess_html(self, soup):
+        '''Simplify the article markup in place and return ``soup``.'''
+        # Drop all inline styling.
+        for styled in soup.findAll(style=True):
+            del styled['style']
+        # Remove <div> elements that have no children at all.
+        for div in soup.findAll('div'):
+            if not div.contents:
+                div.extract()
+        # Turn table cells and rows into plain <div> containers.
+        for cell in soup.findAll(['td','tr']):
+            cell.name = 'div'
+        # Give every image without an alt attribute a placeholder text.
+        for image in soup.findAll('img'):
+            if image.has_key('alt'):
+                continue
+            image['alt'] = 'image'
+        # Replace each table that contains an image by its first image.
+        for table in soup.findAll('table'):
+            first_img = table.find('img')
+            if first_img:
+                first_img.extract()
+                table.replaceWith(first_img)
+        return soup
+        
--- a/resources/recipes/polityka.recipe
+++ b/resources/recipes/polityka.recipe
@ -1,18 +1,18 @@
 #!/usr/bin/env  python

 __license__   = 'GPL v3'
-__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com'
+__copyright__ = '2010, matek09, matek09@gmail.com'

 from calibre.web.feeds.news import BasicNewsRecipe

 class Polityka(BasicNewsRecipe):

 	title = u'Polityka'
-	__author__ = 'Mateusz Kielar'
+	__author__ = 'matek09'
 	description = 'Weekly magazine. Last archive issue'
 	encoding = 'utf-8'
 	no_stylesheets = True
-	language = 'en'
+	language = 'pl'
 	remove_javascript = True

 	remove_tags_before = dict(dict(name = 'h2', attrs = {'class' : 'box_nag'}))
@ -48,7 +48,6 @@ class Polityka(BasicNewsRecipe):
 				for div in box.findAll('div', attrs={'class': 'list_tresc'}):
 					article_page = self.index_to_soup('http://archiwum.polityka.pl' + div.a['href'],)
 					section = self.tag_to_string(article_page.find('h2', attrs = {'class' : 'box_nag'})).split('/')[0].lstrip().rstrip()
-					print section
 					if not articles.has_key(section):
 						articles[section] = []
 					articles[section].append( {
--- a/resources/recipes/wprost.recipe
+++ b/resources/recipes/wprost.recipe
@ -0,0 +1,91 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, matek09, matek09@gmail.com'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class Wprost(BasicNewsRecipe):
+	'''
+	Recipe for the weekly magazine Wprost (www.wprost.pl).
+
+	parse_index() first calls find_last_issue() to pick an issue from the
+	archive page, then builds one feed per section heading of that issue.
+	'''
+
+	# Issue id; overwritten by find_last_issue()
+	EDITION = 0
+	# True: use the first table-of-contents link that follows the last
+	# ICO_BLOCKED image on the archive page.
+	# False: use the first table-of-contents link on the archive page.
+	FIND_LAST_FULL_ISSUE = True
+	# True: skip article links for which is_blocked() returns True
+	EXCLUDE_LOCKED = True
+	ICO_BLOCKED = 'http://www.wprost.pl/G/icons/ico_blocked.gif'
+
+	title = u'Wprost'
+	__author__ = 'matek09'
+	description = 'Weekly magazine'
+	encoding = 'ISO-8859-2'
+	no_stylesheets = True
+	language = 'pl'
+	remove_javascript = True
+
+	remove_tags_before = dict(name = 'div', attrs = {'id' : 'print-layer'})
+	remove_tags_after = dict(name = 'div', attrs = {'id' : 'print-layer'})
+
+	# Disabled alternative to remove_tags_before/remove_tags_after:
+	# keep_only_tags =[]
+	# keep_only_tags.append(dict(name = 'table', attrs = {'id' : 'title-table'}))
+	# keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-header'}))
+	# keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-content'}))
+	# keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'def element-autor'}))
+
+	preprocess_regexps = [(re.compile(r'style="display: none;"'), lambda match: ''),
+						(re.compile(r'display: block;'), lambda match: '')]
+
+
+	remove_tags = [
+		dict(name = 'div', attrs = {'class' : 'def element-date'}),
+		dict(name = 'div', attrs = {'class' : 'def silver'}),
+		dict(name = 'div', attrs = {'id' : 'content-main-column-right'}),
+	]
+
+
+	extra_css = '''
+					.div-header {font-size: x-small; font-weight: bold}
+					'''
+	# Disabled CSS rule: h2 {font-size: x-large; font-weight: bold}
+
+	def is_blocked(self, a):
+		'''Return True if the tag `a` has a following <img> sibling.'''
+		return a.findNextSibling('img') is not None
+
+
+
+	def find_last_issue(self):
+		'''
+		Read the archive page and set self.EDITION and self.cover_url.
+
+		With FIND_LAST_FULL_ISSUE set, the link following the last
+		ICO_BLOCKED image is used. If the page has no such image, or no
+		link follows it, the first table-of-contents link on the page is
+		used instead of failing.
+
+		Raises ValueError if the page has no table-of-contents link at all.
+		'''
+		soup = self.index_to_soup('http://www.wprost.pl/archiwum/')
+		toc_link = {'title' : re.compile('Zobacz spis tre.ci')}
+		a = None
+		if self.FIND_LAST_FULL_ISSUE:
+			ico_blocked = soup.findAll('img', attrs={'src' : self.ICO_BLOCKED})
+			# An empty result used to raise IndexError on ico_blocked[-1]
+			if ico_blocked:
+				a = ico_blocked[-1].findNext('a', attrs=toc_link)
+		if a is None:
+			a = soup.find('a', attrs=toc_link)
+		if a is None:
+			raise ValueError('No issue link found on the Wprost archive page')
+		self.EDITION = a['href'].replace('/tygodnik/?I=', '')
+		self.cover_url = a.img['src']
+
+
+
+	def parse_index(self):
+		'''
+		Return a list of (section title, list of article dicts) tuples for
+		the issue chosen by find_last_issue(). Sections that yield no
+		articles are left out.
+		'''
+		self.find_last_issue()
+		soup = self.index_to_soup('http://www.wprost.pl/tygodnik/?I=' + self.EDITION)
+		feeds = []
+		for main_block in soup.findAll(attrs={'class':'main-block-s3 s3-head head-red3'}):
+			articles = list(self.find_articles(main_block))
+			if articles:
+				section = self.tag_to_string(main_block)
+				feeds.append((section, articles))
+		return feeds
+
+	def find_articles(self, main_block):
+		'''
+		Yield one article dict (title, url, date, description) for each tag
+		after `main_block` whose style attribute is '' or
+		'padding-top: 15px;'. Iteration stops at the first <td> match
+		(presumably the start of the next section -- TODO confirm).
+		'''
+		for a in main_block.findAllNext( attrs={'style':['','padding-top: 15px;']}):
+			# Equality, not substring membership in the string "td"
+			if a.name == 'td':
+				break
+			if self.EXCLUDE_LOCKED and self.is_blocked(a):
+				continue
+			yield {
+				'title' : self.tag_to_string(a),
+				'url'   : 'http://www.wprost.pl' + a['href'],
+				'date'  : '',
+				'description' : ''
+				}
+
+
--- a/setup/installer/linux/freeze2.py
+++ b/setup/installer/linux/freeze2.py
@ -340,6 +340,8 @@ class LinuxFreeze(Command):
                __builtin__.help = _Helper()

            def set_qt_plugin_path():
+                import uuid
+                uuid.uuid4() # Workaround for libuuid/PyQt conflict
                from PyQt4.Qt import QCoreApplication
                paths = list(map(unicode, QCoreApplication.libraryPaths()))
                paths.insert(0, sys.frozen_path + '/lib/qt_plugins')
--- a/src/calibre/init.py
+++ b/src/calibre/init.py
@ -3,7 +3,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import uuid, sys, os, re, logging, time, mimetypes, \
+import uuid, sys, os, re, logging, time, \
       __builtin__, warnings, multiprocessing
 from urllib import getproxies
 __builtin__.__dict__['dynamic_property'] = lambda(func): func(None)
@ -19,43 +19,18 @@ from calibre.constants import iswindows, isosx, islinux, isfreebsd, isfrozen, \
                              __appname__, __version__, __author__, \
                              win32event, win32api, winerror, fcntl, \
                              filesystem_encoding, plugins, config_dir
-from calibre.startup import winutil, winutilerror
+from calibre.startup import winutil, winutilerror, guess_type

-uuid.uuid4() # Imported before PyQt4 to workaround PyQt4 util-linux conflict on gentoo
+if islinux and not getattr(sys, 'frozen', False):
+    # Imported before PyQt4 to workaround PyQt4 util-linux conflict on gentoo
+    uuid.uuid4()

 if False:
+    # Prevent pyflakes from complaining
    winutil, winutilerror, __appname__, islinux, __version__
    fcntl, win32event, isfrozen, __author__, terminal_controller
-    winerror, win32api, isfreebsd
+    winerror, win32api, isfreebsd, guess_type

-mimetypes.add_type('application/epub+zip',                '.epub')
-mimetypes.add_type('text/x-sony-bbeb+xml',                '.lrs')
-mimetypes.add_type('application/xhtml+xml',               '.xhtml')
-mimetypes.add_type('image/svg+xml',                       '.svg')
-mimetypes.add_type('text/fb2+xml',                        '.fb2')
-mimetypes.add_type('application/x-sony-bbeb',             '.lrf')
-mimetypes.add_type('application/x-sony-bbeb',             '.lrx')
-mimetypes.add_type('application/x-dtbncx+xml',            '.ncx')
-mimetypes.add_type('application/adobe-page-template+xml', '.xpgt')
-mimetypes.add_type('application/x-font-opentype',         '.otf')
-mimetypes.add_type('application/x-font-truetype',         '.ttf')
-mimetypes.add_type('application/oebps-package+xml',       '.opf')
-mimetypes.add_type('application/vnd.palm',                '.pdb')
-mimetypes.add_type('application/x-mobipocket-ebook',      '.mobi')
-mimetypes.add_type('application/x-mobipocket-ebook',      '.prc')
-mimetypes.add_type('application/x-mobipocket-ebook',      '.azw')
-mimetypes.add_type('application/x-cbz',                   '.cbz')
-mimetypes.add_type('application/x-cbr',                   '.cbr')
-mimetypes.add_type('application/x-koboreader-ebook',      '.kobo')
-mimetypes.add_type('image/wmf',                           '.wmf')
-mimetypes.add_type('image/jpeg',                          '.jpg')
-mimetypes.add_type('image/jpeg',                          '.jpeg')
-mimetypes.add_type('image/png',                           '.png')
-mimetypes.add_type('image/gif',                           '.gif')
-mimetypes.add_type('image/bmp',                           '.bmp')
-mimetypes.add_type('image/svg+xml',                       '.svg')
-
-guess_type = mimetypes.guess_type
 import cssutils
 cssutils.log.setLevel(logging.WARN)

--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@ -19,7 +19,7 @@ class ANDROID(USBMS):

    VENDOR_ID   = {
            # HTC
-            0x0bb4 : { 0x0c02 : [0x100, 0x0227], 0x0c01 : [0x100, 0x0227], 0x0ff9
+            0x0bb4 : { 0x0c02 : [0x100, 0x0227, 0x0226], 0x0c01 : [0x100, 0x0227], 0x0ff9
                : [0x0100, 0x0227, 0x0226], 0x0c87: [0x0100, 0x0227, 0x0226],
                0xc92 : [0x100]},

--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@ -91,6 +91,10 @@ class FB2MLizer(object):
        return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True)

    def clean_text(self, text):
+        text = re.sub(r'(?miu)<section>\s*</section>', '', text)
+        text = re.sub(r'(?miu)\s+</section>', '</section>', text)
+        text = re.sub(r'(?miu)</section><section>', '</section>\n\n<section>', text)
+
        text = re.sub(r'(?miu)<p>\s*</p>', '', text)
        text = re.sub(r'(?miu)\s+</p>', '</p>', text)
        text = re.sub(r'(?miu)</p><p>', '</p>\n\n<p>', text)
@ -166,11 +170,15 @@ class FB2MLizer(object):

    def get_text(self):
        text = []
-        for item in self.oeb_book.spine:
+        for i, item in enumerate(self.oeb_book.spine):
+            if self.opts.sectionize_chapters_using_file_structure and i is not 0:
+                text.append('<section>')
            self.log.debug('Converting %s to FictionBook2 XML' % item.href)
            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
            text.append(self.add_page_anchor(item))
            text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
+            if self.opts.sectionize_chapters_using_file_structure and i is not len(self.oeb_book.spine) - 1:
+                text.append('</section>')
        return ''.join(text)

    def fb2_body_footer(self):
@ -258,6 +266,10 @@ class FB2MLizer(object):
        if id_name:
            fb2_text.append(self.get_anchor(page, id_name))

+        if tag == 'h1' and self.opts.h1_to_title or tag == 'h2' and self.opts.h2_to_title or tag == 'h3' and self.opts.h3_to_title:
+            fb2_text.append('<title>')
+            tags.append('title')
+
        fb2_tag = TAG_MAP.get(tag, None)
        if fb2_tag == 'p':
            if 'p' in tag_stack+tags:
--- a/src/calibre/ebooks/fb2/output.py
+++ b/src/calibre/ebooks/fb2/output.py
@ -25,6 +25,20 @@ class FB2Output(OutputFormatPlugin):
                   'WARNING: ' \
                   'This option is experimental. It can cause conversion ' \
                   'to fail. It can also produce unexpected output.')),
+        OptionRecommendation(name='sectionize_chapters_using_file_structure',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Try to turn chapters into individual sections using the ' \
+                   'internal structure of the ebook. This works well for EPUB ' \
+                   'books that have been internally split by chapter.')),
+        OptionRecommendation(name='h1_to_title',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Wrap all h1 tags with fb2 title elements.')),
+        OptionRecommendation(name='h2_to_title',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Wrap all h2 tags with fb2 title elements.')),
+        OptionRecommendation(name='h3_to_title',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Wrap all h3 tags with fb2 title elements.')),
    ])

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -504,6 +504,9 @@ class MobiReader(object):
            'x-large': '5',
            'xx-large': '6',
            }
+        def barename(x):
+            return x.rpartition(':')[-1]
+
        mobi_version = self.book_header.mobi_version
        for x in root.xpath('//ncx'):
            x.getparent().remove(x)
@ -512,8 +515,9 @@ class MobiReader(object):
            for x in tag.attrib:
                if ':' in x:
                    del tag.attrib[x]
-            if tag.tag in ('country-region', 'place', 'placetype', 'placename',
-                'state', 'city', 'street', 'address', 'content', 'form'):
+            if tag.tag and barename(tag.tag.lower()) in \
+                ('country-region', 'place', 'placetype', 'placename',
+                    'state', 'city', 'street', 'address', 'content', 'form'):
                tag.tag = 'div' if tag.tag in ('content', 'form') else 'span'
                for key in tag.attrib.keys():
                    tag.attrib.pop(key)
--- a/src/calibre/ebooks/oeb/transforms/jacket.py
+++ b/src/calibre/ebooks/oeb/transforms/jacket.py
@ -93,7 +93,7 @@ class Jacket(object):

 # Render Jacket {{{

-def get_rating(rating, rchar):
+def get_rating(rating, rchar, e_rchar):
    ans = ''
    try:
        num = float(rating)/2
@ -104,12 +104,12 @@ def get_rating(rating, rchar):
    if num < 1:
        return ans

-    ans = rchar * int(num)
+    ans = ("%s%s") % (rchar * int(num), e_rchar * (5 - int(num)))
    return ans

-
 def render_jacket(mi, output_profile,
-        alt_title=_('Unknown'), alt_tags=[], alt_comments=''):
+        alt_title=_('Unknown'), alt_tags=[], alt_comments='',
+        alt_publisher=('Unknown publisher')):
    css = P('jacket/stylesheet.css', data=True).decode('utf-8')

    try:
@ -124,12 +124,17 @@ def render_jacket(mi, output_profile,
    if not mi.series:
        series = ''

+    try:
+        publisher = mi.publisher if mi.publisher else alt_publisher
+    except:
+        publisher = _('Unknown publisher')
+
    try:
        pubdate = strftime(u'%Y', mi.pubdate.timetuple())
    except:
        pubdate = ''

-    rating = get_rating(mi.rating, output_profile.ratings_char)
+    rating = get_rating(mi.rating, output_profile.ratings_char, output_profile.empty_ratings_char)

    tags = mi.tags if mi.tags else alt_tags
    if tags:
@ -154,6 +159,7 @@ def render_jacket(mi, output_profile,
                    css=css,
                    title=title,
                    author=author,
+                    publisher=publisher,
                    pubdate_label=_('Published'), pubdate=pubdate,
                    series_label=_('Series'), series=series,
                    rating_label=_('Rating'), rating=rating,
@ -168,16 +174,16 @@ def render_jacket(mi, output_profile,
        # Post-process the generated html to strip out empty header items
        soup = BeautifulSoup(generated_html)
        if not series:
-            series_tag = soup.find('tr', attrs={'class':'cbj_series'})
+            series_tag = soup.find(attrs={'class':'cbj_series'})
            series_tag.extract()
        if not rating:
-            rating_tag = soup.find('tr', attrs={'class':'cbj_rating'})
+            rating_tag = soup.find(attrs={'class':'cbj_rating'})
            rating_tag.extract()
        if not tags:
-            tags_tag = soup.find('tr', attrs={'class':'cbj_tags'})
+            tags_tag = soup.find(attrs={'class':'cbj_tags'})
            tags_tag.extract()
        if not pubdate:
-            pubdate_tag = soup.find('tr', attrs={'class':'cbj_pubdate'})
+            pubdate_tag = soup.find(attrs={'class':'cbj_pubdate'})
            pubdate_tag.extract()
        if output_profile.short_name != 'kindle':
            hr_tag = soup.find('hr', attrs={'class':'cbj_kindle_banner_hr'})
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@ -216,7 +216,9 @@ class PMLMLizer(object):
            w = '\\w'
            width = elem.get('width')
            if width:
-                w += '="%s%%"' % width
+                if not width.endswith('%'):
+                    width += '%'
+                w += '="%s"' % width
            else:
                w += '="50%"'
            text.append(w)
@ -252,8 +254,8 @@ class PMLMLizer(object):
                    if href not in self.link_hrefs.keys():
                        self.link_hrefs[href] = 'calibre_link-%s' % len(self.link_hrefs.keys())
                    href = '#%s' % self.link_hrefs[href]
-                text.append('\\q="%s"' % href)
-                tags.append('q')
+                    text.append('\\q="%s"' % href)
+                    tags.append('q')

        # Anchor ids
        id_name = elem.get('id')
--- a/src/calibre/gui2/actions/catalog.py
+++ b/src/calibre/gui2/actions/catalog.py
@ -37,7 +37,8 @@ class GenerateCatalogAction(InterfaceAction):
 			dbspec[id] = {'ondevice': db.ondevice(id, index_is_id=True)}

        # Calling gui2.tools:generate_catalog()
-        ret = generate_catalog(self.gui, dbspec, ids, self.gui.device_manager)
+        ret = generate_catalog(self.gui, dbspec, ids, self.gui.device_manager,
+                db)
        if ret is None:
            return

--- a/src/calibre/gui2/actions/view.py
+++ b/src/calibre/gui2/actions/view.py
@ -12,7 +12,7 @@ from PyQt4.Qt import Qt, QMenu

 from calibre.constants import isosx
 from calibre.gui2 import error_dialog, Dispatcher, question_dialog, config, \
-        open_local_file
+        open_local_file, info_dialog
 from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
 from calibre.utils.config import prefs
 from calibre.ptempfile import PersistentTemporaryFile
@ -89,18 +89,34 @@ class ViewAction(InterfaceAction):
        self._launch_viewer(name, viewer, internal)

    def view_specific_format(self, triggered):
-        rows = self.gui.library_view.selectionModel().selectedRows()
+        rows = list(self.gui.library_view.selectionModel().selectedRows())
        if not rows or len(rows) == 0:
            d = error_dialog(self.gui, _('Cannot view'), _('No book selected'))
            d.exec_()
            return

-        row = rows[0].row()
-        formats = self.gui.library_view.model().db.formats(row).upper().split(',')
-        d = ChooseFormatDialog(self.gui, _('Choose the format to view'), formats)
+        db = self.gui.library_view.model().db
+        rows = [r.row() for r in rows]
+        formats = [db.formats(row) for row in rows]
+        formats = [list(f.upper().split(',')) if f else None for f in formats]
+        all_fmts = set([])
+        for x in formats:
+            for f in x: all_fmts.add(f)
+        d = ChooseFormatDialog(self.gui, _('Choose the format to view'),
+                list(sorted(all_fmts)))
        if d.exec_() == d.Accepted:
-            format = d.format()
-            self.view_format(row, format)
+            fmt = d.format()
+            orig_num = len(rows)
+            rows = [rows[i] for i in range(len(rows)) if formats[i] and fmt in
+                    formats[i]]
+            if self._view_check(len(rows)):
+                for row in rows:
+                    self.view_format(row, fmt)
+                if len(rows) < orig_num:
+                    info_dialog(self.gui, _('Format unavailable'),
+                            _('Not all the selected books were available in'
+                                ' the %s format. You should convert'
+                                ' them first.')%fmt, show=True)

    def _view_check(self, num, max_=3):
        if num <= max_:
--- a/src/calibre/gui2/book_details.py
+++ b/src/calibre/gui2/book_details.py
@ -208,8 +208,9 @@ class BookInfo(QWebView):
        rows = u'\n'.join([u'<tr><td valign="top"><b>%s:</b></td><td valign="top">%s</td></tr>'%(k,t) for
            k, t in rows])
        comments = data.get(_('Comments'), '')
-        if comments and comments != u'None':
-            self.renderer.queue.put((rows, comments))
+        if not comments or comments == u'None':
+            comments = ''
+        self.renderer.queue.put((rows, comments))
        self._show_data(rows, '')


--- a/src/calibre/gui2/catalog/catalog_bibtex.py
+++ b/src/calibre/gui2/catalog/catalog_bibtex.py
@ -34,7 +34,7 @@ class PluginWidget(QWidget, Ui_Form):
                self.all_fields.append(x)
                QListWidgetItem(x, self.db_fields)

-    def initialize(self, name): #not working properly to update
+    def initialize(self, name, db): #not working properly to update
        self.name = name
        fields = gprefs.get(name+'_db_fields', self.all_fields)
        # Restore the activated db_fields from last use
--- a/src/calibre/gui2/catalog/catalog_csv_xml.py
+++ b/src/calibre/gui2/catalog/catalog_csv_xml.py
@ -28,7 +28,7 @@ class PluginWidget(QWidget, Ui_Form):
                self.all_fields.append(x)
                QListWidgetItem(x, self.db_fields)

-    def initialize(self, name):
+    def initialize(self, name, db):
        self.name = name
        fields = gprefs.get(name+'_db_fields', self.all_fields)
        # Restore the activated fields from last use
--- a/src/calibre/gui2/catalog/catalog_epub_mobi.py
+++ b/src/calibre/gui2/catalog/catalog_epub_mobi.py
@ -7,10 +7,11 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'


-from calibre.gui2 import gprefs
-from catalog_epub_mobi_ui import Ui_Form
 from calibre.ebooks.conversion.config import load_defaults
-from PyQt4.Qt import QWidget
+from calibre.gui2 import gprefs
+
+from catalog_epub_mobi_ui import Ui_Form
+from PyQt4.Qt import QWidget, QLineEdit

 class PluginWidget(QWidget,Ui_Form):

@ -23,7 +24,8 @@ class PluginWidget(QWidget,Ui_Form):
                     ('generate_recently_added', True),
                     ('note_tag','*'),
                     ('numbers_as_text', False),
-                     ('read_tag','+'),
+                     ('read_pattern','+'),
+                     ('read_source_field_cb','Tag'),
                     ('wishlist_tag','Wishlist'),
                     ]

@ -38,16 +40,54 @@ class PluginWidget(QWidget,Ui_Form):
        QWidget.__init__(self, parent)
        self.setupUi(self)

-    def initialize(self, name):
+    def initialize(self, name, db):
+        '''
+        Set up the widget for the catalog plugin called `name`.
+
+        `db` is queried for its custom fields to fill the 'read book'
+        source combo box; the remaining controls are restored from the
+        values stored in gprefs under the key name + '_' + option name,
+        falling back to the defaults in OPTION_FIELDS.
+        '''
         self.name = name
+
+        # Populate the 'Read book' source fields
+        all_custom_fields = db.custom_field_keys()
+        custom_fields = {}
+        custom_fields['Tag'] = {'field':'tag', 'datatype':u'text'}
+        for custom_field in all_custom_fields:
+            field_md = db.metadata_for_field(custom_field)
+            if field_md['datatype'] in ['bool','composite','datetime','text']:
+                custom_fields[field_md['name']] = {'field':custom_field,
+                                                   'datatype':field_md['datatype']}
+
+        # Add the sorted eligible fields to the combo box
+        for cf in sorted(custom_fields):
+            self.read_source_field_cb.addItem(cf)
+
+        self.read_source_fields = custom_fields
+        self.read_source_field_cb.currentIndexChanged.connect(self.read_source_field_changed)
+
         # Update dialog fields from stored options
         for opt in self.OPTION_FIELDS:
             opt_value = gprefs.get(self.name + '_' + opt[0], opt[1])
-            if opt[0] in ['numbers_as_text','generate_titles','generate_series','generate_recently_added']:
+            if opt[0] in [
+                          'generate_recently_added',
+                          'generate_series',
+                          'generate_titles',
+                          'numbers_as_text',
+                          ]:
                 getattr(self, opt[0]).setChecked(opt_value)
+
+            # Combo box
+            elif opt[0] in ['read_source_field_cb']:
+                # Look for last-stored combo box value
+                index = self.read_source_field_cb.findText(opt_value)
+                if index == -1:
+                    # Stored value is not among the current items: use 'Tag'
+                    index = self.read_source_field_cb.findText('Tag')
+                self.read_source_field_cb.setCurrentIndex(index)
+
+            # Text fields
             else:
                 getattr(self, opt[0]).setText(opt_value)

+        # Init self.read_source_field
+        cs = unicode(self.read_source_field_cb.currentText())
+        read_source_spec = self.read_source_fields[cs]
+        self.read_source_field = read_source_spec['field']
+
    def options(self):
        # Save/return the current options
        # exclude_genre stores literally
@ -55,16 +95,60 @@ class PluginWidget(QWidget,Ui_Form):
        # others store as lists
        opts_dict = {}
        for opt in self.OPTION_FIELDS:
-            if opt[0] in ['numbers_as_text','generate_titles','generate_series','generate_recently_added']:
+            # Save values to gprefs
+            if opt[0] in [
+                          'generate_recently_added',
+                          'generate_series',
+                          'generate_titles',
+                          'numbers_as_text',
+                          ]:
                opt_value = getattr(self,opt[0]).isChecked()
+
+            # Combo box uses .currentText()
+            elif opt[0] in ['read_source_field_cb']:
+                opt_value = unicode(getattr(self, opt[0]).currentText())
+
+            # text fields use .text()
            else:
                opt_value = unicode(getattr(self, opt[0]).text())
            gprefs.set(self.name + '_' + opt[0], opt_value)

-            if opt[0] in ['exclude_genre','numbers_as_text','generate_titles','generate_series','generate_recently_added']:
+            # Construct opts
+            if opt[0] in [
+                          'exclude_genre',
+                          'generate_recently_added',
+                          'generate_series',
+                          'generate_titles',
+                          'numbers_as_text',
+                          ]:
                opts_dict[opt[0]] = opt_value
            else:
                opts_dict[opt[0]] = opt_value.split(',')
-        opts_dict['output_profile'] = [load_defaults('page_setup')['output_profile']]

+        # Generate read_book_marker
+        opts_dict['read_book_marker'] = "%s:%s" % (self.read_source_field, self.read_pattern.text())
+
+        # Append the output profile
+        opts_dict['output_profile'] = [load_defaults('page_setup')['output_profile']]
        return opts_dict
+
+    def read_source_field_changed(self,new_index):
+        '''
+        Process changes in the read_source_field combo box
+        Currently using QLineEdit for all field types
+        Possible to modify to switch QWidget type
+
+        Updates self.read_source_field from the newly selected entry.
+        `new_index` is supplied by the signal and is not used; the current
+        text of the combo box is read instead.
+        '''
+        # unicode(), as in initialize(): str() raises UnicodeEncodeError for
+        # column names containing non-ASCII characters.
+        new_source = unicode(self.read_source_field_cb.currentText())
+        read_source_spec = self.read_source_fields[new_source]
+        self.read_source_field = read_source_spec['field']
+
+        # Change pattern input widget to match the source field datatype
+        if read_source_spec['datatype'] in ['bool','composite','datetime','text']:
+            if not isinstance(self.read_pattern, QLineEdit):
+                self.read_spec_hl.removeWidget(self.read_pattern)
+                dw = QLineEdit(self)
+                dw.setObjectName('read_pattern')
+                dw.setToolTip('Pattern for read book')
+                self.read_pattern = dw
+                self.read_spec_hl.addWidget(dw)
+
--- a/src/calibre/gui2/catalog/catalog_epub_mobi.ui
+++ b/src/calibre/gui2/catalog/catalog_epub_mobi.ui
@ -6,8 +6,8 @@
   <rect>
    <x>0</x>
    <y>0</y>
-    <width>579</width>
-    <height>411</height>
+    <width>627</width>
+    <height>549</height>
   </rect>
  </property>
  <property name="windowTitle">
@ -28,42 +28,28 @@
     </property>
    </widget>
   </item>
-   <item row="1" column="0">
-    <widget class="QLabel" name="label_3">
-     <property name="text">
-      <string>'Mark this book as read' tag:</string>
-     </property>
-    </widget>
-   </item>
-   <item row="1" column="1">
-    <widget class="QLineEdit" name="read_tag">
-     <property name="toolTip">
-      <string extracomment="Default: +"/>
-     </property>
-    </widget>
-   </item>
-   <item row="3" column="0">
+   <item row="4" column="0">
    <widget class="QLabel" name="label_4">
     <property name="text">
      <string>Additional note tag prefix:</string>
     </property>
    </widget>
   </item>
-   <item row="3" column="1">
+   <item row="4" column="1">
    <widget class="QLineEdit" name="note_tag">
     <property name="toolTip">
      <string extracomment="Default: *"/>
     </property>
    </widget>
   </item>
-   <item row="5" column="1">
+   <item row="6" column="1">
    <widget class="QLineEdit" name="exclude_genre">
     <property name="toolTip">
      <string extracomment="Default: \[[\w]*\]"/>
     </property>
    </widget>
   </item>
-   <item row="5" column="0">
+   <item row="6" column="0">
    <widget class="QLabel" name="label">
     <property name="text">
      <string>Regex pattern describing tags to exclude as genres:</string>
@ -76,7 +62,7 @@
     </property>
    </widget>
   </item>
-   <item row="6" column="1">
+   <item row="7" column="1">
    <widget class="QLabel" name="label_6">
     <property name="text">
      <string>Regex tips:
@ -88,7 +74,7 @@
     </property>
    </widget>
   </item>
-   <item row="7" column="0">
+   <item row="8" column="0">
    <spacer name="verticalSpacer">
     <property name="orientation">
      <enum>Qt::Vertical</enum>
@ -101,44 +87,84 @@
     </property>
    </spacer>
   </item>
-   <item row="9" column="0">
+   <item row="10" column="0">
    <widget class="QCheckBox" name="generate_titles">
     <property name="text">
      <string>Include 'Titles' Section</string>
     </property>
    </widget>
   </item>
-   <item row="11" column="0">
+   <item row="12" column="0">
    <widget class="QCheckBox" name="generate_recently_added">
     <property name="text">
      <string>Include 'Recently Added' Section</string>
     </property>
    </widget>
   </item>
-   <item row="12" column="0">
+   <item row="13" column="0">
    <widget class="QCheckBox" name="numbers_as_text">
     <property name="text">
      <string>Sort numbers as text</string>
     </property>
    </widget>
   </item>
-   <item row="10" column="0">
+   <item row="11" column="0">
    <widget class="QCheckBox" name="generate_series">
     <property name="text">
      <string>Include 'Series' Section</string>
     </property>
    </widget>
   </item>
-   <item row="2" column="1">
+   <item row="3" column="1">
    <widget class="QLineEdit" name="wishlist_tag"/>
   </item>
-   <item row="2" column="0">
+   <item row="3" column="0">
    <widget class="QLabel" name="label_5">
     <property name="text">
      <string>Wishlist tag:</string>
     </property>
    </widget>
   </item>
+   <item row="2" column="1">
+    <layout class="QHBoxLayout" name="read_spec_hl">
+     <property name="sizeConstraint">
+      <enum>QLayout::SetMinimumSize</enum>
+     </property>
+     <item>
+      <widget class="QComboBox" name="read_source_field_cb">
+       <property name="sizePolicy">
+        <sizepolicy hsizetype="MinimumExpanding" vsizetype="Fixed">
+         <horstretch>0</horstretch>
+         <verstretch>0</verstretch>
+        </sizepolicy>
+       </property>
+       <property name="toolTip">
+        <string>Source column for read book</string>
+       </property>
+       <property name="statusTip">
+        <string/>
+       </property>
+      </widget>
+     </item>
+     <item>
+      <widget class="QLineEdit" name="read_pattern">
+       <property name="toolTip">
+        <string>Pattern for read book</string>
+       </property>
+       <property name="statusTip">
+        <string/>
+       </property>
+      </widget>
+     </item>
+    </layout>
+   </item>
+   <item row="2" column="0">
+    <widget class="QLabel" name="label_3">
+     <property name="text">
+      <string>Books marked as read:</string>
+     </property>
+    </widget>
+   </item>
  </layout>
 </widget>
 <resources/>
--- a/src/calibre/gui2/convert/fb2_output.py
+++ b/src/calibre/gui2/convert/fb2_output.py
@ -17,6 +17,8 @@ class PluginWidget(Widget, Ui_Form):
    ICON = I('mimetypes/fb2.png')

    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
-        Widget.__init__(self, parent, ['inline_toc', 'sectionize_chapters'])
+        Widget.__init__(self, parent, ['inline_toc', 'sectionize_chapters', 
+            'sectionize_chapters_using_file_structure', 'h1_to_title', 
+            'h2_to_title', 'h3_to_title'])
        self.db, self.book_id = db, book_id
        self.initialize_options(get_option, get_help, db, book_id)
--- a/src/calibre/gui2/convert/fb2_output.ui
+++ b/src/calibre/gui2/convert/fb2_output.ui
@ -14,7 +14,7 @@
   <string>Form</string>
  </property>
  <layout class="QGridLayout" name="gridLayout">
-   <item row="2" column="0">
+   <item row="6" column="0">
    <spacer name="verticalSpacer">
     <property name="orientation">
      <enum>Qt::Vertical</enum>
@ -41,6 +41,34 @@
     </property>
    </widget>
   </item>
+   <item row="2" column="0">
+    <widget class="QCheckBox" name="opt_sectionize_chapters_using_file_structure">
+     <property name="text">
+      <string>Sectionize Chapters using file structure</string>
+     </property>
+    </widget>
+   </item>
+   <item row="3" column="0">
+    <widget class="QCheckBox" name="opt_h1_to_title">
+     <property name="text">
+      <string>Wrap h1 tags with &lt;title&gt; elements</string>
+     </property>
+    </widget>
+   </item>
+   <item row="4" column="0">
+    <widget class="QCheckBox" name="opt_h2_to_title">
+     <property name="text">
+      <string>Wrap h2 tags with &lt;title&gt; elements</string>
+     </property>
+    </widget>
+   </item>
+   <item row="5" column="0">
+    <widget class="QCheckBox" name="opt_h3_to_title">
+     <property name="text">
+      <string>Wrap h3 tags with &lt;title&gt; elements</string>
+     </property>
+    </widget>
+   </item>
  </layout>
 </widget>
 <resources/>
--- a/src/calibre/gui2/dialogs/catalog.py
+++ b/src/calibre/gui2/dialogs/catalog.py
@ -19,7 +19,7 @@ from calibre.customize.ui import catalog_plugins
 class Catalog(QDialog, Ui_Dialog):
    ''' Catalog Dialog builder'''

-    def __init__(self, parent, dbspec, ids):
+    def __init__(self, parent, dbspec, ids, db):
        import re, cStringIO
        from calibre import prints as info
        from PyQt4.uic import compileUi
@ -51,7 +51,7 @@ class Catalog(QDialog, Ui_Dialog):
                    catalog_widget = __import__('calibre.gui2.catalog.'+name,
                            fromlist=[1])
                    pw = catalog_widget.PluginWidget()
-                    pw.initialize(name)
+                    pw.initialize(name, db)
                    pw.ICON = I('forward.png')
                    self.widgets.append(pw)
                    [self.fmts.append([file_type.upper(), pw.sync_enabled,pw]) for file_type in plugin.file_types]
--- a/src/calibre/gui2/search_box.py
+++ b/src/calibre/gui2/search_box.py
@ -29,10 +29,6 @@ class SearchLineEdit(QLineEdit):
        QLineEdit.mouseReleaseEvent(self, event)
        QLineEdit.selectAll(self)

-    def focusInEvent(self, event):
-        QLineEdit.focusInEvent(self, event)
-        QLineEdit.selectAll(self)
-
    def dropEvent(self, ev):
        self.parent().normalize_state()
        return QLineEdit.dropEvent(self, ev)
@ -256,7 +252,11 @@ class SavedSearchBox(QComboBox):

    def initialize(self, _search_box, colorize=False, help_text=_('Search')):
        self.search_box = _search_box
-        self.line_edit.setPlaceholderText(help_text)
+        try:
+            self.line_edit.setPlaceholderText(help_text)
+        except AttributeError:
+            # Qt < 4.7 lacks setPlaceholderText; go without placeholder text
+            pass
        self.colorize = colorize
        self.clear()

@ -350,14 +350,17 @@ class SearchBoxMixin(object):
        shortcuts = QKeySequence.keyBindings(QKeySequence.Find)
        shortcuts = list(shortcuts) + [QKeySequence('/'), QKeySequence('Alt+S')]
        self.action_focus_search.setShortcuts(shortcuts)
-        self.action_focus_search.triggered.connect(lambda x:
-                self.search.setFocus(Qt.OtherFocusReason))
+        self.action_focus_search.triggered.connect(self.focus_search_box)
        self.addAction(self.action_focus_search)
        self.search.setStatusTip(re.sub(r'<\w+>', ' ',
            unicode(self.search.toolTip())))
        self.advanced_search_button.setStatusTip(self.advanced_search_button.toolTip())
        self.clear_button.setStatusTip(self.clear_button.toolTip())

+    def focus_search_box(self, *args):  # triggered slot; *args unused
+        self.search.setFocus(Qt.OtherFocusReason)
+        self.search.lineEdit().selectAll()  # select the current search text
+
    def search_box_cleared(self):
        self.tags_view.clear()
        self.saved_search.clear()
--- a/src/calibre/gui2/tools.py
+++ b/src/calibre/gui2/tools.py
@ -245,11 +245,11 @@ def fetch_scheduled_recipe(arg):

    return 'gui_convert', args, _('Fetch news from ')+arg['title'], fmt.upper(), [pt]

-def generate_catalog(parent, dbspec, ids, device_manager):
+def generate_catalog(parent, dbspec, ids, device_manager, db):
    from calibre.gui2.dialogs.catalog import Catalog

    # Build the Catalog dialog in gui2.dialogs.catalog
-    d = Catalog(parent, dbspec, ids)
+    d = Catalog(parent, dbspec, ids, db)

    if d.exec_() != d.Accepted:
        return None
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@ -606,12 +606,12 @@ class EPUB_MOBI(CatalogPlugin):
                          help=_("Specifies the output profile.  In some cases, an output profile is required to optimize the catalog for the device.  For example, 'kindle' or 'kindle_dx' creates a structured Table of Contents with Sections and Articles.\n"
                          "Default: '%default'\n"
                          "Applies to: ePub, MOBI output formats")),
-                   Option('--read-tag',
-                          default='+',
-                          dest='read_tag',
+                   Option('--read-book-marker',
+                          default='tag:+',
+                          dest='read_book_marker',
                          action = None,
-                          help=_("Tag indicating book has been read.\n" "Default: '%default'\n"
-                          "Applies to: ePub, MOBI output formats")),
+                          help=_("field:pattern indicating book has been read.\n" "Default: '%default'\n"
+                          "Applies to: ePub, MOBI output formats")),
                   Option('--wishlist-tag',
                          default='Wishlist',
                          dest='wishlist_tag',
@ -898,6 +898,8 @@ class EPUB_MOBI(CatalogPlugin):
            self.__plugin = plugin
            self.__progressInt = 0.0
            self.__progressString = ''
+            f, _, p = opts.read_book_marker.partition(':')
+            self.__read_book_marker = {'field':f, 'pattern':p}
            self.__reporter = report_progress
            self.__stylesheet = stylesheet
            self.__thumbs = None
@ -936,7 +938,6 @@ class EPUB_MOBI(CatalogPlugin):
            if self.opts.generate_series:
                self.__totalSteps += 2

-
        # Accessors
        if True:
            '''
@ -1210,7 +1211,7 @@ class EPUB_MOBI(CatalogPlugin):
            def READING_SYMBOL(self):
                def fget(self):
                    return '<span style="color:black">&#x25b7;</span>' if self.generateForKindle else \
-                           '<span style="color:white">%s</span>' % self.opts.read_tag
+                           '<span style="color:white">+</span>'
                return property(fget=fget)
            @dynamic_property
            def READ_SYMBOL(self):
@ -1401,8 +1402,7 @@ class EPUB_MOBI(CatalogPlugin):
                if record['cover']:
                    this_title['cover'] = re.sub('&amp;', '&', record['cover'])

-                # This may be updated in self.processSpecialTags()
-                this_title['read'] = False
+                this_title['read'] = self.discoverReadStatus(record)

                if record['tags']:
                    this_title['tags'] = self.processSpecialTags(record['tags'],
@ -2675,13 +2675,7 @@ class EPUB_MOBI(CatalogPlugin):
                pBookTag = Tag(soup, "p")
                ptc = 0

-                #  book with read/reading/unread symbol
-                for tag in book['tags']:
-                    if tag == self.opts.read_tag:
-                        book['read'] = True
-                        break
-                else:
-                    book['read'] = False
+                book['read'] = self.discoverReadStatus(book)

                #  book with read|reading|unread symbol or wishlist item
                if self.opts.wishlist_tag in book.get('tags', []):
@ -2689,7 +2683,7 @@ class EPUB_MOBI(CatalogPlugin):
                    pBookTag.insert(ptc,NavigableString(self.MISSING_SYMBOL))
                    ptc += 1
                else:
-                    if book['read']:
+                    if book.get('read', False):
                        # check mark
                        pBookTag.insert(ptc,NavigableString(self.READ_SYMBOL))
                        pBookTag['class'] = "read_book"
@ -4027,6 +4021,34 @@ class EPUB_MOBI(CatalogPlugin):
            if not os.path.isdir(images_path):
                os.makedirs(images_path)

+        def discoverReadStatus(self, record):
+            '''
+            Return True if record matches the field:pattern 'read' marker.
+
+            record: mapping-style book entry; its 'tags' and 'id' are read.
+            field == 'tag': pattern must equal one of the book's tags.
+            Otherwise (and for a 'tag' miss) the pattern is regex-searched,
+            ignoring case, in the value the db returns for the field, e.g.
+             datatype bool: #field_name:True
+             datatype text: #field_name:<string>
+             datatype datetime: #field_name:.*
+            '''
+            field = self.__read_book_marker['field']
+            pat = self.__read_book_marker['pattern']
+            # Legacy 'read' tag handling; tolerate missing or None 'tags'
+            if field == 'tag':
+                try:
+                    if pat in (record['tags'] or []):
+                        return True
+                except KeyError:
+                    pass
+            field_contents = self.__db.get_field(record['id'], field,
+                                                 index_is_id=True)
+            if not field_contents:
+                return False
+            return re.search(pat, unicode(field_contents),
+                             re.IGNORECASE) is not None
+
        def filterDbTags(self, tags):
            # Remove the special marker tags from the database's tag list,
            # return sorted list of normalized genre tags
@ -4519,7 +4541,6 @@ class EPUB_MOBI(CatalogPlugin):
            markerTags = []
            markerTags.extend(self.opts.exclude_tags.split(','))
            markerTags.extend(self.opts.note_tag.split(','))
-            markerTags.extend(self.opts.read_tag.split(','))
            return markerTags

        def letter_or_symbol(self,char):
@ -4629,6 +4650,7 @@ class EPUB_MOBI(CatalogPlugin):

            if open_pTag:
                result.insert(rtc, pTag)
+                rtc += 1

            paras = result.findAll('p')
            for p in paras:
@ -4647,10 +4669,12 @@ class EPUB_MOBI(CatalogPlugin):
                tag = self.convertHTMLEntities(tag)
                if tag.startswith(opts.note_tag):
                    this_title['notes'] = tag[len(self.opts.note_tag):]
-                elif tag == opts.read_tag:
-                    this_title['read'] = True
                elif re.search(opts.exclude_genre, tag):
                    continue
+                elif self.__read_book_marker['field'] == 'tag' and \
+                     tag == self.__read_book_marker['pattern']:
+                    # remove 'read' tag
+                    continue
                else:
                    tag_list.append(tag)
            return tag_list
@ -4759,7 +4783,7 @@ class EPUB_MOBI(CatalogPlugin):
        for key in keys:
            if key in ['catalog_title','authorClip','connected_kindle','descriptionClip',
                       'exclude_genre','exclude_tags','note_tag','numbers_as_text',
-                       'output_profile','read_tag',
+                       'output_profile','read_book_marker',
                       'search_text','sort_by','sort_descriptions_by_author','sync',
                        'wishlist_tag']:
                build_log.append("  %s: %s" % (key, opts_dict[key]))
--- a/src/calibre/library/cli.py
+++ b/src/calibre/library/cli.py
@ -640,7 +640,7 @@ def catalog_option_parser(args):
    log = Log()
    parser = get_parser(_(
    '''
-    %prog catalog /path/to/destination.(csv|epub|mobi|xml ...) [options]
+    %prog catalog /path/to/destination.(CSV|EPUB|MOBI|XML ...) [options]

    Export a catalog in format specified by path/to/destination extension.
    Options control how entries are displayed in the generated catalog ouput.
--- a/src/calibre/startup.py
+++ b/src/calibre/startup.py
@ -199,6 +199,11 @@ if not _run_once:

    __builtin__.__dict__['lopen'] = local_open

+
+    import mimetypes
+    mimetypes.init([P('mime.types')])
+    guess_type = mimetypes.guess_type
+
 def test_lopen():
    from calibre.ptempfile import TemporaryDirectory
    from calibre import CurrentDir