Merge from trunk

2025-07-09 03:04:10 -04:00 · 2011-03-09 08:21:23 +00:00 · 2011-03-09 08:21:23 +00:00 · 9638baa6ee
commit 9638baa6ee
parent 1a2950c2b2 4c180ba4cf
7 changed files with 444 additions and 266 deletions
--- a/resources/recipes/ming_pao.recipe
+++ b/resources/recipes/ming_pao.recipe
@ -1,7 +1,20 @@
 __license__   = 'GPL v3'
 __copyright__ = '2010-2011, Eddie Lau'
 # Users of Kindle 3 (with limited system-level CJK support)
 # please replace the following "True" with "False".
 __MakePeriodical__ = True
 # Turn it to True if your device supports display of CJK titles
 __UseChineseTitle__ = False
 '''
 Change Log:
 2011/03/06: add new articles for finance section, also a new section "Columns"
 2011/02/28: rearrange the sections
            [Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles
            View] make it the same title if generating a periodical, so past issue will be automatically put into "Past Issues"
            folder in Kindle 3
 2011/02/20: skip duplicated links in finance section, put photos which may extend a whole page to the back of the articles
            clean up the indentation
 2010/12/07: add entertainment section, use newspaper front page as ebook cover, suppress date display in section list
@ -19,55 +32,58 @@ import os, datetime, re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from contextlib import nested
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.ebooks.metadata.opf2 import OPFCreator
 from calibre.ebooks.metadata.toc import TOC
 from calibre.ebooks.metadata import MetaInformation
 class MPHKRecipe(BasicNewsRecipe):
-    IsCJKWellSupported = True  # Set to False to avoid generating periodical in which CJK characters can't be displayed in section/article view
+	title          = 'Ming Pao - Hong Kong'
-    title          = 'Ming Pao - Hong Kong'
+	oldest_article = 1
-    oldest_article = 1
+	max_articles_per_feed = 100
-    max_articles_per_feed = 100
+	__author__            = 'Eddie Lau'
-    __author__            = 'Eddie Lau'
+	description           = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
-    description = ('Hong Kong Chinese Newspaper (http://news.mingpao.com). If'
+	publisher             = 'MingPao'
-                  'you are using a Kindle with firmware < 3.1, customize the'
+	category              = 'Chinese, News, Hong Kong'
-                  'recipe')
+	remove_javascript = True
-    publisher             = 'MingPao'
+	use_embedded_content   = False
-    category              = 'Chinese, News, Hong Kong'
+	no_stylesheets = True
-    remove_javascript = True
+	language = 'zh'
-    use_embedded_content   = False
+	encoding = 'Big5-HKSCS'
-    no_stylesheets = True
+	recursions = 0
-    language = 'zh'
+	conversion_options = {'linearize_tables':True}
-    encoding = 'Big5-HKSCS'
+	timefmt = ''
-    recursions = 0
+	extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
-    conversion_options = {'linearize_tables':True}
+	masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
-    timefmt = ''
+	keep_only_tags = [dict(name='h1'),
    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
    masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
    keep_only_tags = [dict(name='h1'),
                      dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
-                      dict(attrs={'id':['newscontent']}), # entertainment page content
+                      dict(name='font', attrs={'color':['AA0000']}), # for column articles title
                      dict(attrs={'id':['newscontent']}), # entertainment and column page content
                      dict(attrs={'id':['newscontent01','newscontent02']}),
                      dict(attrs={'class':['photo']})
                      ]
-    remove_tags = [dict(name='style'),
+	remove_tags = [dict(name='style'),
-                   dict(attrs={'id':['newscontent135']})]  # for the finance page
+    			   dict(attrs={'id':['newscontent135']}),  # for the finance page
-    remove_attributes = ['width']
+    			   dict(name='table')]  # for content fetched from life.mingpao.com
-    preprocess_regexps = [
+	remove_attributes = ['width']
 	preprocess_regexps = [
                          (re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
                          lambda match: '<h1>'),
                          (re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
                          lambda match: '</h1>'),
                          (re.compile(r'<p><a href=.+?</a></p>', re.DOTALL|re.IGNORECASE), # for entertainment page
-                          lambda match: '')
+                          lambda match: ''),
                          # skip <br> after title in life.mingpao.com fetched article
                          (re.compile(r"<div id='newscontent'><br>", re.DOTALL|re.IGNORECASE),
                          lambda match: "<div id='newscontent'>"),
                          (re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
                          lambda match: "</b>")
                         ]
-    def image_url_processor(cls, baseurl, url):
+	def image_url_processor(cls, baseurl, url):
-        # trick: break the url at the first occurance of digit, add an additional
+		# trick: break the url at the first occurance of digit, add an additional
-        # '_' at the front
+		# '_' at the front
-        # not working, may need to move this to preprocess_html() method
+		# not working, may need to move this to preprocess_html() method
 #        minIdx = 10000
 #        i0 = url.find('0')
 #        if i0 >= 0 and i0 < minIdx:
@ -99,253 +115,314 @@ class MPHKRecipe(BasicNewsRecipe):
 #        i9 = url.find('9')
 #        if i9 >= 0 and i9 < minIdx:
 #           minIdx = i9
-        return url
+		return url
-    def get_dtlocal(self):
+	def get_dtlocal(self):
-        dt_utc = datetime.datetime.utcnow()
+		dt_utc = datetime.datetime.utcnow()
-        # convert UTC to local hk time - at around HKT 6.00am, all news are available
+		# convert UTC to local hk time - at around HKT 6.00am, all news are available
-        dt_local = dt_utc - datetime.timedelta(-2.0/24)
+		dt_local = dt_utc - datetime.timedelta(-2.0/24)
-        return dt_local
+		return dt_local
-    def get_fetchdate(self):
+	def get_fetchdate(self):
-        return self.get_dtlocal().strftime("%Y%m%d")
+		return self.get_dtlocal().strftime("%Y%m%d")
-    def get_fetchformatteddate(self):
+	def get_fetchformatteddate(self):
-        return self.get_dtlocal().strftime("%Y-%m-%d")
+		return self.get_dtlocal().strftime("%Y-%m-%d")
-    def get_fetchday(self):
+	def get_fetchday(self):
-        # convert UTC to local hk time - at around HKT 6.00am, all news are available
+		# convert UTC to local hk time - at around HKT 6.00am, all news are available
-        return self.get_dtlocal().strftime("%d")
+		return self.get_dtlocal().strftime("%d")
-    def get_cover_url(self):
+	def get_cover_url(self):
-        cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
+		cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
-        br = BasicNewsRecipe.get_browser()
+		br = BasicNewsRecipe.get_browser()
-        try:
+		try:
-            br.open(cover)
+			br.open(cover)
-        except:
+		except:
-            cover = None
+			cover = None
-        return cover
+		return cover
-    def parse_index(self):
+	def parse_index(self):
-        feeds = []
+		feeds = []
-        dateStr = self.get_fetchdate()
+		dateStr = self.get_fetchdate()
-        for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
+
-                           (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
+		for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
-                           (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'),
+		                   (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
-                           (u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
+		                   (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
 			articles = self.parse_section(url)
 			if articles:
 				feeds.append((title, articles))
 		# special- editorial
 		ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
 		if ed_articles:
 			feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
 		for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
                           (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
-                           (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'),
+                           (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
-                           ('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
+			articles = self.parse_section(url)
-                           (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
+			if articles:
-                           (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'),
+				feeds.append((title, articles))
-                           (u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
+
 		# special - finance
 		#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
 		fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
 		if fin_articles:
 			feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
 		for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
                           (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
 			articles = self.parse_section(url)
 			if articles:
 				feeds.append((title, articles))
 		# special - entertainment
 		ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
 		if ent_articles:
 			feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
 		for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                           (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
-            articles = self.parse_section(url)
+			articles = self.parse_section(url)
-            if articles:
+			if articles:
-                feeds.append((title, articles))
+				feeds.append((title, articles))
        # special - finance
        fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
        if fin_articles:
            feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
        # special - entertainment
        ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
        if ent_articles:
            feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
        return feeds
    def parse_section(self, url):
        '''
        Collect the article links listed on one section index page.

        :param url: address of the section index page to scan.
        :return: list of dicts with 'title', 'url', 'description' and 'date'
                 keys, in page order. When a link appears more than once, only
                 its last occurrence on the page is kept. Links whose address
                 contains 'Redirect' are left out.
        '''
        dateStr = self.get_fetchdate()
        soup = self.index_to_soup(url)
        divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
        current_articles = []
        included_urls = set()
        # Walk the page bottom-up so that, for duplicated links, the last
        # occurrence wins; the result is flipped back to page order at the end.
        divs.reverse()
        for i in divs:
            a = i.find('a', href = True)
            if a is None:
                # bullet without a link: nothing to fetch
                continue
            title = self.tag_to_string(a)
            # hrefs on the index page are joined onto the dated issue folder
            article_url = 'http://news.mingpao.com/' + dateStr + '/' + a.get('href', False)
            if article_url not in included_urls and article_url.rfind('Redirect') == -1:
                current_articles.append({'title': title, 'url': article_url, 'description':'', 'date':''})
                included_urls.add(article_url)
        current_articles.reverse()
        return current_articles
-    def parse_fin_section(self, url):
+		# special- columns
-        dateStr = self.get_fetchdate()
+		col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
-        soup = self.index_to_soup(url)
+		if col_articles:
-        a = soup.findAll('a', href= True)
+			feeds.append((u'\u5c08\u6b04 Columns', col_articles))
        current_articles = []
        included_urls = []
        for i in a:
            url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
            if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
                title = self.tag_to_string(i)
                current_articles.append({'title': title, 'url': url, 'description':''})
                included_urls.append(url)
        return current_articles
-    def parse_ent_section(self, url):
+		return feeds
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('star') == -1):
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles
-    def preprocess_html(self, soup):
+	def parse_section(self, url):
-        for item in soup.findAll(style=True):
+		dateStr = self.get_fetchdate()
-            del item['style']
+		soup = self.index_to_soup(url)
-        for item in soup.findAll(style=True):
+		divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
-            del item['width']
+		current_articles = []
-        for item in soup.findAll(stype=True):
+		included_urls = []
-            del item['absmiddle']
+		divs.reverse()
-        return soup
+		for i in divs:
 			a = i.find('a', href = True)
 			title = self.tag_to_string(a)
 			url = a.get('href', False)
 			url = 'http://news.mingpao.com/' + dateStr + '/' +url
 			if url not in included_urls and url.rfind('Redirect') == -1:
 				current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
 				included_urls.append(url)
 		current_articles.reverse()
 		return current_articles
-    def create_opf(self, feeds, dir=None):
+	def parse_ed_section(self, url):
-        if dir is None:
+		self.get_fetchdate()
-            dir = self.output_dir
+		soup = self.index_to_soup(url)
-        if self.IsCJKWellSupported == True:
+		a = soup.findAll('a', href=True)
-            # use Chinese title
+		a.reverse()
-            title = u'\u660e\u5831 (\u9999\u6e2f) ' + self.get_fetchformatteddate()
+		current_articles = []
-        else:
+		included_urls = []
-            # use English title
+		for i in a:
-            title = self.short_title() + ' ' + self.get_fetchformatteddate()
+			title = self.tag_to_string(i)
-        if True:  # force date in title
+			url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
-            #    title += strftime(self.timefmt)
+			if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('nal') == -1):
-            mi = MetaInformation(title, [self.publisher])
+				current_articles.append({'title': title, 'url': url, 'description': ''})
-            mi.publisher = self.publisher
+				included_urls.append(url)
-            mi.author_sort = self.publisher
+		current_articles.reverse()
-            if self.IsCJKWellSupported == True:
+		return current_articles
                mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
            else:
                mi.publication_type = self.publication_type+':'+self.short_title()
            #mi.timestamp = nowf()
            mi.timestamp = self.get_dtlocal()
            mi.comments = self.description
            if not isinstance(mi.comments, unicode):
                mi.comments = mi.comments.decode('utf-8', 'replace')
            #mi.pubdate = nowf()
            mi.pubdate = self.get_dtlocal()
            opf_path = os.path.join(dir, 'index.opf')
            ncx_path = os.path.join(dir, 'index.ncx')
            opf = OPFCreator(dir, mi)
            # Add mastheadImage entry to <guide> section
            mp = getattr(self, 'masthead_path', None)
            if mp is not None and os.access(mp, os.R_OK):
                from calibre.ebooks.metadata.opf2 import Guide
                ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
                ref.type = 'masthead'
                ref.title = 'Masthead Image'
                opf.guide.append(ref)
-            manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
+	def parse_fin_section(self, url):
-            manifest.append(os.path.join(dir, 'index.html'))
+		self.get_fetchdate()
-            manifest.append(os.path.join(dir, 'index.ncx'))
+		soup = self.index_to_soup(url)
 		a = soup.findAll('a', href= True)
 		current_articles = []
 		included_urls = []
 		for i in a:
 			#url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
 			url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
 			#if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
 			if url not in included_urls and (not url.rfind('txt') == -1) and (not url.rfind('nal') == -1):
 				title = self.tag_to_string(i)
 				current_articles.append({'title': title, 'url': url, 'description':''})
 				included_urls.append(url)
 		return current_articles
-            # Get cover
+	def parse_ent_section(self, url):
-            cpath = getattr(self, 'cover_path', None)
+		self.get_fetchdate()
-            if cpath is None:
+		soup = self.index_to_soup(url)
-                pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
+		a = soup.findAll('a', href=True)
-                if self.default_cover(pf):
+		a.reverse()
-                    cpath =  pf.name
+		current_articles = []
-            if cpath is not None and os.access(cpath, os.R_OK):
+		included_urls = []
-                opf.cover = cpath
+		for i in a:
-                manifest.append(cpath)
+			title = self.tag_to_string(i)
 			url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
 			if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('star') == -1):
 				current_articles.append({'title': title, 'url': url, 'description': ''})
 				included_urls.append(url)
 		current_articles.reverse()
 		return current_articles
-            # Get masthead
+	def parse_col_section(self, url):
-            mpath = getattr(self, 'masthead_path', None)
+		self.get_fetchdate()
-            if mpath is not None and os.access(mpath, os.R_OK):
+		soup = self.index_to_soup(url)
-                manifest.append(mpath)
+		a = soup.findAll('a', href=True)
 		a.reverse()
 		current_articles = []
 		included_urls = []
 		for i in a:
 			title = self.tag_to_string(i)
 			url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
 			if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('ncl') == -1):
 				current_articles.append({'title': title, 'url': url, 'description': ''})
 				included_urls.append(url)
 		current_articles.reverse()
 		return current_articles
-            opf.create_manifest_from_files_in(manifest)
+	def preprocess_html(self, soup):
-            for mani in opf.manifest:
+		for item in soup.findAll(style=True):
-                if mani.path.endswith('.ncx'):
+			del item['style']
-                    mani.id = 'ncx'
+		for item in soup.findAll(style=True):
-                if mani.path.endswith('mastheadImage.jpg'):
+			del item['width']
-                    mani.id = 'masthead-image'
+		for item in soup.findAll(stype=True):
-            entries = ['index.html']
+			del item['absmiddle']
-            toc = TOC(base_path=dir)
+		return soup
            self.play_order_counter = 0
            self.play_order_map = {}
-        def feed_index(num, parent):
+	def create_opf(self, feeds, dir=None):
-            f = feeds[num]
+		if dir is None:
-            for j, a in enumerate(f):
+			dir = self.output_dir
-                if getattr(a, 'downloaded', False):
+		if __UseChineseTitle__ == True:
-                    adir = 'feed_%d/article_%d/'%(num, j)
+			title = u'\u660e\u5831 (\u9999\u6e2f)'
-                    auth = a.author
+		else:
-                    if not auth:
+			title = self.short_title()
-                        auth = None
+		# if not generating a periodical, force date to apply in title
-                    desc = a.text_summary
+		if __MakePeriodical__ == False:
-                    if not desc:
+			title = title + ' ' + self.get_fetchformatteddate()
-                        desc = None
+		if True:
-                    else:
+			mi = MetaInformation(title, [self.publisher])
-                        desc = self.description_limiter(desc)
+			mi.publisher = self.publisher
-                    entries.append('%sindex.html'%adir)
+			mi.author_sort = self.publisher
-                    po = self.play_order_map.get(entries[-1], None)
+			if __MakePeriodical__ == True:
-                    if po is None:
+				mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
-                        self.play_order_counter += 1
+			else:
-                        po = self.play_order_counter
+				mi.publication_type = self.publication_type+':'+self.short_title()
-                    parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
+			#mi.timestamp = nowf()
 			mi.timestamp = self.get_dtlocal()
 			mi.comments = self.description
 			if not isinstance(mi.comments, unicode):
 				mi.comments = mi.comments.decode('utf-8', 'replace')
 			#mi.pubdate = nowf()
 			mi.pubdate = self.get_dtlocal()
 			opf_path = os.path.join(dir, 'index.opf')
 			ncx_path = os.path.join(dir, 'index.ncx')
 			opf = OPFCreator(dir, mi)
 			# Add mastheadImage entry to <guide> section
 			mp = getattr(self, 'masthead_path', None)
 			if mp is not None and os.access(mp, os.R_OK):
 				from calibre.ebooks.metadata.opf2 import Guide
 				ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
 				ref.type = 'masthead'
 				ref.title = 'Masthead Image'
 				opf.guide.append(ref)
 			manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
 			manifest.append(os.path.join(dir, 'index.html'))
 			manifest.append(os.path.join(dir, 'index.ncx'))
 			# Get cover
 			cpath = getattr(self, 'cover_path', None)
 			if cpath is None:
 				pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
 				if self.default_cover(pf):
 					cpath =  pf.name
 			if cpath is not None and os.access(cpath, os.R_OK):
 				opf.cover = cpath
 				manifest.append(cpath)
 			# Get masthead
 			mpath = getattr(self, 'masthead_path', None)
 			if mpath is not None and os.access(mpath, os.R_OK):
 				manifest.append(mpath)
 			opf.create_manifest_from_files_in(manifest)
 			for mani in opf.manifest:
 				if mani.path.endswith('.ncx'):
 					mani.id = 'ncx'
 				if mani.path.endswith('mastheadImage.jpg'):
 					mani.id = 'masthead-image'
 			entries = ['index.html']
 			toc = TOC(base_path=dir)
 			self.play_order_counter = 0
 			self.play_order_map = {}
 		def feed_index(num, parent):
 			f = feeds[num]
 			for j, a in enumerate(f):
 				if getattr(a, 'downloaded', False):
 					adir = 'feed_%d/article_%d/'%(num, j)
 					auth = a.author
 					if not auth:
 						auth = None
 					desc = a.text_summary
 					if not desc:
 						desc = None
 					else:
 						desc = self.description_limiter(desc)
 					entries.append('%sindex.html'%adir)
 					po = self.play_order_map.get(entries[-1], None)
 					if po is None:
 						self.play_order_counter += 1
 						po = self.play_order_counter
 					parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
                                    play_order=po, author=auth, description=desc)
-                    last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
+					last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
-                    for sp in a.sub_pages:
+					for sp in a.sub_pages:
-                        prefix = os.path.commonprefix([opf_path, sp])
+						prefix = os.path.commonprefix([opf_path, sp])
-                        relp = sp[len(prefix):]
+						relp = sp[len(prefix):]
-                        entries.append(relp.replace(os.sep, '/'))
+						entries.append(relp.replace(os.sep, '/'))
-                        last = sp
+						last = sp
-                    if os.path.exists(last):
+					if os.path.exists(last):
-                        with open(last, 'rb') as fi:
+						with open(last, 'rb') as fi:
-                            src = fi.read().decode('utf-8')
+							src = fi.read().decode('utf-8')
-                        soup = BeautifulSoup(src)
+						soup = BeautifulSoup(src)
-                        body = soup.find('body')
+						body = soup.find('body')
-                        if body is not None:
+						if body is not None:
-                            prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
+							prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
-                            templ = self.navbar.generate(True, num, j, len(f),
+							templ = self.navbar.generate(True, num, j, len(f),
                                            not self.has_single_feed,
                                            a.orig_url, self.publisher, prefix=prefix,
                                            center=self.center_navbar)
-                            elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
+							elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
-                            body.insert(len(body.contents), elem)
+							body.insert(len(body.contents), elem)
-                            with open(last, 'wb') as fi:
+							with open(last, 'wb') as fi:
-                                fi.write(unicode(soup).encode('utf-8'))
+								fi.write(unicode(soup).encode('utf-8'))
-        if len(feeds) == 0:
+		if len(feeds) == 0:
-            raise Exception('All feeds are empty, aborting.')
+			raise Exception('All feeds are empty, aborting.')
-        if len(feeds) > 1:
+		if len(feeds) > 1:
-            for i, f in enumerate(feeds):
+			for i, f in enumerate(feeds):
-                entries.append('feed_%d/index.html'%i)
+				entries.append('feed_%d/index.html'%i)
-                po = self.play_order_map.get(entries[-1], None)
+				po = self.play_order_map.get(entries[-1], None)
-                if po is None:
+				if po is None:
-                    self.play_order_counter += 1
+					self.play_order_counter += 1
-                    po = self.play_order_counter
+					po = self.play_order_counter
-                auth = getattr(f, 'author', None)
+				auth = getattr(f, 'author', None)
-                if not auth:
+				if not auth:
-                    auth = None
+					auth = None
-                desc = getattr(f, 'description', None)
+				desc = getattr(f, 'description', None)
-                if not desc:
+				if not desc:
-                    desc = None
+					desc = None
-                feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
+				feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                           f.title, play_order=po, description=desc, author=auth))
-        else:
+		else:
-            entries.append('feed_%d/index.html'%0)
+			entries.append('feed_%d/index.html'%0)
-            feed_index(0, toc)
+			feed_index(0, toc)
-        for i, p in enumerate(entries):
+		for i, p in enumerate(entries):
-            entries[i] = os.path.join(dir, p.replace('/', os.sep))
+			entries[i] = os.path.join(dir, p.replace('/', os.sep))
-        opf.create_spine(entries)
+		opf.create_spine(entries)
-        opf.set_toc(toc)
+		opf.set_toc(toc)
-        with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
+		with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
-            opf.render(opf_file, ncx_file)
+			opf.render(opf_file, ncx_file)
--- a/src/calibre/devices/init.py
+++ b/src/calibre/devices/init.py
@ -30,6 +30,7 @@ def strftime(epoch, zone=time.gmtime):
 def get_connected_device():
    from calibre.customize.ui import device_plugins
    from calibre.devices.scanner import DeviceScanner
    import uuid
    dev = None
    scanner = DeviceScanner()
    scanner.scan()
@ -47,7 +48,7 @@ def get_connected_device():
    for d in connected_devices:
        try:
-            d.open()
+            d.open(str(uuid.uuid4()))
        except:
            continue
        else:
--- a/src/calibre/devices/prs500/cli/main.py
+++ b/src/calibre/devices/prs500/cli/main.py
@ -6,7 +6,7 @@ Provides a command-line and optional graphical interface to the SONY Reader PRS-
 For usage information run the script.
 """
-import StringIO, sys, time, os
+import StringIO, sys, time, os, uuid
 from optparse import OptionParser
 from calibre import __version__, __appname__
@ -213,7 +213,7 @@ def main():
    for d in connected_devices:
        try:
-            d.open()
+            d.open(str(uuid.uuid4()))
        except:
            continue
        else:
--- a/src/calibre/gui2/custom_column_widgets.py
+++ b/src/calibre/gui2/custom_column_widgets.py
@ -5,7 +5,6 @@ __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import sys
 from functools import partial
 from PyQt4.Qt import QComboBox, QLabel, QSpinBox, QDoubleSpinBox, QDateEdit, \
@ -85,7 +84,7 @@ class Int(Base):
        self.widgets = [QLabel('&'+self.col_metadata['name']+':', parent),
                QSpinBox(parent)]
        w = self.widgets[1]
-        w.setRange(-100, sys.maxint)
+        w.setRange(-100, 100000000)
        w.setSpecialValueText(_('Undefined'))
        w.setSingleStep(1)
@ -108,7 +107,7 @@ class Float(Int):
        self.widgets = [QLabel('&'+self.col_metadata['name']+':', parent),
                QDoubleSpinBox(parent)]
        w = self.widgets[1]
-        w.setRange(-100., float(sys.maxint))
+        w.setRange(-100., float(100000000))
        w.setDecimals(2)
        w.setSpecialValueText(_('Undefined'))
        w.setSingleStep(1)
@ -289,7 +288,7 @@ class Series(Base):
        self.widgets.append(QLabel('&'+self.col_metadata['name']+_(' index:'), parent))
        w = QDoubleSpinBox(parent)
-        w.setRange(-100., float(sys.maxint))
+        w.setRange(-100., float(100000000))
        w.setDecimals(2)
        w.setSpecialValueText(_('Undefined'))
        w.setSingleStep(1)
@ -595,7 +594,7 @@ class BulkInt(BulkBase):
    def setup_ui(self, parent):
        self.make_widgets(parent, QSpinBox)
-        self.main_widget.setRange(-100, sys.maxint)
+        self.main_widget.setRange(-100, 100000000)
        self.main_widget.setSpecialValueText(_('Undefined'))
        self.main_widget.setSingleStep(1)
@ -617,7 +616,7 @@ class BulkFloat(BulkInt):
    def setup_ui(self, parent):
        self.make_widgets(parent, QDoubleSpinBox)
-        self.main_widget.setRange(-100., float(sys.maxint))
+        self.main_widget.setRange(-100., float(100000000))
        self.main_widget.setDecimals(2)
        self.main_widget.setSpecialValueText(_('Undefined'))
        self.main_widget.setSingleStep(1)
--- a/src/calibre/gui2/library/delegates.py
+++ b/src/calibre/gui2/library/delegates.py
@ -5,7 +5,6 @@ __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import sys
 from math import cos, sin, pi
 from PyQt4.Qt import QColor, Qt, QModelIndex, QSize, \
@ -245,13 +244,13 @@ class CcTextDelegate(QStyledItemDelegate): # {{{
        typ = m.custom_columns[col]['datatype']
        if typ == 'int':
            editor = QSpinBox(parent)
-            editor.setRange(-100, sys.maxint)
+            editor.setRange(-100, 100000000)
            editor.setSpecialValueText(_('Undefined'))
            editor.setSingleStep(1)
        elif typ == 'float':
            editor = QDoubleSpinBox(parent)
            editor.setSpecialValueText(_('Undefined'))
-            editor.setRange(-100., float(sys.maxint))
+            editor.setRange(-100., 100000000)
            editor.setDecimals(2)
        else:
            editor = MultiCompleteLineEdit(parent)
--- a/src/calibre/utils/magick/init.py
+++ b/src/calibre/utils/magick/init.py
@ -95,6 +95,26 @@ class DrawingWand(_magick.DrawingWand): # {{{
            self.font_size_ = float(val)
        return property(fget=fget, fset=fset, doc=_magick.DrawingWand.font_size_.__doc__)
    @dynamic_property
    def stroke_color(self):
        # Python-level wrapper around the extension attribute ``stroke_color_``.
        # Reading yields the ``color`` attribute of the value stored in
        # ``stroke_color_``; writing wraps ``val`` in a new PixelWand first.
        def fget(self):
            return self.stroke_color_.color
        def fset(self, val):
            col = PixelWand()
            # The value is coerced to unicode before being handed to PixelWand.
            col.color = unicode(val)
            self.stroke_color_ = col
        # Use the docstring of the attribute this property actually wraps
        # (previously it borrowed the one belonging to font_size_).
        return property(fget=fget, fset=fset, doc=_magick.DrawingWand.stroke_color_.__doc__)
    @dynamic_property
    def fill_color(self):
        # Python-level wrapper around the extension attribute ``fill_color_``.
        # Reading yields the ``color`` attribute of the value stored in
        # ``fill_color_``; writing wraps ``val`` in a new PixelWand first.
        def fget(self):
            return self.fill_color_.color
        def fset(self, val):
            col = PixelWand()
            # The value is coerced to unicode before being handed to PixelWand.
            col.color = unicode(val)
            self.fill_color_ = col
        # Use the docstring of the attribute this property actually wraps
        # (previously it borrowed the one belonging to font_size_).
        return property(fget=fget, fset=fset, doc=_magick.DrawingWand.fill_color_.__doc__)
 # }}}
 class Image(_magick.Image): # {{{
--- a/src/calibre/utils/magick/magick.c
+++ b/src/calibre/utils/magick/magick.c
@ -263,6 +263,78 @@ magick_DrawingWand_fontsize_setter(magick_DrawingWand *self, PyObject *val, void
 // }}}
 // DrawingWand.stroke_color {{{
 static PyObject *
 magick_DrawingWand_stroke_color_getter(magick_DrawingWand *self, void *closure) {
    // Getter for the "stroke_color_" attribute: copies the drawing wand's
    // stroke color into a freshly created PixelWand and returns it wrapped in
    // a new magick_PixelWand Python object. Returns NULL with an exception set
    // on failure.
    // NOTE(review): NULL_CHECK is defined elsewhere in this file; presumably it
    // returns the given value when self->wand is unusable -- confirm.
    NULL_CHECK(NULL)
    magick_PixelWand *pw;
    PixelWand *wand = NewPixelWand();

    if (wand == NULL) return PyErr_NoMemory();
    DrawGetStrokeColor(self->wand, wand);

    pw = (magick_PixelWand*) magick_PixelWandType.tp_alloc(&magick_PixelWandType, 0);
    if (pw == NULL) {
        // No Python object took ownership of the native wand, so release it
        // here instead of leaking it.
        wand = DestroyPixelWand(wand);
        return PyErr_NoMemory();
    }
    pw->wand = wand;

    // tp_alloc already handed us a new reference. Py_BuildValue("O", ...)
    // would add a second one that nobody releases, so the wrapper (and its
    // wand) would never be freed. Return the owned reference directly.
    return (PyObject *)pw;
 }
 static int
 magick_DrawingWand_stroke_color_setter(magick_DrawingWand *self, PyObject *val, void *closure) {
    // Setter for the "stroke_color_" attribute: applies the color held by a
    // magick_PixelWand object to the drawing wand. Returns 0 on success and -1
    // with a TypeError set when the attribute is being deleted or the value is
    // not a valid PixelWand.
    // NOTE(review): NULL_CHECK is defined elsewhere in this file; presumably it
    // returns the given value when self->wand is unusable -- confirm.
    NULL_CHECK(-1)

    if (val == NULL) {
        PyErr_SetString(PyExc_TypeError, "Cannot delete DrawingWand stroke color");
        return -1;
    }

    // Check the Python type before casting: an arbitrary object does not have
    // the magick_PixelWand layout, so reading pw->wand from it would access
    // unrelated memory.
    if (!PyObject_TypeCheck(val, &magick_PixelWandType)) {
        PyErr_SetString(PyExc_TypeError, "Invalid PixelWand");
        return -1;
    }

    magick_PixelWand *pw = (magick_PixelWand*)val;
    if (!IsPixelWand(pw->wand)) { PyErr_SetString(PyExc_TypeError, "Invalid PixelWand"); return -1; }

    DrawSetStrokeColor(self->wand, pw->wand);
    return 0;
 }
 // }}}
 // DrawingWand.fill_color {{{
 static PyObject *
 magick_DrawingWand_fill_color_getter(magick_DrawingWand *self, void *closure) {
    // Getter for the "fill_color_" attribute: copies the drawing wand's fill
    // color into a freshly created PixelWand and returns it wrapped in a new
    // magick_PixelWand Python object. Returns NULL with an exception set on
    // failure.
    // NOTE(review): NULL_CHECK is defined elsewhere in this file; presumably it
    // returns the given value when self->wand is unusable -- confirm.
    NULL_CHECK(NULL)
    magick_PixelWand *pw;
    PixelWand *wand = NewPixelWand();

    if (wand == NULL) return PyErr_NoMemory();
    DrawGetFillColor(self->wand, wand);

    pw = (magick_PixelWand*) magick_PixelWandType.tp_alloc(&magick_PixelWandType, 0);
    if (pw == NULL) {
        // No Python object took ownership of the native wand, so release it
        // here instead of leaking it.
        wand = DestroyPixelWand(wand);
        return PyErr_NoMemory();
    }
    pw->wand = wand;

    // tp_alloc already handed us a new reference. Py_BuildValue("O", ...)
    // would add a second one that nobody releases, so the wrapper (and its
    // wand) would never be freed. Return the owned reference directly.
    return (PyObject *)pw;
 }
 static int
 magick_DrawingWand_fill_color_setter(magick_DrawingWand *self, PyObject *val, void *closure) {
    // Setter for the "fill_color_" attribute: applies the color held by a
    // magick_PixelWand object to the drawing wand. Returns 0 on success and -1
    // with a TypeError set when the attribute is being deleted or the value is
    // not a valid PixelWand.
    // NOTE(review): NULL_CHECK is defined elsewhere in this file; presumably it
    // returns the given value when self->wand is unusable -- confirm.
    NULL_CHECK(-1)

    if (val == NULL) {
        PyErr_SetString(PyExc_TypeError, "Cannot delete DrawingWand fill color");
        return -1;
    }

    // Check the Python type before casting: an arbitrary object does not have
    // the magick_PixelWand layout, so reading pw->wand from it would access
    // unrelated memory.
    if (!PyObject_TypeCheck(val, &magick_PixelWandType)) {
        PyErr_SetString(PyExc_TypeError, "Invalid PixelWand");
        return -1;
    }

    magick_PixelWand *pw = (magick_PixelWand*)val;
    if (!IsPixelWand(pw->wand)) { PyErr_SetString(PyExc_TypeError, "Invalid PixelWand"); return -1; }

    DrawSetFillColor(self->wand, pw->wand);
    return 0;
 }
 // }}}
 // DrawingWand.text_antialias {{{
 static PyObject *
 magick_DrawingWand_textantialias_getter(magick_DrawingWand *self, void *closure) {
@ -336,6 +408,16 @@ static PyGetSetDef  magick_DrawingWand_getsetters[] = {
     (char *)"DrawingWand fontsize",
     NULL},
    {(char *)"stroke_color_", 
     (getter)magick_DrawingWand_stroke_color_getter, (setter)magick_DrawingWand_stroke_color_setter,
     (char *)"DrawingWand stroke color",
     NULL},
    {(char *)"fill_color_", 
     (getter)magick_DrawingWand_fill_color_getter, (setter)magick_DrawingWand_fill_color_setter,
     (char *)"DrawingWand fill color",
     NULL},
    {(char *)"text_antialias", 
     (getter)magick_DrawingWand_textantialias_getter, (setter)magick_DrawingWand_textantialias_setter,
     (char *)"DrawingWand text antialias",