diff --git a/recipes/eluniversal_ve.recipe b/recipes/eluniversal_ve.recipe index 28667cd39b..d7c2c4710b 100644 --- a/recipes/eluniversal_ve.recipe +++ b/recipes/eluniversal_ve.recipe @@ -1,5 +1,5 @@ __license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' +__copyright__ = '2010-2011, Darko Miletic ' ''' www.eluniversal.com ''' @@ -15,12 +15,20 @@ class ElUniversal(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False + remove_empty_feeds = True encoding = 'cp1252' publisher = 'El Universal' category = 'news, Caracas, Venezuela, world' language = 'es_VE' + publication_type = 'newspaper' cover_url = strftime('http://static.eluniversal.com/%Y/%m/%d/portada.jpg') - + extra_css = """ + .txt60{font-family: Tahoma,Geneva,sans-serif; font-size: small} + .txt29{font-family: Tahoma,Geneva,sans-serif; font-size: small; color: gray} + .txt38{font-family: Georgia,"Times New Roman",Times,serif; font-size: xx-large} + .txt35{font-family: Georgia,"Times New Roman",Times,serif; font-size: large} + body{font-family: Verdana,Arial,Helvetica,sans-serif} + """ conversion_options = { 'comments' : description ,'tags' : category @@ -28,10 +36,11 @@ class ElUniversal(BasicNewsRecipe): ,'publisher' : publisher } - keep_only_tags = [dict(name='div', attrs={'class':'Nota'})] + remove_tags_before=dict(attrs={'class':'header-print MB10'}) + remove_tags_after= dict(attrs={'id':'SizeText'}) remove_tags = [ - dict(name=['object','link','script','iframe']) - ,dict(name='div',attrs={'class':'Herramientas'}) + dict(name=['object','link','script','iframe','meta']) + ,dict(attrs={'class':'header-print MB10'}) ] feeds = [ diff --git a/recipes/financial_times.recipe b/recipes/financial_times.recipe index e750b6f113..0079b2be3a 100644 --- a/recipes/financial_times.recipe +++ b/recipes/financial_times.recipe @@ -1,32 +1,41 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2010-2011, Darko Miletic ' 
''' -ft.com +www.ft.com ''' +import datetime from calibre.web.feeds.news import BasicNewsRecipe -class FinancialTimes(BasicNewsRecipe): - title = u'Financial Times' - __author__ = 'Darko Miletic and Sujata Raman' - description = ('Financial world news. Available after 5AM ' - 'GMT, daily.') +class FinancialTimes_rss(BasicNewsRecipe): + title = 'Financial Times' + __author__ = 'Darko Miletic' + description = "The Financial Times (FT) is one of the world's leading business news and information organisations, recognised internationally for its authority, integrity and accuracy." + publisher = 'The Financial Times Ltd.' + category = 'news, finances, politics, World' oldest_article = 2 - language = 'en' - - max_articles_per_feed = 100 + language = 'en' + max_articles_per_feed = 250 no_stylesheets = True use_embedded_content = False needs_subscription = True - simultaneous_downloads= 1 - delay = 1 + encoding = 'utf8' + publication_type = 'newspaper' + masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg' + LOGIN = 'https://registration.ft.com/registration/barrier/login' + INDEX = 'http://www.ft.com' - LOGIN = 'https://registration.ft.com/registration/barrier/login' + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + , 'linearize_tables' : True + } def get_browser(self): br = BasicNewsRecipe.get_browser() + br.open(self.INDEX) if self.username is not None and self.password is not None: br.open(self.LOGIN) br.select_form(name='loginForm') @@ -35,31 +44,63 @@ class FinancialTimes(BasicNewsRecipe): br.submit() return br - keep_only_tags = [ dict(name='div', attrs={'id':'cont'}) ] - remove_tags_after = dict(name='p', attrs={'class':'copyright'}) + keep_only_tags = [dict(name='div', attrs={'class':['fullstory fullstoryHeader','fullstory fullstoryBody','ft-story-header','ft-story-body','index-detail']})] remove_tags = [ - dict(name='div', attrs={'id':'floating-con'}) + dict(name='div', 
attrs={'id':'floating-con'}) + ,dict(name=['meta','iframe','base','object','embed','link']) + ,dict(attrs={'class':['storyTools','story-package','screen-copy','story-package separator','expandable-image']}) ] + remove_attributes = ['width','height','lang'] - extra_css = ''' - body{font-family:Arial,Helvetica,sans-serif;} - h2(font-size:large;} - .ft-story-header(font-size:xx-small;} - .ft-story-body(font-size:small;} - a{color:#003399;} + extra_css = """ + body{font-family: Georgia,Times,"Times New Roman",serif} + h2{font-size:large} + .ft-story-header{font-size: x-small} .container{font-size:x-small;} h3{font-size:x-small;color:#003399;} - ''' + .copyright{font-size: x-small} + img{margin-top: 0.8em; display: block} + .lastUpdated{font-family: Arial,Helvetica,sans-serif; font-size: x-small} + .byline,.ft-story-body,.ft-story-header{font-family: Arial,Helvetica,sans-serif} + """ + feeds = [ (u'UK' , u'http://www.ft.com/rss/home/uk' ) ,(u'US' , u'http://www.ft.com/rss/home/us' ) - ,(u'Europe' , u'http://www.ft.com/rss/home/europe' ) ,(u'Asia' , u'http://www.ft.com/rss/home/asia' ) ,(u'Middle East', u'http://www.ft.com/rss/home/middleeast') ] def preprocess_html(self, soup): - content_type = soup.find('meta', {'http-equiv':'Content-Type'}) - if content_type: - content_type['content'] = 'text/html; charset=utf-8' + items = ['promo-box','promo-title', + 'promo-headline','promo-image', + 'promo-intro','promo-link','subhead'] + for item in items: + for it in soup.findAll(item): + it.name = 'div' + it.attrs = [] + for item in soup.findAll(style=True): + del item['style'] + for item in soup.findAll('a'): + limg = item.find('img') + if item.string is not None: + str = item.string + item.replaceWith(str) + else: + if limg: + item.name = 'div' + item.attrs = [] + else: + str = self.tag_to_string(item) + item.replaceWith(str) + for item in soup.findAll('img'): + if not item.has_key('alt'): + item['alt'] = 'image' return soup + + def get_cover_url(self): + cdate = 
datetime.date.today() + if cdate.isoweekday() == 7: + cdate -= datetime.timedelta(days=1) + return cdate.strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_USA.pdf') + diff --git a/recipes/financial_times_uk.recipe b/recipes/financial_times_uk.recipe index 6fe1ac6acd..e06eb0dc77 100644 --- a/recipes/financial_times_uk.recipe +++ b/recipes/financial_times_uk.recipe @@ -3,6 +3,8 @@ __copyright__ = '2010-2011, Darko Miletic ' ''' www.ft.com/uk-edition ''' + +import datetime from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe @@ -20,7 +22,6 @@ class FinancialTimes(BasicNewsRecipe): needs_subscription = True encoding = 'utf8' publication_type = 'newspaper' - cover_url = strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_LON.pdf') masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg' LOGIN = 'https://registration.ft.com/registration/barrier/login' INDEX = 'http://www.ft.com/uk-edition' @@ -128,3 +129,10 @@ class FinancialTimes(BasicNewsRecipe): if not item.has_key('alt'): item['alt'] = 'image' return soup + + def get_cover_url(self): + cdate = datetime.date.today() + if cdate.isoweekday() == 7: + cdate -= datetime.timedelta(days=1) + return cdate.strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_LON.pdf') + \ No newline at end of file diff --git a/recipes/hbr.recipe b/recipes/hbr.recipe index cd7dcd2061..1152a48784 100644 --- a/recipes/hbr.recipe +++ b/recipes/hbr.recipe @@ -1,5 +1,6 @@ from calibre.web.feeds.news import BasicNewsRecipe import re +from datetime import date, timedelta class HBR(BasicNewsRecipe): @@ -12,13 +13,14 @@ class HBR(BasicNewsRecipe): no_stylesheets = True LOGIN_URL = 'http://hbr.org/login?request_url=/' - INDEX = 'http://hbr.org/current' + INDEX = 'http://hbr.org/archive-toc/BR' keep_only_tags = [dict(name='div', id='pageContainer')] remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline', 'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn', 'todayOnHBRListWidget', 
'mostWidget', 'keepUpWithHBR', 'mailingListTout', 'partnerCenter', 'pageFooter', + 'superNavHeadContainer', 'hbrDisqus', 'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']), dict(name='iframe')] extra_css = ''' @@ -55,9 +57,14 @@ class HBR(BasicNewsRecipe): def hbr_get_toc(self): - soup = self.index_to_soup(self.INDEX) - url = soup.find('a', text=lambda t:'Full Table of Contents' in t).parent.get('href') - return self.index_to_soup('http://hbr.org'+url) + today = date.today() + future = today + timedelta(days=30) + for x in [x.strftime('%y%m') for x in (future, today)]: + url = self.INDEX + x + soup = self.index_to_soup(url) + if not soup.find(text='Issue Not Found'): + return soup + raise Exception('Could not find current issue') def hbr_parse_section(self, container, feeds): current_section = None diff --git a/recipes/icons/financial_times.png b/recipes/icons/financial_times.png new file mode 100644 index 0000000000..2a769d9dbb Binary files /dev/null and b/recipes/icons/financial_times.png differ diff --git a/recipes/ming_pao.recipe b/recipes/ming_pao.recipe index 08ee20cb15..947d85692f 100644 --- a/recipes/ming_pao.recipe +++ b/recipes/ming_pao.recipe @@ -1,17 +1,23 @@ -# -*- coding: utf-8 -*- __license__ = 'GPL v3' __copyright__ = '2010-2011, Eddie Lau' +# Region - Hong Kong, Vancouver, Toronto +__Region__ = 'Hong Kong' # Users of Kindle 3 with limited system-level CJK support # please replace the following "True" with "False". 
__MakePeriodical__ = True # Turn below to true if your device supports display of CJK titles __UseChineseTitle__ = False -# Trun below to true if you wish to use life.mingpao.com as the main article source +# Set it to False if you want to skip images +__KeepImages__ = True +# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source __UseLife__ = True + ''' Change Log: +2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source + provide options to remove all images in the file 2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages 2011/03/06: add new articles for finance section, also a new section "Columns" 2011/02/28: rearrange the sections @@ -34,21 +40,96 @@ Change Log: import os, datetime, re from calibre.web.feeds.recipes import BasicNewsRecipe from contextlib import nested - - from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.metadata import MetaInformation -class MPHKRecipe(BasicNewsRecipe): - title = 'Ming Pao - Hong Kong' +# MAIN CLASS +class MPRecipe(BasicNewsRecipe): + if __Region__ == 'Hong Kong': + title = 'Ming Pao - Hong Kong' + description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)' + category = 'Chinese, News, Hong Kong' + extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}' + masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif' + keep_only_tags = [dict(name='h1'), + dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title + dict(name='font', attrs={'color':['AA0000']}), # for column articles title + dict(attrs={'id':['newscontent']}), # entertainment and column page content + 
dict(attrs={'id':['newscontent01','newscontent02']}), + dict(attrs={'class':['photo']}), + dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com + dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com + ] + if __KeepImages__: + remove_tags = [dict(name='style'), + dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com + dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article + #dict(name='table') # for content fetched from life.mingpao.com + ] + else: + remove_tags = [dict(name='style'), + dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com + dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article + dict(name='img'), + #dict(name='table') # for content fetched from life.mingpao.com + ] + remove_attributes = ['width'] + preprocess_regexps = [ + (re.compile(r'
', re.DOTALL|re.IGNORECASE), + lambda match: '

'), + (re.compile(r'

', re.DOTALL|re.IGNORECASE), + lambda match: ''), + (re.compile(r'

', re.DOTALL|re.IGNORECASE), # for entertainment page + lambda match: ''), + # skip
after title in life.mingpao.com fetched article + (re.compile(r"

", re.DOTALL|re.IGNORECASE), + lambda match: "
"), + (re.compile(r"

", re.DOTALL|re.IGNORECASE), + lambda match: "") + ] + elif __Region__ == 'Vancouver': + title = 'Ming Pao - Vancouver' + description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)' + category = 'Chinese, News, Vancouver' + extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}' + masthead_url = 'http://www.mingpaovan.com/image/mainlogo2_VAN2.gif' + keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}), + dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}), + dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}), + ] + if __KeepImages__: + remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon + else: + remove_tags = [dict(name='img')] + remove_attributes = ['width'] + preprocess_regexps = [(re.compile(r' ', re.DOTALL|re.IGNORECASE), + lambda match: ''), + ] + elif __Region__ == 'Toronto': + title = 'Ming Pao - Toronto' + description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)' + category = 'Chinese, News, Toronto' + extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}' + masthead_url = 'http://www.mingpaotor.com/image/mainlogo2_TOR2.gif' + keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}), + dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}), + dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}), + ] + if __KeepImages__: + remove_tags = [dict(name='img', 
attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon + else: + remove_tags = [dict(name='img')] + remove_attributes = ['width'] + preprocess_regexps = [(re.compile(r' ', re.DOTALL|re.IGNORECASE), + lambda match: ''), + ] + oldest_article = 1 max_articles_per_feed = 100 __author__ = 'Eddie Lau' - description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)' publisher = 'MingPao' - category = 'Chinese, News, Hong Kong' remove_javascript = True use_embedded_content = False no_stylesheets = True @@ -57,33 +138,6 @@ class MPHKRecipe(BasicNewsRecipe): recursions = 0 conversion_options = {'linearize_tables':True} timefmt = '' - extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}' - masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif' - keep_only_tags = [dict(name='h1'), - dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title - dict(name='font', attrs={'color':['AA0000']}), # for column articles title - dict(attrs={'id':['newscontent']}), # entertainment and column page content - dict(attrs={'id':['newscontent01','newscontent02']}), - dict(attrs={'class':['photo']}), - dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com - ] - remove_tags = [dict(name='style'), - dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com - dict(name='table')] # for content fetched from life.mingpao.com - remove_attributes = ['width'] - preprocess_regexps = [ - (re.compile(r'
', re.DOTALL|re.IGNORECASE), - lambda match: '

'), - (re.compile(r'

', re.DOTALL|re.IGNORECASE), - lambda match: ''), - (re.compile(r'

', re.DOTALL|re.IGNORECASE), # for entertainment page - lambda match: ''), - # skip
after title in life.mingpao.com fetched article - (re.compile(r"

", re.DOTALL|re.IGNORECASE), - lambda match: "
"), - (re.compile(r"

", re.DOTALL|re.IGNORECASE), - lambda match: "") - ] def image_url_processor(cls, baseurl, url): # trick: break the url at the first occurance of digit, add an additional @@ -124,8 +178,18 @@ class MPHKRecipe(BasicNewsRecipe): def get_dtlocal(self): dt_utc = datetime.datetime.utcnow() - # convert UTC to local hk time - at around HKT 6.00am, all news are available - dt_local = dt_utc - datetime.timedelta(-2.0/24) + if __Region__ == 'Hong Kong': + # convert UTC to local hk time - at HKT 5.30am, all news are available + dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24) + # dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24) + elif __Region__ == 'Vancouver': + # convert UTC to local Vancouver time - at PST time 5.30am, all news are available + dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24) + #dt_local = dt_utc.astimezone(pytz.timezone('America/Vancouver')) - datetime.timedelta(5.5/24) + elif __Region__ == 'Toronto': + # convert UTC to local Toronto time - at EST time 8.30am, all news are available + dt_local = dt_utc + datetime.timedelta(-5.0/24) - datetime.timedelta(8.5/24) + #dt_local = dt_utc.astimezone(pytz.timezone('America/Toronto')) - datetime.timedelta(8.5/24) return dt_local def get_fetchdate(self): @@ -135,13 +199,15 @@ class MPHKRecipe(BasicNewsRecipe): return self.get_dtlocal().strftime("%Y-%m-%d") def get_fetchday(self): - # dt_utc = datetime.datetime.utcnow() - # convert UTC to local hk time - at around HKT 6.00am, all news are available - # dt_local = dt_utc - datetime.timedelta(-2.0/24) return self.get_dtlocal().strftime("%d") def get_cover_url(self): - cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg' + if __Region__ == 'Hong Kong': + cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg' + elif __Region__ == 
'Vancouver': + cover = 'http://www.mingpaovan.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgva1s.jpg' + elif __Region__ == 'Toronto': + cover = 'http://www.mingpaotor.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgtas.jpg' br = BasicNewsRecipe.get_browser() try: br.open(cover) @@ -153,76 +219,104 @@ class MPHKRecipe(BasicNewsRecipe): feeds = [] dateStr = self.get_fetchdate() - if __UseLife__: - for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'), - (u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'), - (u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'), - (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr', 'nal'), - (u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalfa', 'nal'), - (u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalca', 'nal'), - (u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'), - (u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'), - (u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'), - (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'), - (u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]: - articles = self.parse_section2(url, keystr) + if __Region__ == 'Hong Kong': + if __UseLife__: + for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + 
dateStr + '&Category=nalga', 'nal'), + (u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'), + (u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'), + (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr', 'nal'), + (u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalfa', 'nal'), + (u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalca', 'nal'), + (u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'), + (u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'), + (u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'), + (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'), + (u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]: + articles = self.parse_section2(url, keystr) + if articles: + feeds.append((title, articles)) + + for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'), + (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]: + articles = self.parse_section(url) + if articles: + feeds.append((title, articles)) + else: + for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'), + (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'), + (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]: + articles = self.parse_section(url) + if articles: + feeds.append((title, articles)) + + # special- editorial + 
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr') + if ed_articles: + feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles)) + + for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'), + (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'), + (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]: + articles = self.parse_section(url) + if articles: + feeds.append((title, articles)) + + # special - finance + #fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm') + fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea') + if fin_articles: + feeds.append((u'\u7d93\u6fdf Finance', fin_articles)) + + for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'), + (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]: + articles = self.parse_section(url) + if articles: + feeds.append((title, articles)) + + # special - entertainment + ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm') + if ent_articles: + feeds.append((u'\u5f71\u8996 Film/TV', ent_articles)) + + for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'), + (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]: + articles = self.parse_section(url) + if articles: + feeds.append((title, articles)) + + + # special- columns + col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn') + if col_articles: + feeds.append((u'\u5c08\u6b04 Columns', col_articles)) + elif __Region__ == 'Vancouver': + for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'), 
+ (u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'), + (u'\u793e\u5340 Local', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VDindex.htm'), + (u'\u6e2f\u805e Hong Kong', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-VGindex.htm'), + (u'\u570b\u969b World', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VTindex.htm'), + (u'\u4e2d\u570b China', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VCindex.htm'), + (u'\u7d93\u6fdf Economics', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VEindex.htm'), + (u'\u9ad4\u80b2 Sports', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VSindex.htm'), + (u'\u5f71\u8996 Film/TV', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-MAindex.htm'), + (u'\u526f\u520a Supplements', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/WWindex.htm'),]: + articles = self.parse_section3(url, 'http://www.mingpaovan.com/') if articles: feeds.append((title, articles)) - - for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'), - (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]: - articles = self.parse_section(url) + elif __Region__ == 'Toronto': + for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TAindex.htm'), + (u'\u52a0\u570b Canada', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TDindex.htm'), + (u'\u793e\u5340 Local', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TFindex.htm'), + (u'\u4e2d\u570b China', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TCAindex.htm'), + (u'\u570b\u969b World', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TTAindex.htm'), + (u'\u6e2f\u805e Hong Kong', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-GAindex.htm'), + (u'\u7d93\u6fdf Economics', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/THindex.htm'), + (u'\u9ad4\u80b2 Sports', 'http://www.mingpaotor.com/htm/News/' + dateStr 
+ '/TSindex.htm'), + (u'\u5f71\u8996 Film/TV', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-MAindex.htm'), + (u'\u526f\u520a Supplements', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/WWindex.htm'),]: + articles = self.parse_section3(url, 'http://www.mingpaotor.com/') if articles: feeds.append((title, articles)) - else: - for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'), - (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'), - (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]: - articles = self.parse_section(url) - if articles: - feeds.append((title, articles)) - - # special- editorial - ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr') - if ed_articles: - feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles)) - - for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'), - (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'), - (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]: - articles = self.parse_section(url) - if articles: - feeds.append((title, articles)) - - # special - finance - #fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm') - fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea') - if fin_articles: - feeds.append((u'\u7d93\u6fdf Finance', fin_articles)) - - for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'), - (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]: - articles = self.parse_section(url) - if articles: - feeds.append((title, articles)) - - # special - entertainment - ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm') - 
if ent_articles: - feeds.append((u'\u5f71\u8996 Film/TV', ent_articles)) - - for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'), - (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]: - articles = self.parse_section(url) - if articles: - feeds.append((title, articles)) - - - # special- columns - col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn') - if col_articles: - feeds.append((u'\u5c08\u6b04 Columns', col_articles)) - return feeds # parse from news.mingpao.com @@ -256,11 +350,30 @@ class MPHKRecipe(BasicNewsRecipe): title = self.tag_to_string(i) url = 'http://life.mingpao.com/cfm/' + i.get('href', False) if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1): + url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article current_articles.append({'title': title, 'url': url, 'description': ''}) included_urls.append(url) current_articles.reverse() return current_articles + # parse from www.mingpaovan.com + def parse_section3(self, url, baseUrl): + self.get_fetchdate() + soup = self.index_to_soup(url) + divs = soup.findAll(attrs={'class': ['ListContentLargeLink']}) + current_articles = [] + included_urls = [] + divs.reverse() + for i in divs: + title = self.tag_to_string(i) + urlstr = i.get('href', False) + urlstr = baseUrl + '/' + urlstr.replace('../../../', '') + if urlstr not in included_urls: + current_articles.append({'title': title, 'url': urlstr, 'description': '', 'date': ''}) + included_urls.append(urlstr) + current_articles.reverse() + return current_articles + def parse_ed_section(self, url): self.get_fetchdate() soup = self.index_to_soup(url) @@ -338,7 +451,12 @@ class MPHKRecipe(BasicNewsRecipe): if dir is None: dir = self.output_dir if __UseChineseTitle__ == True: - title = u'\u660e\u5831 (\u9999\u6e2f)' + if __Region__ == 'Hong 
Kong': + title = u'\u660e\u5831 (\u9999\u6e2f)' + elif __Region__ == 'Vancouver': + title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)' + elif __Region__ == 'Toronto': + title = u'\u660e\u5831 (\u591a\u502b\u591a)' else: title = self.short_title() # if not generating a periodical, force date to apply in title diff --git a/recipes/ming_pao_toronto.recipe b/recipes/ming_pao_toronto.recipe new file mode 100644 index 0000000000..9f3d7f510c --- /dev/null +++ b/recipes/ming_pao_toronto.recipe @@ -0,0 +1,594 @@ +__license__ = 'GPL v3' +__copyright__ = '2010-2011, Eddie Lau' + +# Region - Hong Kong, Vancouver, Toronto +__Region__ = 'Toronto' +# Users of Kindle 3 with limited system-level CJK support +# please replace the following "True" with "False". +__MakePeriodical__ = True +# Turn below to true if your device supports display of CJK titles +__UseChineseTitle__ = False +# Set it to False if you want to skip images +__KeepImages__ = True +# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source +__UseLife__ = True + + +''' +Change Log: +2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source + provide options to remove all images in the file +2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages +2011/03/06: add new articles for finance section, also a new section "Columns" +2011/02/28: rearrange the sections + [Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles + View] make it the same title if generating a periodical, so past issue will be automatically put into "Past Issues" + folder in Kindle 3 +2011/02/20: skip duplicated links in finance section, put photos which may extend a whole page to the back of the articles + clean up the indentation +2010/12/07: add entertainment section, use newspaper front page as ebook cover, suppress date display in 
section list + (to avoid wrong date display in case the user generates the ebook in a time zone different from HKT) +2010/11/22: add English section, remove eco-news section which is not updated daily, correct + ordering of articles +2010/11/12: add news image and eco-news section +2010/11/08: add parsing of finance section +2010/11/06: temporary work-around for Kindle device having no capability to display unicode + in section/article list. +2010/10/31: skip repeated articles in section pages +''' + +import os, datetime, re +from calibre.web.feeds.recipes import BasicNewsRecipe +from contextlib import nested +from calibre.ebooks.BeautifulSoup import BeautifulSoup +from calibre.ebooks.metadata.opf2 import OPFCreator +from calibre.ebooks.metadata.toc import TOC +from calibre.ebooks.metadata import MetaInformation + +# MAIN CLASS +class MPRecipe(BasicNewsRecipe): + if __Region__ == 'Hong Kong': + title = 'Ming Pao - Hong Kong' + description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)' + category = 'Chinese, News, Hong Kong' + extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}' + masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif' + keep_only_tags = [dict(name='h1'), + dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title + dict(name='font', attrs={'color':['AA0000']}), # for column articles title + dict(attrs={'id':['newscontent']}), # entertainment and column page content + dict(attrs={'id':['newscontent01','newscontent02']}), + dict(attrs={'class':['photo']}), + dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com + dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com + ] + if __KeepImages__: + remove_tags = [dict(name='style'), + 
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com + dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article + #dict(name='table') # for content fetched from life.mingpao.com + ] + else: + remove_tags = [dict(name='style'), + dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com + dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article + dict(name='img'), + #dict(name='table') # for content fetched from life.mingpao.com + ] + remove_attributes = ['width'] + preprocess_regexps = [ + (re.compile(r'
', re.DOTALL|re.IGNORECASE), + lambda match: '

'), + (re.compile(r'

', re.DOTALL|re.IGNORECASE), + lambda match: ''), + (re.compile(r'

', re.DOTALL|re.IGNORECASE), # for entertainment page + lambda match: ''), + # skip
after title in life.mingpao.com fetched article + (re.compile(r"

", re.DOTALL|re.IGNORECASE), + lambda match: "
"), + (re.compile(r"

", re.DOTALL|re.IGNORECASE), + lambda match: "") + ] + elif __Region__ == 'Vancouver': + title = 'Ming Pao - Vancouver' + description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)' + category = 'Chinese, News, Vancouver' + extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}' + masthead_url = 'http://www.mingpaovan.com/image/mainlogo2_VAN2.gif' + keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}), + dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}), + dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}), + ] + if __KeepImages__: + remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon + else: + remove_tags = [dict(name='img')] + remove_attributes = ['width'] + preprocess_regexps = [(re.compile(r' ', re.DOTALL|re.IGNORECASE), + lambda match: ''), + ] + elif __Region__ == 'Toronto': + title = 'Ming Pao - Toronto' + description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)' + category = 'Chinese, News, Toronto' + extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}' + masthead_url = 'http://www.mingpaotor.com/image/mainlogo2_TOR2.gif' + keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}), + dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}), + dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}), + ] + if __KeepImages__: + remove_tags = [dict(name='img', 
attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon + else: + remove_tags = [dict(name='img')] + remove_attributes = ['width'] + preprocess_regexps = [(re.compile(r' ', re.DOTALL|re.IGNORECASE), + lambda match: ''), + ] + + oldest_article = 1 + max_articles_per_feed = 100 + __author__ = 'Eddie Lau' + publisher = 'MingPao' + remove_javascript = True + use_embedded_content = False + no_stylesheets = True + language = 'zh' + encoding = 'Big5-HKSCS' + recursions = 0 + conversion_options = {'linearize_tables':True} + timefmt = '' + + def image_url_processor(cls, baseurl, url): + # trick: break the url at the first occurance of digit, add an additional + # '_' at the front + # not working, may need to move this to preprocess_html() method +# minIdx = 10000 +# i0 = url.find('0') +# if i0 >= 0 and i0 < minIdx: +# minIdx = i0 +# i1 = url.find('1') +# if i1 >= 0 and i1 < minIdx: +# minIdx = i1 +# i2 = url.find('2') +# if i2 >= 0 and i2 < minIdx: +# minIdx = i2 +# i3 = url.find('3') +# if i3 >= 0 and i0 < minIdx: +# minIdx = i3 +# i4 = url.find('4') +# if i4 >= 0 and i4 < minIdx: +# minIdx = i4 +# i5 = url.find('5') +# if i5 >= 0 and i5 < minIdx: +# minIdx = i5 +# i6 = url.find('6') +# if i6 >= 0 and i6 < minIdx: +# minIdx = i6 +# i7 = url.find('7') +# if i7 >= 0 and i7 < minIdx: +# minIdx = i7 +# i8 = url.find('8') +# if i8 >= 0 and i8 < minIdx: +# minIdx = i8 +# i9 = url.find('9') +# if i9 >= 0 and i9 < minIdx: +# minIdx = i9 + return url + + def get_dtlocal(self): + dt_utc = datetime.datetime.utcnow() + if __Region__ == 'Hong Kong': + # convert UTC to local hk time - at HKT 5.30am, all news are available + dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24) + # dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24) + elif __Region__ == 'Vancouver': + # convert UTC to local Vancouver time - at PST time 5.30am, all news are available + dt_local = dt_utc + datetime.timedelta(-8.0/24) - 
datetime.timedelta(5.5/24) + #dt_local = dt_utc.astimezone(pytz.timezone('America/Vancouver')) - datetime.timedelta(5.5/24) + elif __Region__ == 'Toronto': + # convert UTC to local Toronto time - at EST time 8.30am, all news are available + dt_local = dt_utc + datetime.timedelta(-5.0/24) - datetime.timedelta(8.5/24) + #dt_local = dt_utc.astimezone(pytz.timezone('America/Toronto')) - datetime.timedelta(8.5/24) + return dt_local + + def get_fetchdate(self): + return self.get_dtlocal().strftime("%Y%m%d") + + def get_fetchformatteddate(self): + return self.get_dtlocal().strftime("%Y-%m-%d") + + def get_fetchday(self): + return self.get_dtlocal().strftime("%d") + + def get_cover_url(self): + if __Region__ == 'Hong Kong': + cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg' + elif __Region__ == 'Vancouver': + cover = 'http://www.mingpaovan.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgva1s.jpg' + elif __Region__ == 'Toronto': + cover = 'http://www.mingpaotor.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgtas.jpg' + br = BasicNewsRecipe.get_browser() + try: + br.open(cover) + except: + cover = None + return cover + + def parse_index(self): + feeds = [] + dateStr = self.get_fetchdate() + + if __Region__ == 'Hong Kong': + if __UseLife__: + for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'), + (u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'), + (u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'), + (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr', 'nal'), + (u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr 
+'&Category=nalfa', 'nal'), + (u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalca', 'nal'), + (u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'), + (u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'), + (u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'), + (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'), + (u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]: + articles = self.parse_section2(url, keystr) + if articles: + feeds.append((title, articles)) + + for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'), + (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]: + articles = self.parse_section(url) + if articles: + feeds.append((title, articles)) + else: + for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'), + (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'), + (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]: + articles = self.parse_section(url) + if articles: + feeds.append((title, articles)) + + # special- editorial + ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr') + if ed_articles: + feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles)) + + for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'), + (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'), + (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]: + articles = 
self.parse_section(url) + if articles: + feeds.append((title, articles)) + + # special - finance + #fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm') + fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea') + if fin_articles: + feeds.append((u'\u7d93\u6fdf Finance', fin_articles)) + + for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'), + (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]: + articles = self.parse_section(url) + if articles: + feeds.append((title, articles)) + + # special - entertainment + ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm') + if ent_articles: + feeds.append((u'\u5f71\u8996 Film/TV', ent_articles)) + + for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'), + (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]: + articles = self.parse_section(url) + if articles: + feeds.append((title, articles)) + + + # special- columns + col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn') + if col_articles: + feeds.append((u'\u5c08\u6b04 Columns', col_articles)) + elif __Region__ == 'Vancouver': + for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'), + (u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'), + (u'\u793e\u5340 Local', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VDindex.htm'), + (u'\u6e2f\u805e Hong Kong', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-VGindex.htm'), + (u'\u570b\u969b World', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VTindex.htm'), + (u'\u4e2d\u570b China', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VCindex.htm'), + 
(u'\u7d93\u6fdf Economics', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VEindex.htm'), + (u'\u9ad4\u80b2 Sports', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VSindex.htm'), + (u'\u5f71\u8996 Film/TV', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-MAindex.htm'), + (u'\u526f\u520a Supplements', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/WWindex.htm'),]: + articles = self.parse_section3(url, 'http://www.mingpaovan.com/') + if articles: + feeds.append((title, articles)) + elif __Region__ == 'Toronto': + for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TAindex.htm'), + (u'\u52a0\u570b Canada', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TDindex.htm'), + (u'\u793e\u5340 Local', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TFindex.htm'), + (u'\u4e2d\u570b China', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TCAindex.htm'), + (u'\u570b\u969b World', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TTAindex.htm'), + (u'\u6e2f\u805e Hong Kong', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-GAindex.htm'), + (u'\u7d93\u6fdf Economics', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/THindex.htm'), + (u'\u9ad4\u80b2 Sports', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TSindex.htm'), + (u'\u5f71\u8996 Film/TV', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-MAindex.htm'), + (u'\u526f\u520a Supplements', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/WWindex.htm'),]: + articles = self.parse_section3(url, 'http://www.mingpaotor.com/') + if articles: + feeds.append((title, articles)) + return feeds + + # parse from news.mingpao.com + def parse_section(self, url): + dateStr = self.get_fetchdate() + soup = self.index_to_soup(url) + divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']}) + current_articles = [] + included_urls = [] + divs.reverse() + for i in divs: + a = i.find('a', href = True) + title = self.tag_to_string(a) + url = 
a.get('href', False) + url = 'http://news.mingpao.com/' + dateStr + '/' +url + if url not in included_urls and url.rfind('Redirect') == -1: + current_articles.append({'title': title, 'url': url, 'description':'', 'date':''}) + included_urls.append(url) + current_articles.reverse() + return current_articles + + # parse from life.mingpao.com + def parse_section2(self, url, keystr): + self.get_fetchdate() + soup = self.index_to_soup(url) + a = soup.findAll('a', href=True) + a.reverse() + current_articles = [] + included_urls = [] + for i in a: + title = self.tag_to_string(i) + url = 'http://life.mingpao.com/cfm/' + i.get('href', False) + if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1): + url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article + current_articles.append({'title': title, 'url': url, 'description': ''}) + included_urls.append(url) + current_articles.reverse() + return current_articles + + # parse from www.mingpaovan.com + def parse_section3(self, url, baseUrl): + self.get_fetchdate() + soup = self.index_to_soup(url) + divs = soup.findAll(attrs={'class': ['ListContentLargeLink']}) + current_articles = [] + included_urls = [] + divs.reverse() + for i in divs: + title = self.tag_to_string(i) + urlstr = i.get('href', False) + urlstr = baseUrl + '/' + urlstr.replace('../../../', '') + if urlstr not in included_urls: + current_articles.append({'title': title, 'url': urlstr, 'description': '', 'date': ''}) + included_urls.append(urlstr) + current_articles.reverse() + return current_articles + + def parse_ed_section(self, url): + self.get_fetchdate() + soup = self.index_to_soup(url) + a = soup.findAll('a', href=True) + a.reverse() + current_articles = [] + included_urls = [] + for i in a: + title = self.tag_to_string(i) + url = 'http://life.mingpao.com/cfm/' + i.get('href', False) + if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('nal') == -1): 
+ current_articles.append({'title': title, 'url': url, 'description': ''}) + included_urls.append(url) + current_articles.reverse() + return current_articles + + def parse_fin_section(self, url): + self.get_fetchdate() + soup = self.index_to_soup(url) + a = soup.findAll('a', href= True) + current_articles = [] + included_urls = [] + for i in a: + #url = 'http://www.mpfinance.com/cfm/' + i.get('href', False) + url = 'http://life.mingpao.com/cfm/' + i.get('href', False) + #if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1: + if url not in included_urls and (not url.rfind('txt') == -1) and (not url.rfind('nal') == -1): + title = self.tag_to_string(i) + current_articles.append({'title': title, 'url': url, 'description':''}) + included_urls.append(url) + return current_articles + + def parse_ent_section(self, url): + self.get_fetchdate() + soup = self.index_to_soup(url) + a = soup.findAll('a', href=True) + a.reverse() + current_articles = [] + included_urls = [] + for i in a: + title = self.tag_to_string(i) + url = 'http://ol.mingpao.com/cfm/' + i.get('href', False) + if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('star') == -1): + current_articles.append({'title': title, 'url': url, 'description': ''}) + included_urls.append(url) + current_articles.reverse() + return current_articles + + def parse_col_section(self, url): + self.get_fetchdate() + soup = self.index_to_soup(url) + a = soup.findAll('a', href=True) + a.reverse() + current_articles = [] + included_urls = [] + for i in a: + title = self.tag_to_string(i) + url = 'http://life.mingpao.com/cfm/' + i.get('href', False) + if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('ncl') == -1): + current_articles.append({'title': title, 'url': url, 'description': ''}) + included_urls.append(url) + current_articles.reverse() + return current_articles + + def preprocess_html(self, soup): + for item in 
soup.findAll(style=True): + del item['style'] + for item in soup.findAll(style=True): + del item['width'] + for item in soup.findAll(stype=True): + del item['absmiddle'] + return soup + + def create_opf(self, feeds, dir=None): + if dir is None: + dir = self.output_dir + if __UseChineseTitle__ == True: + if __Region__ == 'Hong Kong': + title = u'\u660e\u5831 (\u9999\u6e2f)' + elif __Region__ == 'Vancouver': + title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)' + elif __Region__ == 'Toronto': + title = u'\u660e\u5831 (\u591a\u502b\u591a)' + else: + title = self.short_title() + # if not generating a periodical, force date to apply in title + if __MakePeriodical__ == False: + title = title + ' ' + self.get_fetchformatteddate() + if True: + mi = MetaInformation(title, [self.publisher]) + mi.publisher = self.publisher + mi.author_sort = self.publisher + if __MakePeriodical__ == True: + mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title() + else: + mi.publication_type = self.publication_type+':'+self.short_title() + #mi.timestamp = nowf() + mi.timestamp = self.get_dtlocal() + mi.comments = self.description + if not isinstance(mi.comments, unicode): + mi.comments = mi.comments.decode('utf-8', 'replace') + #mi.pubdate = nowf() + mi.pubdate = self.get_dtlocal() + opf_path = os.path.join(dir, 'index.opf') + ncx_path = os.path.join(dir, 'index.ncx') + opf = OPFCreator(dir, mi) + # Add mastheadImage entry to section + mp = getattr(self, 'masthead_path', None) + if mp is not None and os.access(mp, os.R_OK): + from calibre.ebooks.metadata.opf2 import Guide + ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu()) + ref.type = 'masthead' + ref.title = 'Masthead Image' + opf.guide.append(ref) + + manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))] + manifest.append(os.path.join(dir, 'index.html')) + manifest.append(os.path.join(dir, 'index.ncx')) + + # Get cover + cpath = getattr(self, 'cover_path', None) + if cpath is None: 
+ pf = open(os.path.join(dir, 'cover.jpg'), 'wb') + if self.default_cover(pf): + cpath = pf.name + if cpath is not None and os.access(cpath, os.R_OK): + opf.cover = cpath + manifest.append(cpath) + + # Get masthead + mpath = getattr(self, 'masthead_path', None) + if mpath is not None and os.access(mpath, os.R_OK): + manifest.append(mpath) + + opf.create_manifest_from_files_in(manifest) + for mani in opf.manifest: + if mani.path.endswith('.ncx'): + mani.id = 'ncx' + if mani.path.endswith('mastheadImage.jpg'): + mani.id = 'masthead-image' + entries = ['index.html'] + toc = TOC(base_path=dir) + self.play_order_counter = 0 + self.play_order_map = {} + + def feed_index(num, parent): + f = feeds[num] + for j, a in enumerate(f): + if getattr(a, 'downloaded', False): + adir = 'feed_%d/article_%d/'%(num, j) + auth = a.author + if not auth: + auth = None + desc = a.text_summary + if not desc: + desc = None + else: + desc = self.description_limiter(desc) + entries.append('%sindex.html'%adir) + po = self.play_order_map.get(entries[-1], None) + if po is None: + self.play_order_counter += 1 + po = self.play_order_counter + parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'), + play_order=po, author=auth, description=desc) + last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep)) + for sp in a.sub_pages: + prefix = os.path.commonprefix([opf_path, sp]) + relp = sp[len(prefix):] + entries.append(relp.replace(os.sep, '/')) + last = sp + + if os.path.exists(last): + with open(last, 'rb') as fi: + src = fi.read().decode('utf-8') + soup = BeautifulSoup(src) + body = soup.find('body') + if body is not None: + prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last)))) + templ = self.navbar.generate(True, num, j, len(f), + not self.has_single_feed, + a.orig_url, self.publisher, prefix=prefix, + center=self.center_navbar) + elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div') + 
body.insert(len(body.contents), elem) + with open(last, 'wb') as fi: + fi.write(unicode(soup).encode('utf-8')) + if len(feeds) == 0: + raise Exception('All feeds are empty, aborting.') + + if len(feeds) > 1: + for i, f in enumerate(feeds): + entries.append('feed_%d/index.html'%i) + po = self.play_order_map.get(entries[-1], None) + if po is None: + self.play_order_counter += 1 + po = self.play_order_counter + auth = getattr(f, 'author', None) + if not auth: + auth = None + desc = getattr(f, 'description', None) + if not desc: + desc = None + feed_index(i, toc.add_item('feed_%d/index.html'%i, None, + f.title, play_order=po, description=desc, author=auth)) + + else: + entries.append('feed_%d/index.html'%0) + feed_index(0, toc) + + for i, p in enumerate(entries): + entries[i] = os.path.join(dir, p.replace('/', os.sep)) + opf.create_spine(entries) + opf.set_toc(toc) + + with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file): + opf.render(opf_file, ncx_file) + diff --git a/recipes/ming_pao_vancouver.recipe b/recipes/ming_pao_vancouver.recipe new file mode 100644 index 0000000000..3b13211d01 --- /dev/null +++ b/recipes/ming_pao_vancouver.recipe @@ -0,0 +1,594 @@ +__license__ = 'GPL v3' +__copyright__ = '2010-2011, Eddie Lau' + +# Region - Hong Kong, Vancouver, Toronto +__Region__ = 'Vancouver' +# Users of Kindle 3 with limited system-level CJK support +# please replace the following "True" with "False". 
+__MakePeriodical__ = True +# Turn below to true if your device supports display of CJK titles +__UseChineseTitle__ = False +# Set it to False if you want to skip images +__KeepImages__ = True +# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source +__UseLife__ = True + + +''' +Change Log: +2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source + provide options to remove all images in the file +2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages +2011/03/06: add new articles for finance section, also a new section "Columns" +2011/02/28: rearrange the sections + [Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles + View] make it the same title if generating a periodical, so past issue will be automatically put into "Past Issues" + folder in Kindle 3 +2011/02/20: skip duplicated links in finance section, put photos which may extend a whole page to the back of the articles + clean up the indentation +2010/12/07: add entertainment section, use newspaper front page as ebook cover, suppress date display in section list + (to avoid wrong date display in case the user generates the ebook in a time zone different from HKT) +2010/11/22: add English section, remove eco-news section which is not updated daily, correct + ordering of articles +2010/11/12: add news image and eco-news section +2010/11/08: add parsing of finance section +2010/11/06: temporary work-around for Kindle device having no capability to display unicode + in section/article list. 
+2010/10/31: skip repeated articles in section pages +''' + +import os, datetime, re +from calibre.web.feeds.recipes import BasicNewsRecipe +from contextlib import nested +from calibre.ebooks.BeautifulSoup import BeautifulSoup +from calibre.ebooks.metadata.opf2 import OPFCreator +from calibre.ebooks.metadata.toc import TOC +from calibre.ebooks.metadata import MetaInformation + +# MAIN CLASS +class MPRecipe(BasicNewsRecipe): + if __Region__ == 'Hong Kong': + title = 'Ming Pao - Hong Kong' + description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)' + category = 'Chinese, News, Hong Kong' + extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}' + masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif' + keep_only_tags = [dict(name='h1'), + dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title + dict(name='font', attrs={'color':['AA0000']}), # for column articles title + dict(attrs={'id':['newscontent']}), # entertainment and column page content + dict(attrs={'id':['newscontent01','newscontent02']}), + dict(attrs={'class':['photo']}), + dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com + dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com + ] + if __KeepImages__: + remove_tags = [dict(name='style'), + dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com + dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article + #dict(name='table') # for content fetched from life.mingpao.com + ] + else: + remove_tags = [dict(name='style'), + dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com + dict(name='font', attrs={'size':['2'], 'color':['666666']}), # 
article date in life.mingpao.com article + dict(name='img'), + #dict(name='table') # for content fetched from life.mingpao.com + ] + remove_attributes = ['width'] + preprocess_regexps = [ + (re.compile(r'
', re.DOTALL|re.IGNORECASE), + lambda match: '

'), + (re.compile(r'

', re.DOTALL|re.IGNORECASE), + lambda match: ''), + (re.compile(r'

', re.DOTALL|re.IGNORECASE), # for entertainment page + lambda match: ''), + # skip
after title in life.mingpao.com fetched article + (re.compile(r"

", re.DOTALL|re.IGNORECASE), + lambda match: "
"), + (re.compile(r"

", re.DOTALL|re.IGNORECASE), + lambda match: "") + ] + elif __Region__ == 'Vancouver': + title = 'Ming Pao - Vancouver' + description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)' + category = 'Chinese, News, Vancouver' + extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}' + masthead_url = 'http://www.mingpaovan.com/image/mainlogo2_VAN2.gif' + keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}), + dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}), + dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}), + ] + if __KeepImages__: + remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon + else: + remove_tags = [dict(name='img')] + remove_attributes = ['width'] + preprocess_regexps = [(re.compile(r' ', re.DOTALL|re.IGNORECASE), + lambda match: ''), + ] + elif __Region__ == 'Toronto': + title = 'Ming Pao - Toronto' + description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)' + category = 'Chinese, News, Toronto' + extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}' + masthead_url = 'http://www.mingpaotor.com/image/mainlogo2_TOR2.gif' + keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}), + dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}), + dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}), + ] + if __KeepImages__: + remove_tags = [dict(name='img', 
attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon + else: + remove_tags = [dict(name='img')] + remove_attributes = ['width'] + preprocess_regexps = [(re.compile(r' ', re.DOTALL|re.IGNORECASE), + lambda match: ''), + ] + + oldest_article = 1 + max_articles_per_feed = 100 + __author__ = 'Eddie Lau' + publisher = 'MingPao' + remove_javascript = True + use_embedded_content = False + no_stylesheets = True + language = 'zh' + encoding = 'Big5-HKSCS' + recursions = 0 + conversion_options = {'linearize_tables':True} + timefmt = '' + + def image_url_processor(cls, baseurl, url): + # trick: break the url at the first occurance of digit, add an additional + # '_' at the front + # not working, may need to move this to preprocess_html() method +# minIdx = 10000 +# i0 = url.find('0') +# if i0 >= 0 and i0 < minIdx: +# minIdx = i0 +# i1 = url.find('1') +# if i1 >= 0 and i1 < minIdx: +# minIdx = i1 +# i2 = url.find('2') +# if i2 >= 0 and i2 < minIdx: +# minIdx = i2 +# i3 = url.find('3') +# if i3 >= 0 and i0 < minIdx: +# minIdx = i3 +# i4 = url.find('4') +# if i4 >= 0 and i4 < minIdx: +# minIdx = i4 +# i5 = url.find('5') +# if i5 >= 0 and i5 < minIdx: +# minIdx = i5 +# i6 = url.find('6') +# if i6 >= 0 and i6 < minIdx: +# minIdx = i6 +# i7 = url.find('7') +# if i7 >= 0 and i7 < minIdx: +# minIdx = i7 +# i8 = url.find('8') +# if i8 >= 0 and i8 < minIdx: +# minIdx = i8 +# i9 = url.find('9') +# if i9 >= 0 and i9 < minIdx: +# minIdx = i9 + return url + + def get_dtlocal(self): + dt_utc = datetime.datetime.utcnow() + if __Region__ == 'Hong Kong': + # convert UTC to local hk time - at HKT 5.30am, all news are available + dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24) + # dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24) + elif __Region__ == 'Vancouver': + # convert UTC to local Vancouver time - at PST time 5.30am, all news are available + dt_local = dt_utc + datetime.timedelta(-8.0/24) - 
def get_cover_url(self):
    """Return the URL of today's front-page image, or None if unavailable.

    The URL is built from the edition date (``get_fetchdate()``, YYYYMMDD)
    and day of month (``get_fetchday()``) for the region selected by the
    module-level ``__Region__`` switch. The URL is then opened once to check
    that the image really exists; any failure to open it yields ``None``.

    :return: the cover URL as a string, or ``None`` when the region is not
        one of 'Hong Kong' / 'Vancouver' / 'Toronto' or the image cannot be
        opened.
    """
    # Evaluate each date helper once instead of once per use in the URL.
    fetchdate = self.get_fetchdate()
    fetchday = self.get_fetchday()

    if __Region__ == 'Hong Kong':
        cover = 'http://news.mingpao.com/' + fetchdate + '/' + fetchdate + '_' + fetchday + 'gacov.jpg'
    elif __Region__ == 'Vancouver':
        cover = 'http://www.mingpaovan.com/ftp/News/' + fetchdate + '/' + fetchday + 'pgva1s.jpg'
    elif __Region__ == 'Toronto':
        cover = 'http://www.mingpaotor.com/ftp/News/' + fetchdate + '/' + fetchday + 'pgtas.jpg'
    else:
        # Previously ``cover`` was simply left unbound here and the resulting
        # UnboundLocalError was hidden by a bare ``except``; the observable
        # result (None) is kept, but is now explicit.
        return None

    br = BasicNewsRecipe.get_browser()
    try:
        br.open(cover)
    except Exception:
        # Best effort: a missing/unreachable cover must not abort the
        # download. ``Exception`` (not a bare except) so that
        # KeyboardInterrupt / SystemExit still propagate.
        cover = None
    return cover
+'&Category=nalfa', 'nal'), + (u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalca', 'nal'), + (u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'), + (u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'), + (u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'), + (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'), + (u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]: + articles = self.parse_section2(url, keystr) + if articles: + feeds.append((title, articles)) + + for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'), + (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]: + articles = self.parse_section(url) + if articles: + feeds.append((title, articles)) + else: + for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'), + (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'), + (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]: + articles = self.parse_section(url) + if articles: + feeds.append((title, articles)) + + # special- editorial + ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr') + if ed_articles: + feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles)) + + for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'), + (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'), + (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]: + articles = 
self.parse_section(url) + if articles: + feeds.append((title, articles)) + + # special - finance + #fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm') + fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea') + if fin_articles: + feeds.append((u'\u7d93\u6fdf Finance', fin_articles)) + + for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'), + (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]: + articles = self.parse_section(url) + if articles: + feeds.append((title, articles)) + + # special - entertainment + ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm') + if ent_articles: + feeds.append((u'\u5f71\u8996 Film/TV', ent_articles)) + + for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'), + (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]: + articles = self.parse_section(url) + if articles: + feeds.append((title, articles)) + + + # special- columns + col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn') + if col_articles: + feeds.append((u'\u5c08\u6b04 Columns', col_articles)) + elif __Region__ == 'Vancouver': + for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'), + (u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'), + (u'\u793e\u5340 Local', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VDindex.htm'), + (u'\u6e2f\u805e Hong Kong', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-VGindex.htm'), + (u'\u570b\u969b World', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VTindex.htm'), + (u'\u4e2d\u570b China', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VCindex.htm'), + 
(u'\u7d93\u6fdf Economics', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VEindex.htm'), + (u'\u9ad4\u80b2 Sports', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VSindex.htm'), + (u'\u5f71\u8996 Film/TV', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-MAindex.htm'), + (u'\u526f\u520a Supplements', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/WWindex.htm'),]: + articles = self.parse_section3(url, 'http://www.mingpaovan.com/') + if articles: + feeds.append((title, articles)) + elif __Region__ == 'Toronto': + for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TAindex.htm'), + (u'\u52a0\u570b Canada', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TDindex.htm'), + (u'\u793e\u5340 Local', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TFindex.htm'), + (u'\u4e2d\u570b China', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TCAindex.htm'), + (u'\u570b\u969b World', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TTAindex.htm'), + (u'\u6e2f\u805e Hong Kong', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-GAindex.htm'), + (u'\u7d93\u6fdf Economics', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/THindex.htm'), + (u'\u9ad4\u80b2 Sports', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TSindex.htm'), + (u'\u5f71\u8996 Film/TV', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-MAindex.htm'), + (u'\u526f\u520a Supplements', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/WWindex.htm'),]: + articles = self.parse_section3(url, 'http://www.mingpaotor.com/') + if articles: + feeds.append((title, articles)) + return feeds + + # parse from news.mingpao.com + def parse_section(self, url): + dateStr = self.get_fetchdate() + soup = self.index_to_soup(url) + divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']}) + current_articles = [] + included_urls = [] + divs.reverse() + for i in divs: + a = i.find('a', href = True) + title = self.tag_to_string(a) + url = 
# parse from www.mingpaovan.com
def parse_section3(self, url, baseUrl):
    """Build the article list for one section index page.

    Fetches ``url`` with ``self.index_to_soup`` and collects every element
    whose class is ``ListContentLargeLink``. Each element's ``href`` has the
    ``'../../../'`` prefix stripped and is appended to ``baseUrl``.

    When the same article URL occurs more than once, only the *last*
    occurrence on the page is kept; the returned list is in page order.

    :param url: URL of the section index page.
    :param baseUrl: site root the relative links are resolved against; a
        trailing ``'/'`` is accepted and does not produce ``'//'`` in the
        resulting article URLs.
    :return: list of dicts with keys ``title``, ``url``, ``description`` and
        ``date`` (the last two are always empty strings).
    """
    soup = self.index_to_soup(url)
    links = soup.findAll(attrs={'class': ['ListContentLargeLink']})

    # Callers pass a base that already ends in '/', so normalise it once
    # rather than emitting 'http://host//htm/...'.
    site_root = baseUrl.rstrip('/')

    current_articles = []
    included_urls = set()
    # Walk the page backwards so that, for duplicates, the last occurrence
    # is the one that is recorded; the list is flipped back afterwards.
    for link in reversed(links):
        href = link.get('href', False)
        if not href:
            # An element without href cannot be turned into an article URL.
            continue
        urlstr = site_root + '/' + href.replace('../../../', '')
        if urlstr in included_urls:
            continue
        included_urls.add(urlstr)
        current_articles.append({'title': self.tag_to_string(link),
                                 'url': urlstr,
                                 'description': '',
                                 'date': ''})
    current_articles.reverse()
    return current_articles
def preprocess_html(self, soup):
    """Strip presentational attributes from a downloaded article.

    Removes, in place:

    * every ``style`` attribute,
    * every ``width`` attribute,
    * ``align`` attributes whose value is ``absmiddle`` (any letter case).

    Previously only the first step had any effect: the second loop searched
    for ``style`` again (already deleted, so nothing matched) and the third
    searched for an attribute named ``stype`` and deleted one named
    ``absmiddle``, neither of which exists.

    :param soup: parsed article document; must offer ``findAll(**attrs)``
        returning elements that support ``del element[attr]``.
    :return: the same ``soup`` object, modified in place.
    """
    for item in soup.findAll(style=True):
        del item['style']
    for item in soup.findAll(width=True):
        del item['width']
    # NOTE(review): 'absmiddle' is a value of the ``align`` attribute, so the
    # original ``del item['absmiddle']`` is presumed to have meant this.
    for item in soup.findAll(align=True):
        if item['align'].lower() == 'absmiddle':
            del item['align']
    return soup
+ pf = open(os.path.join(dir, 'cover.jpg'), 'wb') + if self.default_cover(pf): + cpath = pf.name + if cpath is not None and os.access(cpath, os.R_OK): + opf.cover = cpath + manifest.append(cpath) + + # Get masthead + mpath = getattr(self, 'masthead_path', None) + if mpath is not None and os.access(mpath, os.R_OK): + manifest.append(mpath) + + opf.create_manifest_from_files_in(manifest) + for mani in opf.manifest: + if mani.path.endswith('.ncx'): + mani.id = 'ncx' + if mani.path.endswith('mastheadImage.jpg'): + mani.id = 'masthead-image' + entries = ['index.html'] + toc = TOC(base_path=dir) + self.play_order_counter = 0 + self.play_order_map = {} + + def feed_index(num, parent): + f = feeds[num] + for j, a in enumerate(f): + if getattr(a, 'downloaded', False): + adir = 'feed_%d/article_%d/'%(num, j) + auth = a.author + if not auth: + auth = None + desc = a.text_summary + if not desc: + desc = None + else: + desc = self.description_limiter(desc) + entries.append('%sindex.html'%adir) + po = self.play_order_map.get(entries[-1], None) + if po is None: + self.play_order_counter += 1 + po = self.play_order_counter + parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'), + play_order=po, author=auth, description=desc) + last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep)) + for sp in a.sub_pages: + prefix = os.path.commonprefix([opf_path, sp]) + relp = sp[len(prefix):] + entries.append(relp.replace(os.sep, '/')) + last = sp + + if os.path.exists(last): + with open(last, 'rb') as fi: + src = fi.read().decode('utf-8') + soup = BeautifulSoup(src) + body = soup.find('body') + if body is not None: + prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last)))) + templ = self.navbar.generate(True, num, j, len(f), + not self.has_single_feed, + a.orig_url, self.publisher, prefix=prefix, + center=self.center_navbar) + elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div') + 
body.insert(len(body.contents), elem) + with open(last, 'wb') as fi: + fi.write(unicode(soup).encode('utf-8')) + if len(feeds) == 0: + raise Exception('All feeds are empty, aborting.') + + if len(feeds) > 1: + for i, f in enumerate(feeds): + entries.append('feed_%d/index.html'%i) + po = self.play_order_map.get(entries[-1], None) + if po is None: + self.play_order_counter += 1 + po = self.play_order_counter + auth = getattr(f, 'author', None) + if not auth: + auth = None + desc = getattr(f, 'description', None) + if not desc: + desc = None + feed_index(i, toc.add_item('feed_%d/index.html'%i, None, + f.title, play_order=po, description=desc, author=auth)) + + else: + entries.append('feed_%d/index.html'%0) + feed_index(0, toc) + + for i, p in enumerate(entries): + entries[i] = os.path.join(dir, p.replace('/', os.sep)) + opf.create_spine(entries) + opf.set_toc(toc) + + with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file): + opf.render(opf_file, ncx_file) + diff --git a/recipes/wprost.recipe b/recipes/wprost.recipe index b317571981..b271665125 100644 --- a/recipes/wprost.recipe +++ b/recipes/wprost.recipe @@ -2,90 +2,92 @@ __license__ = 'GPL v3' __copyright__ = '2010, matek09, matek09@gmail.com' +__copyright__ = 'Modified 2011, Mariusz Wolek ' from calibre.web.feeds.news import BasicNewsRecipe import re class Wprost(BasicNewsRecipe): - EDITION = 0 - FIND_LAST_FULL_ISSUE = True - EXCLUDE_LOCKED = True - ICO_BLOCKED = 'http://www.wprost.pl/G/icons/ico_blocked.gif' + EDITION = 0 + FIND_LAST_FULL_ISSUE = True + EXCLUDE_LOCKED = True + ICO_BLOCKED = 'http://www.wprost.pl/G/icons/ico_blocked.gif' - title = u'Wprost' - __author__ = 'matek09' - description = 'Weekly magazine' - encoding = 'ISO-8859-2' - no_stylesheets = True - language = 'pl' - remove_javascript = True + title = u'Wprost' + __author__ = 'matek09' + description = 'Weekly magazine' + encoding = 'ISO-8859-2' + no_stylesheets = True + language = 'pl' + remove_javascript = True - 
remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'})) - remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'})) + remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'})) + remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'})) - '''keep_only_tags =[] - keep_only_tags.append(dict(name = 'table', attrs = {'id' : 'title-table'})) - keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-header'})) - keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-content'})) - keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'def element-autor'}))''' + '''keep_only_tags =[] + keep_only_tags.append(dict(name = 'table', attrs = {'id' : 'title-table'})) + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-header'})) + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-content'})) + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'def element-autor'}))''' - preprocess_regexps = [(re.compile(r'style="display: none;"'), lambda match: ''), - (re.compile(r'display: block;'), lambda match: '')] + preprocess_regexps = [(re.compile(r'style="display: none;"'), lambda match: ''), + (re.compile(r'display: block;'), lambda match: ''), + (re.compile(r'\\\<\/table\>'), lambda match: ''), + (re.compile(r'\'), lambda match: ''), + (re.compile(r'\'), lambda match: ''), + (re.compile(r'\'%( _('Ids')+':', links))) + elif field == 'authors' and not isdevice: + authors = [] + formatter = EvalFormatter() + for aut in mi.authors: + if mi.author_link_map[aut]: + link = mi.author_link_map[aut] + elif gprefs.get('default_author_link'): + vals = {'author': aut.replace(' ', '+')} + try: + vals['author_sort'] = mi.author_sort_map[aut].replace(' ', '+') + except: + vals['author_sort'] = aut.replace(' ', '+') + link = formatter.safe_format( + gprefs.get('default_author_link'), vals, '', vals) + if link: + link = prepare_string_for_xml(link) + 
authors.append(u'%s'%(link, aut)) + else: + authors.append(aut) + ans.append((field, u''%(name, + u' & '.join(authors)))) else: val = mi.format_field(field)[-1] if val is None: diff --git a/src/calibre/gui2/dialogs/edit_authors_dialog.py b/src/calibre/gui2/dialogs/edit_authors_dialog.py index a791551d27..300715c6e0 100644 --- a/src/calibre/gui2/dialogs/edit_authors_dialog.py +++ b/src/calibre/gui2/dialogs/edit_authors_dialog.py @@ -4,10 +4,11 @@ __docformat__ = 'restructuredtext en' __license__ = 'GPL v3' from PyQt4.Qt import (Qt, QDialog, QTableWidgetItem, QAbstractItemView, QIcon, - QDialogButtonBox, QFrame, QLabel, QTimer, QMenu, QApplication) + QDialogButtonBox, QFrame, QLabel, QTimer, QMenu, QApplication, + QByteArray) from calibre.ebooks.metadata import author_to_author_sort -from calibre.gui2 import error_dialog +from calibre.gui2 import error_dialog, gprefs from calibre.gui2.dialogs.edit_authors_dialog_ui import Ui_EditAuthorsDialog from calibre.utils.icu import sort_key @@ -20,7 +21,7 @@ class tableItem(QTableWidgetItem): class EditAuthorsDialog(QDialog, Ui_EditAuthorsDialog): - def __init__(self, parent, db, id_to_select, select_sort): + def __init__(self, parent, db, id_to_select, select_sort, select_link): QDialog.__init__(self, parent) Ui_EditAuthorsDialog.__init__(self) self.setupUi(self) @@ -29,11 +30,19 @@ class EditAuthorsDialog(QDialog, Ui_EditAuthorsDialog): self.setWindowFlags(self.windowFlags()&(~Qt.WindowContextHelpButtonHint)) self.setWindowIcon(icon) + try: + self.table_column_widths = \ + gprefs.get('manage_authors_table_widths', None) + geom = gprefs.get('manage_authors_dialog_geometry', bytearray('')) + self.restoreGeometry(QByteArray(geom)) + except: + pass + self.buttonBox.accepted.connect(self.accepted) # Set up the column headings self.table.setSelectionMode(QAbstractItemView.SingleSelection) - self.table.setColumnCount(2) + self.table.setColumnCount(3) self.down_arrow_icon = QIcon(I('arrow-down.png')) self.up_arrow_icon = 
def resizeEvent(self, *args):
    """Re-apply column widths whenever the dialog is resized.

    If widths were remembered (``self.table_column_widths``), they are
    applied column by column. Otherwise the usable table width is shared
    equally between all columns. In both cases the resulting state is
    persisted through ``self.save_state()``.

    :param args: forwarded unchanged to ``QDialog.resizeEvent``.
    """
    QDialog.resizeEvent(self, *args)

    # The attribute is assigned inside a try/except in __init__, so do not
    # assume it exists.
    saved_widths = getattr(self, 'table_column_widths', None)
    if saved_widths is not None:
        for c, w in enumerate(saved_widths):
            self.table.setColumnWidth(c, w)
    else:
        columns = self.table.columnCount()
        if columns > 0:
            # the vertical scroll bar might not be rendered, so might not yet
            # have a width. Assume 25. Not a problem because user-changed
            # column widths will be remembered
            w = self.table.width() - 25 - self.table.verticalHeader().width()
            # Floor division: the width handed to Qt stays an integer
            # regardless of which division semantics are in effect.
            w //= columns
            for c in range(0, columns):
                self.table.setColumnWidth(c, w)
    self.save_state()
calibre.gui2.dialogs.tag_editor import TagEditor from calibre.ebooks.metadata import string_to_authors, authors_to_string, title_sort -from calibre.ebooks.metadata.book.base import composite_formatter +from calibre.ebooks.metadata.book.base import SafeFormat from calibre.gui2.custom_column_widgets import populate_metadata_page from calibre.gui2 import error_dialog, ResizableDialog, UNDEFINED_QDATE, \ gprefs, question_dialog @@ -499,7 +499,7 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog): def s_r_get_field(self, mi, field): if field: if field == '{template}': - v = composite_formatter.safe_format\ + v = SafeFormat().safe_format\ (unicode(self.s_r_template.text()), mi, _('S/R TEMPLATE ERROR'), mi) return [v] fm = self.db.metadata_for_field(field) diff --git a/src/calibre/gui2/dialogs/quickview.py b/src/calibre/gui2/dialogs/quickview.py index 3a69368730..30b68a7b7d 100644 --- a/src/calibre/gui2/dialogs/quickview.py +++ b/src/calibre/gui2/dialogs/quickview.py @@ -18,16 +18,29 @@ class TableItem(QTableWidgetItem): A QTableWidgetItem that sorts on a separate string and uses ICU rules ''' - def __init__(self, val, sort): + def __init__(self, val, sort, idx=0): self.sort = sort + self.sort_idx = idx QTableWidgetItem.__init__(self, val) self.setFlags(Qt.ItemIsEnabled|Qt.ItemIsSelectable) def __ge__(self, other): - return sort_key(self.sort) >= sort_key(other.sort) + l = sort_key(self.sort) + r = sort_key(other.sort) + if l > r: + return 1 + if l == r: + return self.sort_idx >= other.sort_idx + return 0 def __lt__(self, other): - return sort_key(self.sort) < sort_key(other.sort) + l = sort_key(self.sort) + r = sort_key(other.sort) + if l < r: + return 1 + if l == r: + return self.sort_idx < other.sort_idx + return 0 class Quickview(QDialog, Ui_Quickview): @@ -95,6 +108,15 @@ class Quickview(QDialog, Ui_Quickview): self.search_button.clicked.connect(self.do_search) view.model().new_bookdisplay_data.connect(self.book_was_changed) + def set_database(self, 
db): + self.db = db + self.items.blockSignals(True) + self.books_table.blockSignals(True) + self.items.clear() + self.books_table.setRowCount(0) + self.books_table.blockSignals(False) + self.items.blockSignals(False) + # search button def do_search(self): if self.last_search is not None: @@ -185,7 +207,7 @@ class Quickview(QDialog, Ui_Quickview): series = mi.format_field('series')[1] if series is None: series = '' - a = TableItem(series, series) + a = TableItem(series, mi.series, mi.series_index) a.setToolTip(tt) self.books_table.setItem(row, 2, a) self.books_table.setRowHeight(row, self.books_table_row_height) diff --git a/src/calibre/gui2/dialogs/quickview.ui b/src/calibre/gui2/dialogs/quickview.ui index 2cdc7b7379..4b040e34d3 100644 --- a/src/calibre/gui2/dialogs/quickview.ui +++ b/src/calibre/gui2/dialogs/quickview.ui @@ -57,19 +57,6 @@ - - - - Qt::Vertical - - - - 0 - 0 - - - - diff --git a/src/calibre/gui2/dialogs/restore_library.py b/src/calibre/gui2/dialogs/restore_library.py index a57d6c86c1..60b224d1cd 100644 --- a/src/calibre/gui2/dialogs/restore_library.py +++ b/src/calibre/gui2/dialogs/restore_library.py @@ -54,7 +54,7 @@ class DBRestore(QDialog): def reject(self): self.rejected = True self.restorer.progress_callback = lambda x, y: x - QDialog.rejecet(self) + QDialog.reject(self) def update(self): if self.restorer.is_alive(): diff --git a/src/calibre/gui2/dialogs/template_dialog.py b/src/calibre/gui2/dialogs/template_dialog.py index f78e7a7383..7d30f37bc1 100644 --- a/src/calibre/gui2/dialogs/template_dialog.py +++ b/src/calibre/gui2/dialogs/template_dialog.py @@ -11,7 +11,7 @@ from PyQt4.Qt import (Qt, QDialog, QDialogButtonBox, QSyntaxHighlighter, QFont, from calibre.gui2 import error_dialog from calibre.gui2.dialogs.template_dialog_ui import Ui_TemplateDialog from calibre.utils.formatter_functions import formatter_functions -from calibre.ebooks.metadata.book.base import composite_formatter, Metadata +from calibre.ebooks.metadata.book.base import 
SafeFormat, Metadata from calibre.library.coloring import (displayable_columns) @@ -270,7 +270,7 @@ class TemplateDialog(QDialog, Ui_TemplateDialog): self.highlighter.regenerate_paren_positions() self.text_cursor_changed() self.template_value.setText( - composite_formatter.safe_format(cur_text, self.mi, + SafeFormat().safe_format(cur_text, self.mi, _('EXCEPTION: '), self.mi)) def text_cursor_changed(self): diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py index 40d6e2b6cf..8cbc2e1979 100644 --- a/src/calibre/gui2/library/models.py +++ b/src/calibre/gui2/library/models.py @@ -14,7 +14,7 @@ from PyQt4.Qt import (QAbstractTableModel, Qt, pyqtSignal, QIcon, QImage, from calibre.gui2 import NONE, UNDEFINED_QDATE from calibre.utils.pyparsing import ParseException from calibre.ebooks.metadata import fmt_sidx, authors_to_string, string_to_authors -from calibre.ebooks.metadata.book.base import composite_formatter +from calibre.ebooks.metadata.book.base import SafeFormat from calibre.ptempfile import PersistentTemporaryFile from calibre.utils.config import tweaks, prefs from calibre.utils.date import dt_factory, qt_to_dt @@ -91,6 +91,7 @@ class BooksModel(QAbstractTableModel): # {{{ self.current_highlighted_idx = None self.highlight_only = False self.colors = frozenset([unicode(c) for c in QColor.colorNames()]) + self.formatter = SafeFormat() self.read_config() def change_alignment(self, colname, alignment): @@ -711,7 +712,7 @@ class BooksModel(QAbstractTableModel): # {{{ try: if mi is None: mi = self.db.get_metadata(id_, index_is_id=True) - color = composite_formatter.safe_format(fmt, mi, '', mi) + color = self.formatter.safe_format(fmt, mi, '', mi) if color in self.colors: color = QColor(color) if color.isValid(): diff --git a/src/calibre/gui2/metadata/basic_widgets.py b/src/calibre/gui2/metadata/basic_widgets.py index 2d6c79d0e3..227a2257bc 100644 --- a/src/calibre/gui2/metadata/basic_widgets.py +++ 
b/src/calibre/gui2/metadata/basic_widgets.py @@ -1092,11 +1092,12 @@ class IdentifiersEdit(QLineEdit): # {{{ for x in parts: c = x.split(':') if len(c) > 1: - if c[0] == 'isbn': + itype = c[0].lower() + if itype == 'isbn': v = check_isbn(c[1]) if v is not None: c[1] = v - ans[c[0]] = c[1] + ans[itype] = c[1] return ans def fset(self, val): if not val: @@ -1112,7 +1113,7 @@ class IdentifiersEdit(QLineEdit): # {{{ if v is not None: val[k] = v ids = sorted(val.iteritems(), key=keygen) - txt = ', '.join(['%s:%s'%(k, v) for k, v in ids]) + txt = ', '.join(['%s:%s'%(k.lower(), v) for k, v in ids]) self.setText(txt.strip()) self.setCursorPosition(0) return property(fget=fget, fset=fset) diff --git a/src/calibre/gui2/preferences/look_feel.py b/src/calibre/gui2/preferences/look_feel.py index a2850679f1..a0f4953c9a 100644 --- a/src/calibre/gui2/preferences/look_feel.py +++ b/src/calibre/gui2/preferences/look_feel.py @@ -138,6 +138,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): (_('Partitioned'), 'partition')] r('tags_browser_partition_method', gprefs, choices=choices) r('tags_browser_collapse_at', gprefs) + r('default_author_link', gprefs) choices = set([k for k in db.field_metadata.all_field_keys() if db.field_metadata[k]['is_category'] and diff --git a/src/calibre/gui2/preferences/look_feel.ui b/src/calibre/gui2/preferences/look_feel.ui index cc9133a36f..07d533fdef 100644 --- a/src/calibre/gui2/preferences/look_feel.ui +++ b/src/calibre/gui2/preferences/look_feel.ui @@ -192,7 +192,7 @@ Book Details - + Select displayed metadata @@ -243,6 +243,31 @@ + + + + + + Default author link template: + + + opt_default_author_link + + + + + + + <p>Enter a template to be used to create a link for +an author in the books information dialog. This template will +be used when no link has been provided for the author using +Manage Authors. You can use the values {author} and +{author_sort}, and any template function. 
+ + + + + diff --git a/src/calibre/gui2/preferences/main.py b/src/calibre/gui2/preferences/main.py index 85a5fc018c..774b7f8958 100644 --- a/src/calibre/gui2/preferences/main.py +++ b/src/calibre/gui2/preferences/main.py @@ -357,7 +357,6 @@ class Preferences(QMainWindow): bytearray(self.saveGeometry())) if self.committed: self.gui.must_restart_before_config = self.must_restart - self.gui.tags_view.set_new_model() # in case columns changed self.gui.tags_view.recount() self.gui.create_device_menu() self.gui.set_device_menu_items_state(bool(self.gui.device_connected)) diff --git a/src/calibre/gui2/preferences/search.py b/src/calibre/gui2/preferences/search.py index 7bdb12ec55..c86de7f2a3 100644 --- a/src/calibre/gui2/preferences/search.py +++ b/src/calibre/gui2/preferences/search.py @@ -173,7 +173,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): def refresh_gui(self, gui): gui.set_highlight_only_button_icon() if self.muc_changed: - gui.tags_view.set_new_model() + gui.tags_view.recount() gui.search.search_as_you_type(config['search_as_you_type']) gui.search.do_search() diff --git a/src/calibre/gui2/store/stores/archive_org_plugin.py b/src/calibre/gui2/store/stores/archive_org_plugin.py index 691f819e8c..b2ea4e04f1 100644 --- a/src/calibre/gui2/store/stores/archive_org_plugin.py +++ b/src/calibre/gui2/store/stores/archive_org_plugin.py @@ -6,11 +6,7 @@ __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' -from contextlib import closing -from lxml import html - -from calibre import browser from calibre.gui2.store.basic_config import BasicStoreConfig from calibre.gui2.store.opensearch_store import OpenSearchStore from calibre.gui2.store.search_result import SearchResult @@ -19,9 +15,9 @@ class ArchiveOrgStore(BasicStoreConfig, OpenSearchStore): open_search_url = 'http://bookserver.archive.org/catalog/opensearch.xml' web_url = 'http://www.archive.org/details/texts' - + # http://bookserver.archive.org/catalog/ - + def 
search(self, query, max_results=10, timeout=60): for s in OpenSearchStore.search(self, query, max_results, timeout): s.detail_item = 'http://www.archive.org/details/' + s.detail_item.split(':')[-1] @@ -39,5 +35,5 @@ class ArchiveOrgStore(BasicStoreConfig, OpenSearchStore): idata = html.fromstring(nf.read()) formats = ', '.join(idata.xpath('//p[@id="dl" and @class="content"]//a/text()')) search_result.formats = formats.upper() - + return True diff --git a/src/calibre/gui2/store/stores/manybooks_plugin.py b/src/calibre/gui2/store/stores/manybooks_plugin.py index efd8d21e68..829a97012f 100644 --- a/src/calibre/gui2/store/stores/manybooks_plugin.py +++ b/src/calibre/gui2/store/stores/manybooks_plugin.py @@ -20,7 +20,7 @@ from calibre.gui2.store import StorePlugin from calibre.gui2.store.basic_config import BasicStoreConfig from calibre.gui2.store.search_result import SearchResult from calibre.gui2.store.web_store_dialog import WebStoreDialog - + class ManyBooksStore(BasicStoreConfig, StorePlugin): def open(self, parent=None, detail_item=None, external=False): @@ -29,7 +29,7 @@ class ManyBooksStore(BasicStoreConfig, StorePlugin): detail_url = None if detail_item: detail_url = url + detail_item - + if external or self.config.get('open_external', False): open_url(QUrl(url_slash_cleaner(detail_url if detail_url else url))) else: @@ -44,16 +44,16 @@ class ManyBooksStore(BasicStoreConfig, StorePlugin): # secondary titles. Google is also faster. # Using a google search so we can search on both fields at once. 
url = 'http://www.google.com/xhtml?q=site:manybooks.net+' + urllib.quote_plus(query) - + br = browser() - + counter = max_results with closing(br.open(url, timeout=timeout)) as f: doc = html.fromstring(f.read()) for data in doc.xpath('//div[@class="edewpi"]//div[@class="r ld"]'): if counter <= 0: break - + url = '' url_a = data.xpath('div[@class="jd"]/a') if url_a: @@ -65,13 +65,13 @@ class ManyBooksStore(BasicStoreConfig, StorePlugin): continue id = url.split('/')[-1] id = id.strip() - + url_a = html.fromstring(html.tostring(url_a)) heading = ''.join(url_a.xpath('//text()')) title, _, author = heading.rpartition('by ') author = author.split('-')[0] price = '$0.00' - + cover_url = '' mo = re.match('^\D+', id) if mo: @@ -79,10 +79,9 @@ class ManyBooksStore(BasicStoreConfig, StorePlugin): cover_name = cover_name.replace('etext', '') cover_id = id.split('.')[0] cover_url = 'http://www.manybooks.net/images/' + id[0] + '/' + cover_name + '/' + cover_id + '-thumb.jpg' - print(cover_url) counter -= 1 - + s = SearchResult() s.cover_url = cover_url s.title = title.strip() @@ -91,5 +90,5 @@ class ManyBooksStore(BasicStoreConfig, StorePlugin): s.detail_item = '/titles/' + id s.drm = SearchResult.DRM_UNLOCKED s.formts = 'EPUB, PDB (eReader, PalmDoc, zTXT, Plucker, iSilo), FB2, ZIP, AZW, MOBI, PRC, LIT, PKG, PDF, TXT, RB, RTF, LRF, TCR, JAR' - + yield s diff --git a/src/calibre/gui2/tag_browser/model.py b/src/calibre/gui2/tag_browser/model.py index 4022db4fd8..e759783d7b 100644 --- a/src/calibre/gui2/tag_browser/model.py +++ b/src/calibre/gui2/tag_browser/model.py @@ -2,16 +2,17 @@ # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai from __future__ import (unicode_literals, division, absolute_import, print_function) +from future_builtins import map __license__ = 'GPL v3' __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' import traceback, cPickle, copy -from itertools import repeat, izip +from itertools import repeat from PyQt4.Qt import 
(QAbstractItemModel, QIcon, QVariant, QFont, Qt, - QMimeData, QModelIndex, QTreeView) + QMimeData, QModelIndex, pyqtSignal) from calibre.gui2 import NONE, gprefs, config, error_dialog from calibre.library.database2 import Tag @@ -19,12 +20,21 @@ from calibre.utils.config import tweaks from calibre.utils.icu import sort_key, lower, strcmp from calibre.library.field_metadata import TagsIcons, category_icon_map from calibre.gui2.dialogs.confirm_delete import confirm -from calibre.utils.formatter import eval_formatter +from calibre.utils.formatter import EvalFormatter from calibre.utils.search_query_parser import saved_searches TAG_SEARCH_STATES = {'clear': 0, 'mark_plus': 1, 'mark_plusplus': 2, 'mark_minus': 3, 'mark_minusminus': 4} +_bf = None +def bf(): + global _bf + if _bf is None: + _bf = QFont() + _bf.setBold(True) + _bf = QVariant(_bf) + return _bf + class TagTreeItem(object): # {{{ CATEGORY = 0 @@ -41,16 +51,15 @@ class TagTreeItem(object): # {{{ self.icon_state_map = list(map(QVariant, icon_map)) if self.parent is not None: self.parent.append(self) + if data is None: self.type = self.ROOT else: self.type = self.TAG if category_icon is None else self.CATEGORY + if self.type == self.CATEGORY: self.name, self.icon = map(QVariant, (data, category_icon)) self.py_name = data - self.bold_font = QFont() - self.bold_font.setBold(True) - self.bold_font = QVariant(self.bold_font) self.category_key = category_key self.temporary = temporary self.tag = Tag(data, category=category_key, @@ -60,27 +69,21 @@ class TagTreeItem(object): # {{{ elif self.type == self.TAG: self.icon_state_map[0] = QVariant(data.icon) self.tag = data - if tooltip: - self.tooltip = tooltip + ' ' - else: - self.tooltip = '' + + self.tooltip = (tooltip + ' ') if tooltip else '' def break_cycles(self): - for x in self.children: - try: - x.break_cycles() - except: - pass - self.parent = self.icon_state_map = self.bold_font = self.tag = \ - self.icon = self.children = self.tooltip = \ - self.py_name = 
self.id_set = self.category_key = None + del self.parent + del self.children def __str__(self): if self.type == self.ROOT: return 'ROOT' if self.type == self.CATEGORY: - return 'CATEGORY:'+str(QVariant.toString(self.name))+':%d'%len(self.children) - return 'TAG:'+self.tag.name + return 'CATEGORY:'+str(QVariant.toString( + self.name))+':%d'%len(getattr(self, + 'children', [])) + return 'TAG: %s'%self.tag.name def row(self): if self.parent is not None: @@ -110,7 +113,7 @@ class TagTreeItem(object): # {{{ return self.icon_state_map[self.tag.state] return self.icon if role == Qt.FontRole: - return self.bold_font + return bf() if role == Qt.ToolTipRole and self.tooltip is not None: return QVariant(self.tooltip) return NONE @@ -195,41 +198,92 @@ class TagTreeItem(object): # {{{ class TagsModel(QAbstractItemModel): # {{{ - def __init__(self, db, parent, hidden_categories=None, - search_restriction=None, drag_drop_finished=None, - filter_categories_by=None, collapse_model='disable', - state_map={}): + search_item_renamed = pyqtSignal() + tag_item_renamed = pyqtSignal() + refresh_required = pyqtSignal() + restriction_error = pyqtSignal() + drag_drop_finished = pyqtSignal(object) + user_categories_edited = pyqtSignal(object, object) + + def __init__(self, parent): QAbstractItemModel.__init__(self, parent) self.node_map = {} - - # must do this here because 'QPixmap: Must construct a QApplication - # before a QPaintDevice'. 
The ':' at the end avoids polluting either of - # the other namespaces (alpha, '#', or '@') + self.category_nodes = [] iconmap = {} for key in category_icon_map: iconmap[key] = QIcon(I(category_icon_map[key])) self.category_icon_map = TagsIcons(iconmap) - self.categories_with_ratings = ['authors', 'series', 'publisher', 'tags'] - self.drag_drop_finished = drag_drop_finished - self.icon_state_map = [None, QIcon(I('plus.png')), QIcon(I('plusplus.png')), - QIcon(I('minus.png')), QIcon(I('minusminus.png'))] - self.db = db - self.tags_view = parent - self.hidden_categories = hidden_categories - self.search_restriction = search_restriction - self.row_map = [] - self.filter_categories_by = filter_categories_by - self.collapse_model = collapse_model + QIcon(I('minus.png')), QIcon(I('minusminus.png'))] + self.hidden_categories = set() + self.search_restriction = None + self.filter_categories_by = None + self.collapse_model = 'disable' + self.row_map = [] + self.root_item = self.create_node(icon_map=self.icon_state_map) + self.db = None + self._build_in_progress = False + self.reread_collapse_model({}, rebuild=False) + + def reread_collapse_model(self, state_map, rebuild=True): + if gprefs['tags_browser_collapse_at'] == 0: + self.collapse_model = 'disable' + else: + self.collapse_model = gprefs['tags_browser_partition_method'] + if rebuild: + self.rebuild_node_tree(state_map) + + def set_search_restriction(self, s): + self.search_restriction = s + self.rebuild_node_tree() + + def set_database(self, db): + self.beginResetModel() + self.search_restriction = None + hidden_cats = db.prefs.get('tag_browser_hidden_categories', None) + # migrate from config to db prefs + if hidden_cats is None: + hidden_cats = config['tag_browser_hidden_categories'] + self.hidden_categories = set() + # strip out any non-existence field keys + for cat in hidden_cats: + if cat in db.field_metadata: + self.hidden_categories.add(cat) + db.prefs.set('tag_browser_hidden_categories', 
list(self.hidden_categories)) + + self.db = db + self._run_rebuild() + self.endResetModel() + + def rebuild_node_tree(self, state_map={}): + if self._build_in_progress: + print ('Tag Browser build already in progress') + traceback.print_stack() + return + #traceback.print_stack() + #print () + self._build_in_progress = True + self.beginResetModel() + self._run_rebuild(state_map=state_map) + self.endResetModel() + self._build_in_progress = False + + def _run_rebuild(self, state_map={}): + for node in self.node_map.itervalues(): + node.break_cycles() + del node #Clear reference to node in the current frame + self.node_map.clear() + self.category_nodes = [] + self.root_item = self.create_node(icon_map=self.icon_state_map) + self._rebuild_node_tree(state_map=state_map) + + def _rebuild_node_tree(self, state_map): # Note that _get_category_nodes can indirectly change the # user_categories dict. - data = self._get_category_nodes(config['sort_tags_by']) - gst = db.prefs.get('grouped_search_terms', {}) - self.root_item = self.create_node(icon_map=self.icon_state_map) - self.category_nodes = [] + gst = self.db.prefs.get('grouped_search_terms', {}) last_category_node = None category_node_map = {} @@ -293,375 +347,11 @@ class TagsModel(QAbstractItemModel): # {{{ self.category_nodes.append(node) self._create_node_tree(data, state_map) - def break_cycles(self): - self.root_item.break_cycles() - self.db = self.root_item = None - self.node_map = {} - #traceback.print_stack() - #print - - # Drag'n Drop {{{ - def mimeTypes(self): - return ["application/calibre+from_library", - 'application/calibre+from_tag_browser'] - - def mimeData(self, indexes): - data = [] - for idx in indexes: - if idx.isValid(): - # get some useful serializable data - node = self.get_node(idx) - path = self.path_for_index(idx) - if node.type == TagTreeItem.CATEGORY: - d = (node.type, node.py_name, node.category_key) - else: - t = node.tag - p = node - while p.type != TagTreeItem.CATEGORY: - p = p.parent - d = 
(node.type, p.category_key, p.is_gst, t.original_name, - t.category, path) - data.append(d) - else: - data.append(None) - raw = bytearray(cPickle.dumps(data, -1)) - ans = QMimeData() - ans.setData('application/calibre+from_tag_browser', raw) - return ans - - def dropMimeData(self, md, action, row, column, parent): - fmts = set([unicode(x) for x in md.formats()]) - if not fmts.intersection(set(self.mimeTypes())): - return False - if "application/calibre+from_library" in fmts: - if action != Qt.CopyAction: - return False - return self.do_drop_from_library(md, action, row, column, parent) - elif 'application/calibre+from_tag_browser' in fmts: - return self.do_drop_from_tag_browser(md, action, row, column, parent) - - def do_drop_from_tag_browser(self, md, action, row, column, parent): - if not parent.isValid(): - return False - dest = self.get_node(parent) - if dest.type != TagTreeItem.CATEGORY: - return False - if not md.hasFormat('application/calibre+from_tag_browser'): - return False - data = str(md.data('application/calibre+from_tag_browser')) - src = cPickle.loads(data) - for s in src: - if s[0] != TagTreeItem.TAG: - return False - return self.move_or_copy_item_to_user_category(src, dest, action) - - def move_or_copy_item_to_user_category(self, src, dest, action): - ''' - src is a list of tuples representing items to copy. 
The tuple is - (type, containing category key, category key is global search term, - full name, category key, path to node) - The type must be TagTreeItem.TAG - dest is the TagTreeItem node to receive the items - action is Qt.CopyAction or Qt.MoveAction - ''' - def process_source_node(user_cats, src_parent, src_parent_is_gst, - is_uc, dest_key, node): - ''' - Copy/move an item and all its children to the destination - ''' - copied = False - src_name = node.tag.original_name - src_cat = node.tag.category - # delete the item if the source is a user category and action is move - if is_uc and not src_parent_is_gst and src_parent in user_cats and \ - action == Qt.MoveAction: - new_cat = [] - for tup in user_cats[src_parent]: - if src_name == tup[0] and src_cat == tup[1]: - continue - new_cat.append(list(tup)) - user_cats[src_parent] = new_cat - else: - copied = True - - # Now add the item to the destination user category - add_it = True - if not is_uc and src_cat == 'news': - src_cat = 'tags' - for tup in user_cats[dest_key]: - if src_name == tup[0] and src_cat == tup[1]: - add_it = False - if add_it: - user_cats[dest_key].append([src_name, src_cat, 0]) - - for c in node.children: - copied = process_source_node(user_cats, src_parent, src_parent_is_gst, - is_uc, dest_key, c) - return copied - - user_cats = self.db.prefs.get('user_categories', {}) - parent_node = None - copied = False - path = None - for s in src: - src_parent, src_parent_is_gst = s[1:3] - path = s[5] - parent_node = src_parent - - if src_parent.startswith('@'): - is_uc = True - src_parent = src_parent[1:] - else: - is_uc = False - dest_key = dest.category_key[1:] - - if dest_key not in user_cats: - continue - - node = self.index_for_path(path) - if node: - copied = process_source_node(user_cats, src_parent, src_parent_is_gst, - is_uc, dest_key, - self.get_node(node)) - - self.db.prefs.set('user_categories', user_cats) - self.tags_view.recount() - - # Scroll to the item copied. 
If it was moved, scroll to the parent - if parent_node is not None: - self.clear_boxed() - m = self.tags_view.model() - if not copied: - p = path[-1] - if p == 0: - path = m.find_category_node(parent_node) - else: - path[-1] = p - 1 - idx = m.index_for_path(path) - self.tags_view.setExpanded(idx, True) - if self.get_node(idx).type == TagTreeItem.TAG: - m.show_item_at_index(idx, box=True) - else: - m.show_item_at_index(idx) - return True - - def do_drop_from_library(self, md, action, row, column, parent): - idx = parent - if idx.isValid(): - self.tags_view.setCurrentIndex(idx) - node = self.data(idx, Qt.UserRole) - if node.type == TagTreeItem.TAG: - fm = self.db.metadata_for_field(node.tag.category) - if node.tag.category in \ - ('tags', 'series', 'authors', 'rating', 'publisher') or \ - (fm['is_custom'] and ( - fm['datatype'] in ['text', 'rating', 'series', - 'enumeration'] or - (fm['datatype'] == 'composite' and - fm['display'].get('make_category', False)))): - mime = 'application/calibre+from_library' - ids = list(map(int, str(md.data(mime)).split())) - self.handle_drop(node, ids) - return True - elif node.type == TagTreeItem.CATEGORY: - fm_dest = self.db.metadata_for_field(node.category_key) - if fm_dest['kind'] == 'user': - fm_src = self.db.metadata_for_field(md.column_name) - if md.column_name in ['authors', 'publisher', 'series'] or \ - (fm_src['is_custom'] and ( - (fm_src['datatype'] in ['series', 'text', 'enumeration'] and - not fm_src['is_multiple']))or - (fm_src['datatype'] == 'composite' and - fm_src['display'].get('make_category', False))): - mime = 'application/calibre+from_library' - ids = list(map(int, str(md.data(mime)).split())) - self.handle_user_category_drop(node, ids, md.column_name) - return True - return False - - def handle_user_category_drop(self, on_node, ids, column): - categories = self.db.prefs.get('user_categories', {}) - category = categories.get(on_node.category_key[1:], None) - if category is None: - return - fm_src = 
self.db.metadata_for_field(column) - for id in ids: - label = fm_src['label'] - if not fm_src['is_custom']: - if label == 'authors': - items = self.db.get_authors_with_ids() - items = [(i[0], i[1].replace('|', ',')) for i in items] - value = self.db.authors(id, index_is_id=True) - value = [v.replace('|', ',') for v in value.split(',')] - elif label == 'publisher': - items = self.db.get_publishers_with_ids() - value = self.db.publisher(id, index_is_id=True) - elif label == 'series': - items = self.db.get_series_with_ids() - value = self.db.series(id, index_is_id=True) - else: - items = self.db.get_custom_items_with_ids(label=label) - if fm_src['datatype'] != 'composite': - value = self.db.get_custom(id, label=label, index_is_id=True) - else: - value = self.db.get_property(id, loc=fm_src['rec_index'], - index_is_id=True) - if value is None: - return - if not isinstance(value, list): - value = [value] - for val in value: - for (v, c, id) in category: - if v == val and c == column: - break - else: - category.append([val, column, 0]) - categories[on_node.category_key[1:]] = category - self.db.prefs.set('user_categories', categories) - self.tags_view.recount() - - def handle_drop(self, on_node, ids): - #print 'Dropped ids:', ids, on_node.tag - key = on_node.tag.category - if (key == 'authors' and len(ids) >= 5): - if not confirm('

'+_('Changing the authors for several books can ' - 'take a while. Are you sure?') - +'

', 'tag_browser_drop_authors', self.tags_view): - return - elif len(ids) > 15: - if not confirm('

'+_('Changing the metadata for that many books ' - 'can take a while. Are you sure?') - +'

', 'tag_browser_many_changes', self.tags_view): - return - - fm = self.db.metadata_for_field(key) - is_multiple = fm['is_multiple'] - val = on_node.tag.original_name - for id in ids: - mi = self.db.get_metadata(id, index_is_id=True) - - # Prepare to ignore the author, unless it is changed. Title is - # always ignored -- see the call to set_metadata - set_authors = False - - # Author_sort cannot change explicitly. Changing the author might - # change it. - mi.author_sort = None # Never will change by itself. - - if key == 'authors': - mi.authors = [val] - set_authors=True - elif fm['datatype'] == 'rating': - mi.set(key, len(val) * 2) - elif fm['is_custom'] and fm['datatype'] == 'series': - mi.set(key, val, extra=1.0) - elif is_multiple: - new_val = mi.get(key, []) - if val in new_val: - # Fortunately, only one field can change, so the continue - # won't break anything - continue - new_val.append(val) - mi.set(key, new_val) - else: - mi.set(key, val) - self.db.set_metadata(id, mi, set_title=False, - set_authors=set_authors, commit=False) - self.db.commit() - self.drag_drop_finished.emit(ids) - # }}} - - def set_search_restriction(self, s): - self.search_restriction = s - - def _get_category_nodes(self, sort): - ''' - Called by __init__. Do not directly call this method. 
- ''' - self.row_map = [] - self.categories = {} - - # Get the categories - if self.search_restriction: - try: - data = self.db.get_categories(sort=sort, - icon_map=self.category_icon_map, - ids=self.db.search('', return_matches=True)) - except: - data = self.db.get_categories(sort=sort, icon_map=self.category_icon_map) - self.tags_view.restriction_error.emit() - else: - data = self.db.get_categories(sort=sort, icon_map=self.category_icon_map) - - # Reconstruct the user categories, putting them into metadata - self.db.field_metadata.remove_dynamic_categories() - tb_cats = self.db.field_metadata - for user_cat in sorted(self.db.prefs.get('user_categories', {}).keys(), - key=sort_key): - cat_name = '@' + user_cat # add the '@' to avoid name collision - while True: - try: - tb_cats.add_user_category(label=cat_name, name=user_cat) - dot = cat_name.rfind('.') - if dot < 0: - break - cat_name = cat_name[:dot] - except ValueError: - break - - for cat in sorted(self.db.prefs.get('grouped_search_terms', {}).keys(), - key=sort_key): - if (u'@' + cat) in data: - try: - tb_cats.add_user_category(label=u'@' + cat, name=cat) - except ValueError: - traceback.print_exc() - self.db.data.change_search_locations(self.db.field_metadata.get_search_terms()) - - if len(saved_searches().names()): - tb_cats.add_search_category(label='search', name=_('Searches')) - - if self.filter_categories_by: - for category in data.keys(): - data[category] = [t for t in data[category] - if lower(t.name).find(self.filter_categories_by) >= 0] - - tb_categories = self.db.field_metadata - for category in tb_categories: - if category in data: # The search category can come and go - self.row_map.append(category) - self.categories[category] = tb_categories[category]['name'] - return data - - def refresh(self, data=None): - ''' - Here to trap usages of refresh in the old architecture. Can eventually - be removed. 
- ''' - print ('TagsModel: refresh called!') - traceback.print_stack() - return False - - def create_node(self, *args, **kwargs): - node = TagTreeItem(*args, **kwargs) - self.node_map[id(node)] = node - return node - - def get_node(self, idx): - ans = self.node_map.get(idx.internalId(), self.root_item) - return ans - - def createIndex(self, row, column, internal_pointer=None): - idx = QAbstractItemModel.createIndex(self, row, column, - id(internal_pointer)) - return idx - def _create_node_tree(self, data, state_map): - ''' - Called by __init__. Do not directly call this method. - ''' sort_by = config['sort_tags_by'] + eval_formatter = EvalFormatter() + if data is None: print ('_create_node_tree: no data!') traceback.print_stack() @@ -824,22 +514,344 @@ class TagsModel(QAbstractItemModel): # {{{ # }}} for category in self.category_nodes: - process_one_node(category, state_map.get(category.py_name, {})) + process_one_node(category, state_map.get(category.category_key, {})) - def get_state(self): - state_map = {} - expanded_categories = [] - for row, category in enumerate(self.category_nodes): - if self.tags_view.isExpanded(self.index(row, 0, QModelIndex())): - expanded_categories.append(category.py_name) - states = [c.tag.state for c in category.child_tags()] - names = [(c.tag.name, c.tag.category) for c in category.child_tags()] - state_map[category.py_name] = dict(izip(names, states)) - return expanded_categories, state_map + # Drag'n Drop {{{ + def mimeTypes(self): + return ["application/calibre+from_library", + 'application/calibre+from_tag_browser'] + + def mimeData(self, indexes): + data = [] + for idx in indexes: + if idx.isValid(): + # get some useful serializable data + node = self.get_node(idx) + path = self.path_for_index(idx) + if node.type == TagTreeItem.CATEGORY: + d = (node.type, node.py_name, node.category_key) + else: + t = node.tag + p = node + while p.type != TagTreeItem.CATEGORY: + p = p.parent + d = (node.type, p.category_key, p.is_gst, 
t.original_name, + t.category, path) + data.append(d) + else: + data.append(None) + raw = bytearray(cPickle.dumps(data, -1)) + ans = QMimeData() + ans.setData('application/calibre+from_tag_browser', raw) + return ans + + def dropMimeData(self, md, action, row, column, parent): + fmts = set([unicode(x) for x in md.formats()]) + if not fmts.intersection(set(self.mimeTypes())): + return False + if "application/calibre+from_library" in fmts: + if action != Qt.CopyAction: + return False + return self.do_drop_from_library(md, action, row, column, parent) + elif 'application/calibre+from_tag_browser' in fmts: + return self.do_drop_from_tag_browser(md, action, row, column, parent) + + def do_drop_from_tag_browser(self, md, action, row, column, parent): + if not parent.isValid(): + return False + dest = self.get_node(parent) + if dest.type != TagTreeItem.CATEGORY: + return False + if not md.hasFormat('application/calibre+from_tag_browser'): + return False + data = str(md.data('application/calibre+from_tag_browser')) + src = cPickle.loads(data) + for s in src: + if s[0] != TagTreeItem.TAG: + return False + return self.move_or_copy_item_to_user_category(src, dest, action) + + def move_or_copy_item_to_user_category(self, src, dest, action): + ''' + src is a list of tuples representing items to copy. 
The tuple is + (type, containing category key, category key is global search term, + full name, category key, path to node) + The type must be TagTreeItem.TAG + dest is the TagTreeItem node to receive the items + action is Qt.CopyAction or Qt.MoveAction + ''' + def process_source_node(user_cats, src_parent, src_parent_is_gst, + is_uc, dest_key, node): + ''' + Copy/move an item and all its children to the destination + ''' + copied = False + src_name = node.tag.original_name + src_cat = node.tag.category + # delete the item if the source is a user category and action is move + if is_uc and not src_parent_is_gst and src_parent in user_cats and \ + action == Qt.MoveAction: + new_cat = [] + for tup in user_cats[src_parent]: + if src_name == tup[0] and src_cat == tup[1]: + continue + new_cat.append(list(tup)) + user_cats[src_parent] = new_cat + else: + copied = True + + # Now add the item to the destination user category + add_it = True + if not is_uc and src_cat == 'news': + src_cat = 'tags' + for tup in user_cats[dest_key]: + if src_name == tup[0] and src_cat == tup[1]: + add_it = False + if add_it: + user_cats[dest_key].append([src_name, src_cat, 0]) + + for c in node.children: + copied = process_source_node(user_cats, src_parent, src_parent_is_gst, + is_uc, dest_key, c) + return copied + + user_cats = self.db.prefs.get('user_categories', {}) + path = None + for s in src: + src_parent, src_parent_is_gst = s[1:3] + path = s[5] + + if src_parent.startswith('@'): + is_uc = True + src_parent = src_parent[1:] + else: + is_uc = False + dest_key = dest.category_key[1:] + + if dest_key not in user_cats: + continue + + node = self.index_for_path(path) + if node: + process_source_node(user_cats, src_parent, src_parent_is_gst, + is_uc, dest_key, + self.get_node(node)) + + self.db.prefs.set('user_categories', user_cats) + self.refresh_required.emit() + + return True + + def do_drop_from_library(self, md, action, row, column, parent): + idx = parent + if idx.isValid(): + node = 
self.data(idx, Qt.UserRole) + if node.type == TagTreeItem.TAG: + fm = self.db.metadata_for_field(node.tag.category) + if node.tag.category in \ + ('tags', 'series', 'authors', 'rating', 'publisher') or \ + (fm['is_custom'] and ( + fm['datatype'] in ['text', 'rating', 'series', + 'enumeration'] or + (fm['datatype'] == 'composite' and + fm['display'].get('make_category', False)))): + mime = 'application/calibre+from_library' + ids = list(map(int, str(md.data(mime)).split())) + self.handle_drop(node, ids) + return True + elif node.type == TagTreeItem.CATEGORY: + fm_dest = self.db.metadata_for_field(node.category_key) + if fm_dest['kind'] == 'user': + fm_src = self.db.metadata_for_field(md.column_name) + if md.column_name in ['authors', 'publisher', 'series'] or \ + (fm_src['is_custom'] and ( + (fm_src['datatype'] in ['series', 'text', 'enumeration'] and + not fm_src['is_multiple']))or + (fm_src['datatype'] == 'composite' and + fm_src['display'].get('make_category', False))): + mime = 'application/calibre+from_library' + ids = list(map(int, str(md.data(mime)).split())) + self.handle_user_category_drop(node, ids, md.column_name) + return True + return False + + def handle_user_category_drop(self, on_node, ids, column): + categories = self.db.prefs.get('user_categories', {}) + category = categories.get(on_node.category_key[1:], None) + if category is None: + return + fm_src = self.db.metadata_for_field(column) + for id in ids: + label = fm_src['label'] + if not fm_src['is_custom']: + if label == 'authors': + items = self.db.get_authors_with_ids() + items = [(i[0], i[1].replace('|', ',')) for i in items] + value = self.db.authors(id, index_is_id=True) + value = [v.replace('|', ',') for v in value.split(',')] + elif label == 'publisher': + items = self.db.get_publishers_with_ids() + value = self.db.publisher(id, index_is_id=True) + elif label == 'series': + items = self.db.get_series_with_ids() + value = self.db.series(id, index_is_id=True) + else: + items = 
self.db.get_custom_items_with_ids(label=label) + if fm_src['datatype'] != 'composite': + value = self.db.get_custom(id, label=label, index_is_id=True) + else: + value = self.db.get_property(id, loc=fm_src['rec_index'], + index_is_id=True) + if value is None: + return + if not isinstance(value, list): + value = [value] + for val in value: + for (v, c, id) in category: + if v == val and c == column: + break + else: + category.append([val, column, 0]) + categories[on_node.category_key[1:]] = category + self.db.prefs.set('user_categories', categories) + self.refresh_required.emit() + + def handle_drop(self, on_node, ids): + #print 'Dropped ids:', ids, on_node.tag + key = on_node.tag.category + if (key == 'authors' and len(ids) >= 5): + if not confirm('

'+_('Changing the authors for several books can ' + 'take a while. Are you sure?') + +'

', 'tag_browser_drop_authors', self.parent()): + return + elif len(ids) > 15: + if not confirm('

'+_('Changing the metadata for that many books ' + 'can take a while. Are you sure?') + +'

', 'tag_browser_many_changes', self.parent()): + return + + fm = self.db.metadata_for_field(key) + is_multiple = fm['is_multiple'] + val = on_node.tag.original_name + for id in ids: + mi = self.db.get_metadata(id, index_is_id=True) + + # Prepare to ignore the author, unless it is changed. Title is + # always ignored -- see the call to set_metadata + set_authors = False + + # Author_sort cannot change explicitly. Changing the author might + # change it. + mi.author_sort = None # Never will change by itself. + + if key == 'authors': + mi.authors = [val] + set_authors=True + elif fm['datatype'] == 'rating': + mi.set(key, len(val) * 2) + elif fm['is_custom'] and fm['datatype'] == 'series': + mi.set(key, val, extra=1.0) + elif is_multiple: + new_val = mi.get(key, []) + if val in new_val: + # Fortunately, only one field can change, so the continue + # won't break anything + continue + new_val.append(val) + mi.set(key, new_val) + else: + mi.set(key, val) + self.db.set_metadata(id, mi, set_title=False, + set_authors=set_authors, commit=False) + self.db.commit() + self.drag_drop_finished.emit(ids) + # }}} + + def _get_category_nodes(self, sort): + ''' + Called by __init__. Do not directly call this method. 
+ ''' + self.row_map = [] + self.categories = {} + + # Get the categories + if self.search_restriction: + try: + data = self.db.get_categories(sort=sort, + icon_map=self.category_icon_map, + ids=self.db.search('', return_matches=True)) + except: + data = self.db.get_categories(sort=sort, icon_map=self.category_icon_map) + self.restriction_error.emit() + else: + data = self.db.get_categories(sort=sort, icon_map=self.category_icon_map) + + # Reconstruct the user categories, putting them into metadata + self.db.field_metadata.remove_dynamic_categories() + tb_cats = self.db.field_metadata + for user_cat in sorted(self.db.prefs.get('user_categories', {}).keys(), + key=sort_key): + cat_name = '@' + user_cat # add the '@' to avoid name collision + while True: + try: + tb_cats.add_user_category(label=cat_name, name=user_cat) + dot = cat_name.rfind('.') + if dot < 0: + break + cat_name = cat_name[:dot] + except ValueError: + break + + for cat in sorted(self.db.prefs.get('grouped_search_terms', {}).keys(), + key=sort_key): + if (u'@' + cat) in data: + try: + tb_cats.add_user_category(label=u'@' + cat, name=cat) + except ValueError: + traceback.print_exc() + self.db.data.change_search_locations(self.db.field_metadata.get_search_terms()) + + if len(saved_searches().names()): + tb_cats.add_search_category(label='search', name=_('Searches')) + + if self.filter_categories_by: + for category in data.keys(): + data[category] = [t for t in data[category] + if lower(t.name).find(self.filter_categories_by) >= 0] + + tb_categories = self.db.field_metadata + for category in tb_categories: + if category in data: # The search category can come and go + self.row_map.append(category) + self.categories[category] = tb_categories[category]['name'] + return data + + def refresh(self, data=None): + ''' + Here to trap usages of refresh in the old architecture. Can eventually + be removed. 
+ ''' + print ('TagsModel: refresh called!') + traceback.print_stack() + return False + + def create_node(self, *args, **kwargs): + node = TagTreeItem(*args, **kwargs) + self.node_map[id(node)] = node + return node + + def get_node(self, idx): + ans = self.node_map.get(idx.internalId(), self.root_item) + return ans + + def createIndex(self, row, column, internal_pointer=None): + idx = QAbstractItemModel.createIndex(self, row, column, + id(internal_pointer)) + return idx def index_for_category(self, name): for row, category in enumerate(self.category_nodes): - if category.py_name == name: + if category.category_key == name: return self.index(row, 0, QModelIndex()) def columnCount(self, parent): @@ -853,20 +865,19 @@ class TagsModel(QAbstractItemModel): # {{{ def setData(self, index, value, role=Qt.EditRole): if not index.isValid(): - return NONE + return False # set up to reposition at the same item. We can do this except if # working with the last item and that item is deleted, in which case # we position at the parent label - path = index.model().path_for_index(index) val = unicode(value.toString()).strip() if not val: - error_dialog(self.tags_view, _('Item is blank'), + error_dialog(self.parent(), _('Item is blank'), _('An item cannot be set to nothing. 
Delete it instead.')).exec_() return False item = self.get_node(index) if item.type == TagTreeItem.CATEGORY and item.category_key.startswith('@'): if val.find('.') >= 0: - error_dialog(self.tags_view, _('Rename user category'), + error_dialog(self.parent(), _('Rename user category'), _('You cannot use periods in the name when ' 'renaming user categories'), show=True) return False @@ -886,7 +897,7 @@ class TagsModel(QAbstractItemModel): # {{{ if len(c) == len(ckey): if strcmp(ckey, nkey) != 0 and \ nkey_lower in user_cat_keys_lower: - error_dialog(self.tags_view, _('Rename user category'), + error_dialog(self.parent(), _('Rename user category'), _('The name %s is already used')%nkey, show=True) return False user_cats[nkey] = user_cats[ckey] @@ -895,16 +906,12 @@ class TagsModel(QAbstractItemModel): # {{{ rest = c[len(ckey):] if strcmp(ckey, nkey) != 0 and \ icu_lower(nkey + rest) in user_cat_keys_lower: - error_dialog(self.tags_view, _('Rename user category'), + error_dialog(self.parent(), _('Rename user category'), _('The name %s is already used')%(nkey+rest), show=True) return False user_cats[nkey + rest] = user_cats[ckey + rest] del user_cats[ckey + rest] - self.db.prefs.set('user_categories', user_cats) - self.tags_view.set_new_model() - # must not use 'self' below because the model has changed! 
- p = self.tags_view.model().find_category_node('@' + nkey) - self.tags_view.model().show_item_at_path(p) + self.user_categories_edited.emit(user_cats, nkey) # Does a refresh return True key = item.tag.category @@ -914,17 +921,17 @@ class TagsModel(QAbstractItemModel): # {{{ return False if key == 'authors': if val.find('&') >= 0: - error_dialog(self.tags_view, _('Invalid author name'), + error_dialog(self.parent(), _('Invalid author name'), _('Author names cannot contain & characters.')).exec_() return False if key == 'search': if val in saved_searches().names(): - error_dialog(self.tags_view, _('Duplicate search name'), + error_dialog(self.parent(), _('Duplicate search name'), _('The saved search name %s is already used.')%val).exec_() return False saved_searches().rename(unicode(item.data(role).toString()), val) item.tag.name = val - self.tags_view.search_item_renamed.emit() # Does a refresh + self.search_item_renamed.emit() # Does a refresh else: if key == 'series': self.db.rename_series(item.tag.id, val) @@ -937,18 +944,17 @@ class TagsModel(QAbstractItemModel): # {{{ elif self.db.field_metadata[key]['is_custom']: self.db.rename_custom_item(item.tag.id, val, label=self.db.field_metadata[key]['label']) - self.tags_view.tag_item_renamed.emit() + self.tag_item_renamed.emit() item.tag.name = val self.rename_item_in_all_user_categories(name, key, val) - self.tags_view.refresh_required.emit() - self.show_item_at_path(path) + self.refresh_required.emit() return True def rename_item_in_all_user_categories(self, item_name, item_category, new_name): ''' Search all user categories for items named item_name with category item_category and rename them to new_name. The caller must arrange to - redisplay the tree as appropriate (recount or set_new_model) + redisplay the tree as appropriate. 
''' user_cats = self.db.prefs.get('user_categories', {}) for k in user_cats.keys(): @@ -965,7 +971,7 @@ class TagsModel(QAbstractItemModel): # {{{ ''' Search all user categories for items named item_name with category item_category and delete them. The caller must arrange to redisplay the - tree as appropriate (recount or set_new_model) + tree as appropriate. ''' user_cats = self.db.prefs.get('user_categories', {}) for cat in user_cats.keys(): @@ -1262,27 +1268,10 @@ class TagsModel(QAbstractItemModel): # {{{ return v return None - def show_item_at_path(self, path, box=False, - position=QTreeView.PositionAtCenter): - ''' - Scroll the browser and open categories to show the item referenced by - path. If possible, the item is placed in the center. If box=True, a - box is drawn around the item. - ''' - if path: - self.show_item_at_index(self.index_for_path(path), box=box, - position=position) - - def show_item_at_index(self, idx, box=False, - position=QTreeView.PositionAtCenter): - if idx.isValid(): - self.tags_view.setCurrentIndex(idx) - self.tags_view.scrollTo(idx, position) - self.tags_view.setCurrentIndex(idx) - if box: - tag_item = self.get_node(idx) - tag_item.boxed = True - self.dataChanged.emit(idx, idx) + def set_boxed(self, idx): + tag_item = self.get_node(idx) + tag_item.boxed = True + self.dataChanged.emit(idx, idx) def clear_boxed(self): ''' @@ -1310,8 +1299,5 @@ class TagsModel(QAbstractItemModel): # {{{ for i in xrange(self.rowCount(QModelIndex())): process_level(self.index(i, 0, QModelIndex())) - def get_filter_categories_by(self): - return self.filter_categories_by - # }}} diff --git a/src/calibre/gui2/tag_browser/ui.py b/src/calibre/gui2/tag_browser/ui.py index f7f724b118..d7e504b3e9 100644 --- a/src/calibre/gui2/tag_browser/ui.py +++ b/src/calibre/gui2/tag_browser/ui.py @@ -91,10 +91,10 @@ class TagBrowserMixin(object): # {{{ # Add the new category user_cats[new_cat] = [] db.prefs.set('user_categories', user_cats) - self.tags_view.set_new_model() + 
self.tags_view.recount() m = self.tags_view.model() idx = m.index_for_path(m.find_category_node('@' + new_cat)) - m.show_item_at_index(idx) + self.tags_view.show_item_at_index(idx) # Open the editor on the new item to rename it if new_category_name is None: self.tags_view.edit(idx) @@ -111,7 +111,7 @@ class TagBrowserMixin(object): # {{{ for k in d.categories: db.field_metadata.add_user_category('@' + k, k) db.data.change_search_locations(db.field_metadata.get_search_terms()) - self.tags_view.set_new_model() + self.tags_view.recount() def do_delete_user_category(self, category_name): ''' @@ -144,7 +144,7 @@ class TagBrowserMixin(object): # {{{ elif k.startswith(category_name + '.'): del user_cats[k] db.prefs.set('user_categories', user_cats) - self.tags_view.set_new_model() + self.tags_view.recount() def do_del_item_from_user_cat(self, user_cat, item_name, item_category): ''' @@ -262,20 +262,22 @@ class TagBrowserMixin(object): # {{{ self.library_view.select_rows(ids) # refreshing the tags view happens at the emit()/call() site - def do_author_sort_edit(self, parent, id, select_sort=True): + def do_author_sort_edit(self, parent, id, select_sort=True, select_link=False): ''' Open the manage authors dialog ''' db = self.library_view.model().db - editor = EditAuthorsDialog(parent, db, id, select_sort) + editor = EditAuthorsDialog(parent, db, id, select_sort, select_link) d = editor.exec_() if d: - for (id, old_author, new_author, new_sort) in editor.result: + for (id, old_author, new_author, new_sort, new_link) in editor.result: if old_author != new_author: # The id might change if the new author already exists id = db.rename_author(id, new_author) db.set_sort_field_for_author(id, unicode(new_sort), commit=False, notify=False) + db.set_link_field_for_author(id, unicode(new_link), + commit=False, notify=False) db.commit() self.library_view.model().refresh() self.tags_view.recount() @@ -413,13 +415,14 @@ class TagBrowserWidget(QWidget): # {{{ txt = 
unicode(self.item_search.currentText()).strip() if txt.startswith('*'): - self.tags_view.set_new_model(filter_categories_by=txt[1:]) + model.filter_categories_by = txt[1:] + self.tags_view.recount() self.current_find_position = None return - if model.get_filter_categories_by(): - self.tags_view.set_new_model(filter_categories_by=None) + if model.filter_categories_by: + model.filter_categories_by = None + self.tags_view.recount() self.current_find_position = None - model = self.tags_view.model() if not txt: return @@ -437,8 +440,9 @@ class TagBrowserWidget(QWidget): # {{{ self.current_find_position = \ model.find_item_node(key, txt, self.current_find_position) + if self.current_find_position: - model.show_item_at_path(self.current_find_position, box=True) + self.tags_view.show_item_at_path(self.current_find_position, box=True) elif self.item_search.text(): self.not_found_label.setVisible(True) if self.tags_view.verticalScrollBar().isVisible(): diff --git a/src/calibre/gui2/tag_browser/view.py b/src/calibre/gui2/tag_browser/view.py index 0cafcd2b63..d53167591e 100644 --- a/src/calibre/gui2/tag_browser/view.py +++ b/src/calibre/gui2/tag_browser/view.py @@ -7,11 +7,12 @@ __license__ = 'GPL v3' __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import cPickle, traceback +import cPickle from functools import partial +from itertools import izip from PyQt4.Qt import (QItemDelegate, Qt, QTreeView, pyqtSignal, QSize, QIcon, - QApplication, QMenu, QPoint) + QApplication, QMenu, QPoint, QModelIndex, QToolTip, QCursor) from calibre.gui2.tag_browser.model import (TagTreeItem, TAG_SEARCH_STATES, TagsModel) @@ -65,7 +66,7 @@ class TagsView(QTreeView): # {{{ tag_list_edit = pyqtSignal(object, object) saved_search_edit = pyqtSignal(object) rebuild_saved_searches = pyqtSignal() - author_sort_edit = pyqtSignal(object, object) + author_sort_edit = pyqtSignal(object, object, object, object) tag_item_renamed = pyqtSignal() search_item_renamed = pyqtSignal() 
drag_drop_finished = pyqtSignal(object) @@ -90,55 +91,59 @@ class TagsView(QTreeView): # {{{ self.setDropIndicatorShown(True) self.setAutoExpandDelay(500) self.pane_is_visible = False - if gprefs['tags_browser_collapse_at'] == 0: - self.collapse_model = 'disable' - else: - self.collapse_model = gprefs['tags_browser_partition_method'] self.search_icon = QIcon(I('search.png')) self.user_category_icon = QIcon(I('tb_folder.png')) self.delete_icon = QIcon(I('list_remove.png')) self.rename_icon = QIcon(I('edit-undo.png')) + self._model = TagsModel(self) + self._model.search_item_renamed.connect(self.search_item_renamed) + self._model.refresh_required.connect(self.refresh_required, + type=Qt.QueuedConnection) + self._model.tag_item_renamed.connect(self.tag_item_renamed) + self._model.restriction_error.connect(self.restriction_error) + self._model.user_categories_edited.connect(self.user_categories_edited, + type=Qt.QueuedConnection) + self._model.drag_drop_finished.connect(self.drag_drop_finished) + + @property + def hidden_categories(self): + return self._model.hidden_categories + + @property + def db(self): + return self._model.db + + @property + def collapse_model(self): + return self._model.collapse_model + def set_pane_is_visible(self, to_what): pv = self.pane_is_visible self.pane_is_visible = to_what if to_what and not pv: self.recount() + def get_state(self): + state_map = {} + expanded_categories = [] + for row, category in enumerate(self._model.category_nodes): + if self.isExpanded(self._model.index(row, 0, QModelIndex())): + expanded_categories.append(category.category_key) + states = [c.tag.state for c in category.child_tags()] + names = [(c.tag.name, c.tag.category) for c in category.child_tags()] + state_map[category.category_key] = dict(izip(names, states)) + return expanded_categories, state_map + def reread_collapse_parameters(self): - if gprefs['tags_browser_collapse_at'] == 0: - self.collapse_model = 'disable' - else: - self.collapse_model = 
gprefs['tags_browser_partition_method'] - self.set_new_model(self._model.get_filter_categories_by()) + self._model.reread_collapse_model(self.get_state()[1]) def set_database(self, db, tag_match, sort_by): - hidden_cats = db.prefs.get('tag_browser_hidden_categories', None) - self.hidden_categories = [] - # migrate from config to db prefs - if hidden_cats is None: - hidden_cats = config['tag_browser_hidden_categories'] - # strip out any non-existence field keys - for cat in hidden_cats: - if cat in db.field_metadata: - self.hidden_categories.append(cat) - db.prefs.set('tag_browser_hidden_categories', list(self.hidden_categories)) - self.hidden_categories = set(self.hidden_categories) + self._model.set_database(db) - old = getattr(self, '_model', None) - if old is not None: - old.break_cycles() - self._model = TagsModel(db, parent=self, - hidden_categories=self.hidden_categories, - search_restriction=None, - drag_drop_finished=self.drag_drop_finished, - collapse_model=self.collapse_model, - state_map={}) - self.pane_is_visible = True # because TagsModel.init did a recount + self.pane_is_visible = True # because TagsModel.set_database did a recount self.sort_by = sort_by self.tag_match = tag_match - self.db = db - self.search_restriction = None self.setModel(self._model) self.setContextMenuPolicy(Qt.CustomContextMenu) pop = config['sort_tags_by'] @@ -164,6 +169,13 @@ class TagsView(QTreeView): # {{{ self.refresh_signal_processed = False self.refresh_required.emit() + def user_categories_edited(self, user_cats, nkey): + state_map = self.get_state()[1] + self.db.prefs.set('user_categories', user_cats) + self._model.rebuild_node_tree(state_map=state_map) + p = self._model.find_category_node('@'+nkey) + self.show_item_at_path(p) + @property def match_all(self): return self.tag_match and self.tag_match.currentIndex() > 0 @@ -179,11 +191,8 @@ class TagsView(QTreeView): # {{{ pass def set_search_restriction(self, s): - if s: - self.search_restriction = s - else: - 
self.search_restriction = None - self.set_new_model() + s = s if s else None + self._model.set_search_restriction(s) def mouseReleaseEvent(self, event): # Swallow everything except leftButton so context menus work correctly @@ -268,23 +277,29 @@ class TagsView(QTreeView): # {{{ self.saved_search_edit.emit(category) return if action == 'edit_author_sort': - self.author_sort_edit.emit(self, index) + self.author_sort_edit.emit(self, index, True, False) + return + if action == 'edit_author_link': + self.author_sort_edit.emit(self, index, False, True) return + reset_filter_categories = True if action == 'hide': self.hidden_categories.add(category) elif action == 'show': self.hidden_categories.discard(category) elif action == 'categorization': changed = self.collapse_model != category - self.collapse_model = category + self._model.collapse_model = category if changed: - self.set_new_model(self._model.get_filter_categories_by()) + reset_filter_categories = False gprefs['tags_browser_partition_method'] = category elif action == 'defaults': self.hidden_categories.clear() self.db.prefs.set('tag_browser_hidden_categories', list(self.hidden_categories)) - self.set_new_model() + if reset_filter_categories: + self._model.filter_categories_by = None + self._model.rebuild_node_tree() except: return @@ -334,6 +349,9 @@ class TagsView(QTreeView): # {{{ self.context_menu.addAction(_('Edit sort for %s')%display_name(tag), partial(self.context_menu_handler, action='edit_author_sort', index=tag.id)) + self.context_menu.addAction(_('Edit link for %s')%display_name(tag), + partial(self.context_menu_handler, + action='edit_author_link', index=tag.id)) # is_editable is also overloaded to mean 'can be added # to a user category' @@ -475,10 +493,25 @@ class TagsView(QTreeView): # {{{ pa.setCheckable(True) pa.setChecked(True) + if config['sort_tags_by'] != "name": + fla.setEnabled(False) + m.hovered.connect(self.collapse_menu_hovered) + fla.setToolTip(_('First letter is usable only when 
sorting by name')) + # Apparently one cannot set a tooltip to empty, so use a star and + # deal with it in the hover method + da.setToolTip('*') + pa.setToolTip('*') + if not self.context_menu.isEmpty(): self.context_menu.popup(self.mapToGlobal(point)) return True + def collapse_menu_hovered(self, action): + tip = action.toolTip() + if tip == '*': + tip = '' + QToolTip.showText(QCursor.pos(), tip) + def dragMoveEvent(self, event): QTreeView.dragMoveEvent(self, event) self.setDropIndicatorShown(False) @@ -487,6 +520,8 @@ class TagsView(QTreeView): # {{{ return src_is_tb = event.mimeData().hasFormat('application/calibre+from_tag_browser') item = index.data(Qt.UserRole).toPyObject() + if item.type == TagTreeItem.ROOT: + return flags = self._model.flags(index) if item.type == TagTreeItem.TAG and flags & Qt.ItemIsDropEnabled: self.setDropIndicatorShown(not src_is_tb) @@ -537,11 +572,33 @@ class TagsView(QTreeView): # {{{ if not ci.isValid(): ci = self.indexAt(QPoint(10, 10)) path = self.model().path_for_index(ci) if self.is_visible(ci) else None - expanded_categories, state_map = self.model().get_state() - self.set_new_model(state_map=state_map) + expanded_categories, state_map = self.get_state() + self._model.rebuild_node_tree(state_map=state_map) for category in expanded_categories: - self.expand(self.model().index_for_category(category)) - self._model.show_item_at_path(path) + self.expand(self._model.index_for_category(category)) + self.show_item_at_path(path) + + def show_item_at_path(self, path, box=False, + position=QTreeView.PositionAtCenter): + ''' + Scroll the browser and open categories to show the item referenced by + path. If possible, the item is placed in the center. If box=True, a + box is drawn around the item. 
+ ''' + if path: + self.show_item_at_index(self._model.index_for_path(path), box=box, + position=position) + + def show_item_at_index(self, idx, box=False, + position=QTreeView.PositionAtCenter): + if idx.isValid() and idx.data(Qt.UserRole).toPyObject() is not self._model.root_item: + self.expand(self._model.parent(idx)) # Needed otherwise Qt sometimes segfaults if the + # node is buried in a collapsed, off + # screen hierarchy + self.setCurrentIndex(idx) + self.scrollTo(idx, position) + if box: + self._model.set_boxed(idx) def item_expanded(self, idx): ''' @@ -549,30 +606,6 @@ class TagsView(QTreeView): # {{{ ''' self.setCurrentIndex(idx) - def set_new_model(self, filter_categories_by=None, state_map={}): - ''' - There are cases where we need to rebuild the category tree without - attempting to reposition the current node. - ''' - try: - old = getattr(self, '_model', None) - if old is not None: - old.break_cycles() - self._model = TagsModel(self.db, parent=self, - hidden_categories=self.hidden_categories, - search_restriction=self.search_restriction, - drag_drop_finished=self.drag_drop_finished, - filter_categories_by=filter_categories_by, - collapse_model=self.collapse_model, - state_map=state_map) - self.setModel(self._model) - except: - # The DB must be gone. Set the model to None and hope that someone - # will call set_database later. I don't know if this in fact works. 
- # But perhaps a Bad Thing Happened, so print the exception - traceback.print_exc() - self._model = None - self.setModel(None) # }}} diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 8b4ad47284..3ebd63afde 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -367,7 +367,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): 'uuid', 'has_cover', ('au_map', 'authors', 'author', - 'aum_sortconcat(link.id, authors.name, authors.sort)'), + 'aum_sortconcat(link.id, authors.name, authors.sort, authors.link)'), 'last_modified', '(SELECT identifiers_concat(type, val) FROM identifiers WHERE identifiers.book=books.id) identifiers', ] @@ -894,13 +894,17 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): aut_list = [] aum = [] aus = {} - for (author, author_sort) in aut_list: - aum.append(author.replace('|', ',')) - aus[author] = author_sort.replace('|', ',') + aul = {} + for (author, author_sort, link) in aut_list: + aut = author.replace('|', ',') + aum.append(aut) + aus[aut] = author_sort.replace('|', ',') + aul[aut] = link mi.title = row[fm['title']] mi.authors = aum mi.author_sort = row[fm['author_sort']] mi.author_sort_map = aus + mi.author_link_map = aul mi.comments = row[fm['comments']] mi.publisher = row[fm['publisher']] mi.timestamp = row[fm['timestamp']] @@ -1245,6 +1249,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): ret = tempfile.SpooledTemporaryFile(max_size=SPOOL_SIZE) shutil.copyfileobj(f, ret) ret.seek(0) + # Various bits of code try to use the name as the default + # title when reading metadata, so set it + ret.name = f.name else: ret = f.read() return ret @@ -1442,7 +1449,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): raise ValueError('sort ' + sort + ' not a valid value') self.books_list_filter.change([] if not ids else ids) - id_filter = None if not ids else frozenset(ids) + id_filter = 
None if ids is None else frozenset(ids) tb_cats = self.field_metadata tcategories = {} @@ -1520,7 +1527,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): rating_dex = self.FIELD_MAP['rating'] tag_class = LibraryDatabase2.TCat_Tag for book in self.data.iterall(): - if id_filter and book[id_dex] not in id_filter: + if id_filter is not None and book[id_dex] not in id_filter: continue rating = book[rating_dex] # We kept track of all possible category field_map positions above @@ -2038,13 +2045,13 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): def authors_with_sort_strings(self, id, index_is_id=False): id = id if index_is_id else self.id(id) aut_strings = self.conn.get(''' - SELECT authors.id, authors.name, authors.sort + SELECT authors.id, authors.name, authors.sort, authors.link FROM authors, books_authors_link as bl WHERE bl.book=? and authors.id=bl.author ORDER BY bl.id''', (id,)) result = [] - for (id_, author, sort,) in aut_strings: - result.append((id_, author.replace('|', ','), sort)) + for (id_, author, sort, link) in aut_strings: + result.append((id_, author.replace('|', ','), sort, link)) return result # Given a book, return the author_sort string for authors of the book @@ -2084,7 +2091,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): aum = self.authors_with_sort_strings(id_, index_is_id=True) self.data.set(id_, self.FIELD_MAP['au_map'], - ':#:'.join([':::'.join((au.replace(',', '|'), aus)) for (_, au, aus) in aum]), + ':#:'.join([':::'.join((au.replace(',', '|'), aus, aul)) + for (_, au, aus, aul) in aum]), row_is_id=True) def _set_authors(self, id, authors, allow_case_change=False): @@ -2435,7 +2443,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): self.conn.commit() def get_authors_with_ids(self): - result = self.conn.get('SELECT id,name,sort FROM authors') + result = self.conn.get('SELECT id,name,sort,link FROM authors') if not result: return [] 
return result @@ -2446,6 +2454,13 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): (author,), all=False) return result + def set_link_field_for_author(self, aid, link, commit=True, notify=False): + if not link: + link = '' + self.conn.execute('UPDATE authors SET link=? WHERE id=?', (link.strip(), aid)) + if commit: + self.conn.commit() + def set_sort_field_for_author(self, old_id, new_sort, commit=True, notify=False): self.conn.execute('UPDATE authors SET sort=? WHERE id=?', \ (new_sort.strip(), old_id)) diff --git a/src/calibre/library/restore.py b/src/calibre/library/restore.py index 8bd7174849..4fab2edbd6 100644 --- a/src/calibre/library/restore.py +++ b/src/calibre/library/restore.py @@ -53,6 +53,7 @@ class Restore(Thread): self.mismatched_dirs = [] self.successes = 0 self.tb = None + self.authors_links = {} @property def errors_occurred(self): @@ -160,6 +161,12 @@ class Restore(Thread): else: self.mismatched_dirs.append(dirpath) + alm = mi.get('author_link_map', {}) + for author, link in alm.iteritems(): + existing_link, timestamp = self.authors_links.get(author, (None, None)) + if existing_link is None or existing_link != link and timestamp < mi.timestamp: + self.authors_links[author] = (link, mi.timestamp) + def create_cc_metadata(self): self.books.sort(key=itemgetter('timestamp')) self.custom_columns = {} @@ -206,6 +213,11 @@ class Restore(Thread): self.failed_restores.append((book, traceback.format_exc())) self.progress_callback(book['mi'].title, i+1) + for author in self.authors_links.iterkeys(): + link, ign = self.authors_links[author] + db.conn.execute('UPDATE authors SET link=? 
WHERE name=?', + (link, author.replace(',', '|'))) + db.conn.commit() db.conn.close() def restore_book(self, book, db): diff --git a/src/calibre/library/schema_upgrades.py b/src/calibre/library/schema_upgrades.py index 3fc9a2368a..2907e43098 100644 --- a/src/calibre/library/schema_upgrades.py +++ b/src/calibre/library/schema_upgrades.py @@ -600,4 +600,14 @@ class SchemaUpgrade(object): with open(os.path.join(bdir, fname), 'wb') as f: f.write(script) + def upgrade_version_20(self): + ''' + Add a link column to the authors table. + ''' + + script = ''' + ALTER TABLE authors ADD COLUMN link TEXT NOT NULL DEFAULT ""; + ''' + self.conn.executescript(script) + diff --git a/src/calibre/library/sqlite.py b/src/calibre/library/sqlite.py index 96874d2c27..a2a85806f5 100644 --- a/src/calibre/library/sqlite.py +++ b/src/calibre/library/sqlite.py @@ -144,9 +144,9 @@ class AumSortedConcatenate(object): def __init__(self): self.ans = {} - def step(self, ndx, author, sort): + def step(self, ndx, author, sort, link): if author is not None: - self.ans[ndx] = author + ':::' + sort + self.ans[ndx] = ':::'.join((author, sort, link)) def finalize(self): keys = self.ans.keys() @@ -229,7 +229,7 @@ class DBThread(Thread): load_c_extensions(self.conn) self.conn.row_factory = sqlite.Row if self.row_factory else lambda cursor, row : list(row) self.conn.create_aggregate('concat', 1, Concatenate) - self.conn.create_aggregate('aum_sortconcat', 3, AumSortedConcatenate) + self.conn.create_aggregate('aum_sortconcat', 4, AumSortedConcatenate) self.conn.create_collation('PYNOCASE', partial(pynocase, encoding=encoding)) self.conn.create_function('title_sort', 1, title_sort) diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index 97551b403f..c67d44b7d5 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -558,11 +558,16 @@ Most readers do not support this. 
You should complain to the manufacturer about Another alternative is to create a catalog in ebook form containing a listing of all the books in your calibre library, with their metadata. Click the arrow next to the convert button to access the catalog creation tool. And before you ask, no you cannot have the catalog "link directly to" books on your reader. +How do I get |app| to use my HTTP proxy? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +By default, |app| uses whatever proxy settings are set in your OS. Sometimes these are incorrect, for example, on windows if you don't use Internet Explorer then the proxy settings may not be up to date. You can tell |app| to use a particular proxy server by setting the http_proxy environment variable. The format of the variable is: http://username:password@servername you should ask your network admin to give you the correct value for this variable. Note that |app| only supports HTTP proxies not SOCKS proxies. You can see the current proxies used by |app| in Preferences->Miscellaneous. + I want some feature added to |app|. What can I do? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You have two choices: 1. Create a patch by hacking on |app| and send it to me for review and inclusion. See `Development `_. - 2. `Open a ticket `_ (you have to register and login first). Remember that |app| development is done by volunteers, so if you get no response to your feature request, it means no one feels like implementing it. + 2. `Open a bug requesting the feature `_ . Remember that |app| development is done by volunteers, so if you get no response to your feature request, it means no one feels like implementing it. Why doesn't |app| have an automatic update? 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/calibre/utils/formatter.py b/src/calibre/utils/formatter.py index ebf47db854..3a93c2b650 100644 --- a/src/calibre/utils/formatter.py +++ b/src/calibre/utils/formatter.py @@ -347,5 +347,6 @@ class EvalFormatter(TemplateFormatter): key = key.lower() return kwargs.get(key, _('No such variable ') + key) +# DEPRECATED. This is not thread safe. Do not use. eval_formatter = EvalFormatter() diff --git a/src/calibre/utils/formatter_functions.py b/src/calibre/utils/formatter_functions.py index 55bad6c7e8..c6f4bd1b0e 100644 --- a/src/calibre/utils/formatter_functions.py +++ b/src/calibre/utils/formatter_functions.py @@ -202,9 +202,9 @@ class BuiltinEval(BuiltinFormatterFunction): 'results from local variables.') def evaluate(self, formatter, kwargs, mi, locals, template): - from formatter import eval_formatter + from formatter import EvalFormatter template = template.replace('[[', '{').replace(']]', '}') - return eval_formatter.safe_format(template, locals, 'EVAL', None) + return EvalFormatter().safe_format(template, locals, 'EVAL', None) class BuiltinAssign(BuiltinFormatterFunction): name = 'assign'
'), lambda match: '')] + remove_tags =[] + remove_tags.append(dict(name = 'div', attrs = {'class' : 'def element-date'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'def silver'})) + remove_tags.append(dict(name = 'div', attrs = {'id' : 'content-main-column-right'})) - remove_tags =[] - remove_tags.append(dict(name = 'div', attrs = {'class' : 'def element-date'})) - remove_tags.append(dict(name = 'div', attrs = {'class' : 'def silver'})) - remove_tags.append(dict(name = 'div', attrs = {'id' : 'content-main-column-right'})) - - - extra_css = ''' - .div-header {font-size: x-small; font-weight: bold} - ''' + extra_css = ''' + .div-header {font-size: x-small; font-weight: bold} + ''' #h2 {font-size: x-large; font-weight: bold} - def is_blocked(self, a): - if a.findNextSibling('img') is None: - return False - else: - return True + def is_blocked(self, a): + if a.findNextSibling('img') is None: + return False + else: + return True - def find_last_issue(self): - soup = self.index_to_soup('http://www.wprost.pl/archiwum/') - a = 0 - if self.FIND_LAST_FULL_ISSUE: - ico_blocked = soup.findAll('img', attrs={'src' : self.ICO_BLOCKED}) - a = ico_blocked[-1].findNext('a', attrs={'title' : re.compile('Zobacz spis tre.ci')}) - else: - a = soup.find('a', attrs={'title' : re.compile('Zobacz spis tre.ci')}) - self.EDITION = a['href'].replace('/tygodnik/?I=', '') - self.cover_url = a.img['src'] + def find_last_issue(self): + soup = self.index_to_soup('http://www.wprost.pl/archiwum/') + a = 0 + if self.FIND_LAST_FULL_ISSUE: + ico_blocked = soup.findAll('img', attrs={'src' : self.ICO_BLOCKED}) + a = ico_blocked[-1].findNext('a', attrs={'title' : re.compile('Zobacz spis tre.ci')}) + else: + a = soup.find('a', attrs={'title' : re.compile('Zobacz spis tre.ci')}) + self.EDITION = a['href'].replace('/tygodnik/?I=', '') + self.cover_url = a.img['src'] - def parse_index(self): - self.find_last_issue() - soup = self.index_to_soup('http://www.wprost.pl/tygodnik/?I=' + 
self.EDITION) - feeds = [] - for main_block in soup.findAll(attrs={'class':'main-block-s3 s3-head head-red3'}): - articles = list(self.find_articles(main_block)) - if len(articles) > 0: - section = self.tag_to_string(main_block) - feeds.append((section, articles)) - return feeds - - def find_articles(self, main_block): - for a in main_block.findAllNext( attrs={'style':['','padding-top: 15px;']}): - if a.name in "td": - break - if self.EXCLUDE_LOCKED & self.is_blocked(a): - continue - yield { - 'title' : self.tag_to_string(a), - 'url' : 'http://www.wprost.pl' + a['href'], - 'date' : '', - 'description' : '' - } + def parse_index(self): + self.find_last_issue() + soup = self.index_to_soup('http://www.wprost.pl/tygodnik/?I=' + self.EDITION) + feeds = [] + for main_block in soup.findAll(attrs={'class':'main-block-s3 s3-head head-red3'}): + articles = list(self.find_articles(main_block)) + if len(articles) > 0: + section = self.tag_to_string(main_block) + feeds.append((section, articles)) + return feeds + def find_articles(self, main_block): + for a in main_block.findAllNext( attrs={'style':['','padding-top: 15px;']}): + if a.name in "td": + break + if self.EXCLUDE_LOCKED & self.is_blocked(a): + continue + yield { + 'title' : self.tag_to_string(a), + 'url' : 'http://www.wprost.pl' + a['href'], + 'date' : '', + 'description' : '' + } diff --git a/resources/metadata_sqlite.sql b/resources/metadata_sqlite.sql index aa29d4b8de..83f55c2762 100644 --- a/resources/metadata_sqlite.sql +++ b/resources/metadata_sqlite.sql @@ -1,6 +1,7 @@ CREATE TABLE authors ( id INTEGER PRIMARY KEY, name TEXT NOT NULL COLLATE NOCASE, sort TEXT COLLATE NOCASE, + link TEXT NOT NULL DEFAULT "", UNIQUE(name) ); CREATE TABLE books ( id INTEGER PRIMARY KEY AUTOINCREMENT, @@ -545,4 +546,4 @@ CREATE TRIGGER series_update_trg BEGIN UPDATE series SET sort=NEW.name WHERE id=NEW.id; END; -pragma user_version=20; +pragma user_version=21; diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index 
33e80982d1..cf4d09770c 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -106,10 +106,12 @@ def sanitize_file_name(name, substitute='_', as_unicode=False): name = name.encode(filesystem_encoding, 'ignore') one = _filename_sanitize.sub(substitute, name) one = re.sub(r'\s', ' ', one).strip() - one = re.sub(r'^\.+$', '_', one) + bname, ext = os.path.splitext(one) + one = re.sub(r'^\.+$', '_', bname) if as_unicode: one = one.decode(filesystem_encoding) one = one.replace('..', substitute) + one += ext # Windows doesn't like path components that end with a period if one and one[-1] in ('.', ' '): one = one[:-1]+'_' @@ -132,8 +134,10 @@ def sanitize_file_name_unicode(name, substitute='_'): name] one = u''.join(chars) one = re.sub(r'\s', ' ', one).strip() - one = re.sub(r'^\.+$', '_', one) + bname, ext = os.path.splitext(one) + one = re.sub(r'^\.+$', '_', bname) one = one.replace('..', substitute) + one += ext # Windows doesn't like path components that end with a period or space if one and one[-1] in ('.', ' '): one = one[:-1]+'_' diff --git a/src/calibre/db/tables.py b/src/calibre/db/tables.py index 7240b3ec6e..735d2f69a0 100644 --- a/src/calibre/db/tables.py +++ b/src/calibre/db/tables.py @@ -105,11 +105,13 @@ class ManyToManyTable(ManyToOneTable): class AuthorsTable(ManyToManyTable): def read_id_maps(self, db): + self.alink_map = {} for row in db.conn.execute( - 'SELECT id, name, sort FROM authors'): + 'SELECT id, name, sort, link FROM authors'): self.id_map[row[0]] = row[1] self.extra_map[row[0]] = (row[2] if row[2] else author_to_author_sort(row[1])) + self.alink_map[row[0]] = row[3] class FormatsTable(ManyToManyTable): diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index 2c840c644a..08ecbd1ee3 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -19,10 +19,11 @@ class ANDROID(USBMS): VENDOR_ID = { # HTC - 0x0bb4 : { 0x0c02 : [0x100, 0x0227, 0x0226, 0x222], - 
0x0c01 : [0x100, 0x0227, 0x0226], - 0x0ff9 : [0x0100, 0x0227, 0x0226], - 0x0c87 : [0x0100, 0x0227, 0x0226], + 0x0bb4 : { 0xc02 : [0x100, 0x0227, 0x0226, 0x222], + 0xc01 : [0x100, 0x0227, 0x0226], + 0xff9 : [0x0100, 0x0227, 0x0226], + 0xc87 : [0x0100, 0x0227, 0x0226], + 0xc91 : [0x0100, 0x0227, 0x0226], 0xc92 : [0x100], 0xc97 : [0x226], 0xc99 : [0x0100], diff --git a/src/calibre/devices/apple/driver.py b/src/calibre/devices/apple/driver.py index a3b1d05950..de895c65be 100644 --- a/src/calibre/devices/apple/driver.py +++ b/src/calibre/devices/apple/driver.py @@ -5,7 +5,7 @@ __copyright__ = '2010, Gregory Riker' __docformat__ = 'restructuredtext en' -import cStringIO, ctypes, datetime, os, re, sys, tempfile, time +import cStringIO, ctypes, datetime, os, re, shutil, sys, tempfile, time from calibre.constants import __appname__, __version__, DEBUG from calibre import fit_image, confirm_config_name from calibre.constants import isosx, iswindows @@ -119,11 +119,17 @@ class DriverBase(DeviceConfig, DevicePlugin): 'iBooks Category'), _('Cache covers from iTunes/iBooks') + ':::' + - _('Enable to cache and display covers from iTunes/iBooks') + _('Enable to cache and display covers from iTunes/iBooks'), + _("'Copy files to iTunes Media folder" u"\u2026" "' is enabled in iTunes Preferences|Advanced") + + ':::' + + _("

This setting should match your iTunes Preferences|Advanced setting.

" + "

Disabling will store copies of books transferred to iTunes in your calibre configuration directory.

" + "

Enabling indicates that iTunes is configured to store copies in your iTunes Media folder.

") ] EXTRA_CUSTOMIZATION_DEFAULT = [ True, True, + False, ] @@ -193,6 +199,7 @@ class ITUNES(DriverBase): # EXTRA_CUSTOMIZATION_MESSAGE indexes USE_SERIES_AS_CATEGORY = 0 CACHE_COVERS = 1 + USE_ITUNES_STORAGE = 2 OPEN_FEEDBACK_MESSAGE = _( 'Apple device detected, launching iTunes, please wait ...') @@ -281,6 +288,7 @@ class ITUNES(DriverBase): description_prefix = "added by calibre" ejected = False iTunes= None + iTunes_local_storage = None library_orphans = None log = Log() manual_sync_mode = False @@ -825,7 +833,7 @@ class ITUNES(DriverBase): # Confirm/create thumbs archive if not os.path.exists(self.cache_dir): if DEBUG: - self.log.info(" creating thumb cache '%s'" % self.cache_dir) + self.log.info(" creating thumb cache at '%s'" % self.cache_dir) os.makedirs(self.cache_dir) if not os.path.exists(self.archive_path): @@ -837,6 +845,17 @@ class ITUNES(DriverBase): if DEBUG: self.log.info(" existing thumb cache at '%s'" % self.archive_path) + # If enabled in config options, create/confirm an iTunes storage folder + if not self.settings().extra_customization[self.USE_ITUNES_STORAGE]: + self.iTunes_local_storage = os.path.join(config_dir,'iTunes storage') + if not os.path.exists(self.iTunes_local_storage): + if DEBUG: + self.log(" creating iTunes_local_storage at '%s'" % self.iTunes_local_storage) + os.mkdir(self.iTunes_local_storage) + else: + if DEBUG: + self.log(" existing iTunes_local_storage at '%s'" % self.iTunes_local_storage) + def remove_books_from_metadata(self, paths, booklists): ''' Remove books from the metadata list. 
This function must not communicate @@ -1281,50 +1300,27 @@ class ITUNES(DriverBase): if DEBUG: self.log.info(" ITUNES._add_new_copy()") - def _save_last_known_iTunes_storage(lb_added): - if isosx: - fp = lb_added.location().path - index = fp.rfind('/Books') + len('/Books') - last_known_iTunes_storage = fp[:index] - elif iswindows: - fp = lb_added.Location - index = fp.rfind('\Books') + len('\Books') - last_known_iTunes_storage = fp[:index] - dynamic['last_known_iTunes_storage'] = last_known_iTunes_storage - self.log.warning(" last_known_iTunes_storage: %s" % last_known_iTunes_storage) - db_added = None lb_added = None + # If using iTunes_local_storage, copy the file, redirect iTunes to use local copy + if not self.settings().extra_customization[self.USE_ITUNES_STORAGE]: + local_copy = os.path.join(self.iTunes_local_storage, str(metadata.uuid) + os.path.splitext(fpath)[1]) + shutil.copyfile(fpath,local_copy) + fpath = local_copy + if self.manual_sync_mode: ''' - This is the unsupported direct-connect mode. - In an attempt to avoid resetting the iTunes library Media folder, don't try to - add the book to iTunes if the last_known_iTunes_storage path is inaccessible. - This means that the path has to be set at least once, probably by using - 'Connect to iTunes' and doing a transfer. + Unsupported direct-connect mode. 
''' self.log.warning(" unsupported direct connect mode") db_added = self._add_device_book(fpath, metadata) - last_known_iTunes_storage = dynamic.get('last_known_iTunes_storage', None) - if last_known_iTunes_storage is not None: - if os.path.exists(last_known_iTunes_storage): - if DEBUG: - self.log.warning(" iTunes storage online, adding to library") - lb_added = self._add_library_book(fpath, metadata) - else: - if DEBUG: - self.log.warning(" iTunes storage not online, can't add to library") - - if lb_added: - _save_last_known_iTunes_storage(lb_added) + lb_added = self._add_library_book(fpath, metadata) if not lb_added and DEBUG: self.log.warn(" failed to add '%s' to iTunes, iTunes Media folder inaccessible" % metadata.title) else: lb_added = self._add_library_book(fpath, metadata) - if lb_added: - _save_last_known_iTunes_storage(lb_added) - else: + if not lb_added: raise UserFeedback("iTunes Media folder inaccessible", details="Failed to add '%s' to iTunes" % metadata.title, level=UserFeedback.WARN) @@ -1520,7 +1516,7 @@ class ITUNES(DriverBase): else: self.log.error(" book_playlist not found") - if len(dev_books): + if dev_books is not None and len(dev_books): first_book = dev_books[0] if False: self.log.info(" determing manual mode by modifying '%s' by %s" % (first_book.name(), first_book.artist())) @@ -1551,7 +1547,7 @@ class ITUNES(DriverBase): dev_books = pl.Tracks break - if dev_books.Count: + if dev_books is not None and dev_books.Count: first_book = dev_books.Item(1) #if DEBUG: #self.log.info(" determing manual mode by modifying '%s' by %s" % (first_book.Name, first_book.Artist)) @@ -2526,7 +2522,15 @@ class ITUNES(DriverBase): self.log.info(" processing %s" % fp) if fp.startswith(prefs['library_path']): self.log.info(" '%s' stored in calibre database, not removed" % cached_book['title']) + elif not self.settings().extra_customization[self.USE_ITUNES_STORAGE] and \ + fp.startswith(self.iTunes_local_storage) and \ + os.path.exists(fp): + # Delete the copy in 
iTunes_local_storage + os.remove(fp) + if DEBUG: + self.log(" removing from iTunes_local_storage") else: + # Delete from iTunes Media folder if os.path.exists(fp): os.remove(fp) if DEBUG: @@ -2544,12 +2548,6 @@ class ITUNES(DriverBase): os.rmdir(author_storage_path) if DEBUG: self.log.info(" removing empty author directory") - ''' - else: - if DEBUG: - self.log.info(" author_storage_path not empty:") - self.log.info(" %s" % '\n'.join(author_files)) - ''' else: self.log.info(" '%s' does not exist at storage location" % cached_book['title']) @@ -2586,7 +2584,15 @@ class ITUNES(DriverBase): self.log.info(" processing %s" % fp) if fp.startswith(prefs['library_path']): self.log.info(" '%s' stored in calibre database, not removed" % cached_book['title']) + elif not self.settings().extra_customization[self.USE_ITUNES_STORAGE] and \ + fp.startswith(self.iTunes_local_storage) and \ + os.path.exists(fp): + # Delete the copy in iTunes_local_storage + os.remove(fp) + if DEBUG: + self.log(" removing from iTunes_local_storage") else: + # Delete from iTunes Media folder if os.path.exists(fp): os.remove(fp) if DEBUG: @@ -3234,6 +3240,17 @@ class ITUNES_ASYNC(ITUNES): if DEBUG: self.log.info(" existing thumb cache at '%s'" % self.archive_path) + # If enabled in config options, create/confirm an iTunes storage folder + if not self.settings().extra_customization[self.USE_ITUNES_STORAGE]: + self.iTunes_local_storage = os.path.join(config_dir,'iTunes storage') + if not os.path.exists(self.iTunes_local_storage): + if DEBUG: + self.log(" creating iTunes_local_storage at '%s'" % self.iTunes_local_storage) + os.mkdir(self.iTunes_local_storage) + else: + if DEBUG: + self.log(" existing iTunes_local_storage at '%s'" % self.iTunes_local_storage) + def sync_booklists(self, booklists, end_session=True): ''' Update metadata on device. 
diff --git a/src/calibre/devices/iriver/driver.py b/src/calibre/devices/iriver/driver.py index 0ad540f8a3..21b188e031 100644 --- a/src/calibre/devices/iriver/driver.py +++ b/src/calibre/devices/iriver/driver.py @@ -20,11 +20,11 @@ class IRIVER_STORY(USBMS): FORMATS = ['epub', 'fb2', 'pdf', 'djvu', 'txt'] VENDOR_ID = [0x1006] - PRODUCT_ID = [0x4023, 0x4024, 0x4025] - BCD = [0x0323] + PRODUCT_ID = [0x4023, 0x4024, 0x4025, 0x4034] + BCD = [0x0323, 0x0326] VENDOR_NAME = 'IRIVER' - WINDOWS_MAIN_MEM = ['STORY', 'STORY_EB05', 'STORY_WI-FI'] + WINDOWS_MAIN_MEM = ['STORY', 'STORY_EB05', 'STORY_WI-FI', 'STORY_EB07'] WINDOWS_CARD_A_MEM = ['STORY', 'STORY_SD'] #OSX_MAIN_MEM = 'Kindle Internal Storage Media' diff --git a/src/calibre/devices/linux_mount_helper.c b/src/calibre/devices/linux_mount_helper.c index 2ced0f31fa..550510106e 100644 --- a/src/calibre/devices/linux_mount_helper.c +++ b/src/calibre/devices/linux_mount_helper.c @@ -64,14 +64,24 @@ int do_mount(const char *dev, const char *mp) { snprintf(options, 1000, "rw,noexec,nosuid,sync,nodev"); snprintf(uids, 100, "%d", getuid()); snprintf(gids, 100, "%d", getgid()); +#else +#ifdef __FreeBSD__ + snprintf(options, 1000, "rw,noexec,nosuid,sync,-u=%d,-g=%d",getuid(),getgid()); #else snprintf(options, 1000, "rw,noexec,nosuid,sync,nodev,quiet,shortname=mixed,uid=%d,gid=%d,umask=077,fmask=0177,dmask=0077,utf8,iocharset=iso8859-1", getuid(), getgid()); #endif +#endif + ensure_root(); + #ifdef __NetBSD__ execlp("mount_msdos", "mount_msdos", "-u", uids, "-g", gids, "-o", options, dev, mp, NULL); +#else +#ifdef __FreeBSD__ + execlp("mount", "mount", "-t", "msdosfs", "-o", options, dev, mp, NULL); #else execlp("mount", "mount", "-t", "auto", "-o", options, dev, mp, NULL); +#endif #endif errsv = errno; fprintf(stderr, "Failed to mount with error: %s\n", strerror(errsv)); @@ -91,8 +101,12 @@ int call_eject(const char *dev, const char *mp) { ensure_root(); #ifdef __NetBSD__ execlp("eject", "eject", dev, NULL); +#else +#ifdef 
__FreeBSD__ + execlp("umount", "umount", dev, NULL); #else execlp("eject", "eject", "-s", dev, NULL); +#endif #endif /* execlp failed */ errsv = errno; @@ -121,7 +135,11 @@ int call_umount(const char *dev, const char *mp) { if (pid == 0) { /* Child process */ ensure_root(); +#ifdef __FreeBSD__ + execlp("umount", "umount", mp, NULL); +#else execlp("umount", "umount", "-l", mp, NULL); +#endif /* execlp failed */ errsv = errno; fprintf(stderr, "Failed to umount with error: %s\n", strerror(errsv)); diff --git a/src/calibre/devices/usbms/books.py b/src/calibre/devices/usbms/books.py index 731d3e2b49..4d726e5bde 100644 --- a/src/calibre/devices/usbms/books.py +++ b/src/calibre/devices/usbms/books.py @@ -14,7 +14,7 @@ from calibre.constants import preferred_encoding from calibre import isbytestring, force_unicode from calibre.utils.config import prefs, tweaks from calibre.utils.icu import strcmp -from calibre.utils.formatter import eval_formatter +from calibre.utils.formatter import EvalFormatter class Book(Metadata): def __init__(self, prefix, lpath, size=None, other=None): @@ -116,7 +116,7 @@ class CollectionsBookList(BookList): field_name = field_meta['name'] else: field_name = '' - cat_name = eval_formatter.safe_format( + cat_name = EvalFormatter().safe_format( fmt=tweaks['sony_collection_name_template'], kwargs={'category':field_name, 'value':field_value}, error_value='GET_CATEGORY', book=None) diff --git a/src/calibre/devices/usbms/device.py b/src/calibre/devices/usbms/device.py index 442f3701c4..bdbf5f44cf 100644 --- a/src/calibre/devices/usbms/device.py +++ b/src/calibre/devices/usbms/device.py @@ -17,7 +17,7 @@ from itertools import repeat from calibre.devices.interface import DevicePlugin from calibre.devices.errors import DeviceError, FreeSpaceError from calibre.devices.usbms.deviceconfig import DeviceConfig -from calibre.constants import iswindows, islinux, isosx, plugins +from calibre.constants import iswindows, islinux, isosx, isfreebsd, plugins from 
calibre.utils.filenames import ascii_filename as sanitize, shorten_components_to if isosx: @@ -701,7 +701,152 @@ class Device(DeviceConfig, DevicePlugin): self._card_a_prefix = self._card_b_prefix self._card_b_prefix = None +# ------------------------------------------------------ +# +# open for FreeBSD +# find the device node or nodes that match the S/N we already have from the scanner +# and attempt to mount each one +# 1. get list of disk devices from sysctl +# 2. compare that list with the one from camcontrol +# 3. and see if it has a matching s/n +# 6. find any partitions/slices associated with each node +# 7. attempt to mount, using calibre-mount-helper, each one +# 8. when finished, we have a list of mount points and associated device nodes +# + def open_freebsd(self): + # this gives us access to the S/N, etc. of the reader that the scanner has found + # and the match routines for some of that data, like s/n, vendor ID, etc. + d=self.detected_device + + if not d.serial: + raise DeviceError("Device has no S/N. Can't continue") + return False + + devs={} + di=0 + ndevs=4 # number of possible devices per reader (main, carda, cardb, launcher) + + #get list of disk devices + p=subprocess.Popen(["sysctl", "kern.disks"], stdout=subprocess.PIPE) + kdsks=subprocess.Popen(["sed", "s/kern.disks: //"], stdin=p.stdout, stdout=subprocess.PIPE).communicate()[0] + p.stdout.close() + #print kdsks + for dvc in kdsks.split(): + # for each one that's also in the list of cam devices ... + p=subprocess.Popen(["camcontrol", "devlist"], stdout=subprocess.PIPE) + devmatch=subprocess.Popen(["grep", dvc], stdin=p.stdout, stdout=subprocess.PIPE).communicate()[0] + p.stdout.close() + if devmatch: + #print "Checking ", devmatch + # ... 
see if we can get a S/N from the actual device node + sn=subprocess.Popen(["camcontrol", "inquiry", dvc, "-S"], stdout=subprocess.PIPE).communicate()[0] + sn=sn[0:-1] # drop the trailing newline + #print "S/N = ", sn + if sn and d.match_serial(sn): + # we have a matching s/n, record this device node + #print "match found: ", dvc + devs[di]=dvc + di += 1 + + # sort the list of devices + for i in range(1,ndevs+1): + for j in reversed(range(1,i)): + if devs[j-1] > devs[j]: + x=devs[j-1] + devs[j-1]=devs[j] + devs[j]=x + #print devs + + # now we need to see if any of these have slices/partitions + mtd=0 + label="READER" # could use something more unique, like S/N or productID... + cmd = '/usr/local/bin/calibre-mount-helper' + cmd = [cmd, 'mount'] + for i in range(0,ndevs): + cmd2="ls /dev/"+devs[i]+"*" + p=subprocess.Popen(cmd2, shell=True, stdout=subprocess.PIPE) + devs[i]=subprocess.Popen(["cut", "-d", "/", "-f" "3"], stdin=p.stdout, stdout=subprocess.PIPE).communicate()[0] + p.stdout.close() + + # try all the nodes to see what we can mount + for dev in devs[i].split(): + mp='/media/'+label+'-'+dev + #print "trying ", dev, "on", mp + try: + p = subprocess.Popen(cmd + ["/dev/"+dev, mp]) + except OSError: + raise DeviceError(_('Could not find mount helper: %s.')%cmd[0]) + while p.poll() is None: + time.sleep(0.1) + + if p.returncode == 0: + #print " mounted", dev + if i == 0: + self._main_prefix = mp + self._main_dev = "/dev/"+dev + #print "main = ", self._main_dev, self._main_prefix + if i == 1: + self._card_a_prefix = mp + self._card_a_dev = "/dev/"+dev + #print "card a = ", self._card_a_dev, self._card_a_prefix + if i == 2: + self._card_b_prefix = mp + self._card_b_dev = "/dev/"+dev + #print "card b = ", self._card_b_dev, self._card_b_prefix + + mtd += 1 + break + + if mtd > 0: + return True + else : + return False +# +# ------------------------------------------------------ +# +# this one is pretty simple: +# just umount each of the previously +# mounted 
filesystems, using the mount helper +# + def eject_freebsd(self): + cmd = '/usr/local/bin/calibre-mount-helper' + cmd = [cmd, 'eject'] + + if self._main_prefix: + #print "umount main:", cmd, self._main_dev, self._main_prefix + try: + p = subprocess.Popen(cmd + [self._main_dev, self._main_prefix]) + except OSError: + raise DeviceError( + _('Could not find mount helper: %s.')%cmd[0]) + while p.poll() is None: + time.sleep(0.1) + + if self._card_a_prefix: + #print "umount card a:", cmd, self._card_a_dev, self._card_a_prefix + try: + p = subprocess.Popen(cmd + [self._card_a_dev, self._card_a_prefix]) + except OSError: + raise DeviceError( + _('Could not find mount helper: %s.')%cmd[0]) + while p.poll() is None: + time.sleep(0.1) + + if self._card_b_prefix: + #print "umount card b:", cmd, self._card_b_dev, self._card_b_prefix + try: + p = subprocess.Popen(cmd + [self._card_b_dev, self._card_b_prefix]) + except OSError: + raise DeviceError( + _('Could not find mount helper: %s.')%cmd[0]) + while p.poll() is None: + time.sleep(0.1) + + self._main_prefix = None + self._card_a_prefix = None + self._card_b_prefix = None +# ------------------------------------------------------ def open(self, library_uuid): time.sleep(5) @@ -712,6 +857,14 @@ class Device(DeviceConfig, DevicePlugin): except DeviceError: time.sleep(7) self.open_linux() + if isfreebsd: + self._main_dev = self._card_a_dev = self._card_b_dev = None + try: + self.open_freebsd() + except DeviceError: + subprocess.Popen(["camcontrol", "rescan", "all"]) + time.sleep(2) + self.open_freebsd() if iswindows: try: self.open_windows() @@ -800,6 +953,11 @@ class Device(DeviceConfig, DevicePlugin): self.eject_linux() except: pass + if isfreebsd: + try: + self.eject_freebsd() + except: + pass if iswindows: try: self.eject_windows() diff --git a/src/calibre/ebooks/compression/palmdoc.c b/src/calibre/ebooks/compression/palmdoc.c index 6b07bb9cd5..922b63fe1b 100644 --- a/src/calibre/ebooks/compression/palmdoc.c +++ 
b/src/calibre/ebooks/compression/palmdoc.c @@ -54,7 +54,7 @@ cpalmdoc_decompress(PyObject *self, PyObject *args) { // Map chars to bytes for (j = 0; j < input_len; j++) input[j] = (_input[j] < 0) ? _input[j]+256 : _input[j]; - output = (char *)PyMem_Malloc(sizeof(char)*(MAX(BUFFER, 5*input_len))); + output = (char *)PyMem_Malloc(sizeof(char)*(MAX(BUFFER, 8*input_len))); if (output == NULL) return PyErr_NoMemory(); while (i < input_len) { diff --git a/src/calibre/ebooks/metadata/book/__init__.py b/src/calibre/ebooks/metadata/book/__init__.py index fae858aabd..50e7b916ee 100644 --- a/src/calibre/ebooks/metadata/book/__init__.py +++ b/src/calibre/ebooks/metadata/book/__init__.py @@ -86,6 +86,8 @@ CALIBRE_METADATA_FIELDS = frozenset([ # a dict of user category names, where the value is a list of item names # from the book that are in that category 'user_categories', + # a dict of author to an associated hyperlink + 'author_link_map', ] ) diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py index 382cb6c5a2..3be37d922e 100644 --- a/src/calibre/ebooks/metadata/book/base.py +++ b/src/calibre/ebooks/metadata/book/base.py @@ -34,6 +34,7 @@ NULL_VALUES = { 'authors' : [_('Unknown')], 'title' : _('Unknown'), 'user_categories' : {}, + 'author_link_map' : {}, 'language' : 'und' } @@ -70,6 +71,7 @@ class SafeFormat(TemplateFormatter): return '' return v +# DEPRECATED. This is not thread safe. Do not use. 
composite_formatter = SafeFormat() class Metadata(object): @@ -110,6 +112,7 @@ class Metadata(object): # List of strings or [] self.author = list(authors) if authors else []# Needed for backward compatibility self.authors = list(authors) if authors else [] + self.formatter = SafeFormat() def is_null(self, field): ''' @@ -146,7 +149,7 @@ class Metadata(object): return val if val is None: d['#value#'] = 'RECURSIVE_COMPOSITE FIELD (Metadata) ' + field - val = d['#value#'] = composite_formatter.safe_format( + val = d['#value#'] = self.formatter.safe_format( d['display']['composite_template'], self, _('TEMPLATE ERROR'), @@ -423,11 +426,12 @@ class Metadata(object): ''' if not ops: return + formatter = SafeFormat() for op in ops: try: src = op[0] dest = op[1] - val = composite_formatter.safe_format\ + val = formatter.safe_format\ (src, other, 'PLUGBOARD TEMPLATE ERROR', other) if dest == 'tags': self.set(dest, [f.strip() for f in val.split(',') if f.strip()]) diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index 80fb84633b..186821b0c3 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -474,7 +474,7 @@ def serialize_user_metadata(metadata_elem, all_user_metadata, tail='\n'+(' '*8)) metadata_elem.append(meta) -def dump_user_categories(cats): +def dump_dict(cats): if not cats: cats = {} from calibre.ebooks.metadata.book.json_codec import object_to_unicode @@ -537,8 +537,9 @@ class OPF(object): # {{{ formatter=parse_date, renderer=isoformat) user_categories = MetadataField('user_categories', is_dc=False, formatter=json.loads, - renderer=dump_user_categories) - + renderer=dump_dict) + author_link_map = MetadataField('author_link_map', is_dc=False, + formatter=json.loads, renderer=dump_dict) def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True, populate_spine=True): @@ -1039,7 +1040,7 @@ class OPF(object): # {{{ for attr in ('title', 'authors', 'author_sort', 'title_sort', 
'publisher', 'series', 'series_index', 'rating', 'isbn', 'tags', 'category', 'comments', - 'pubdate', 'user_categories'): + 'pubdate', 'user_categories', 'author_link_map'): val = getattr(mi, attr, None) if val is not None and val != [] and val != (None, None): setattr(self, attr, val) @@ -1336,6 +1337,8 @@ def metadata_to_opf(mi, as_string=True): for tag in mi.tags: factory(DC('subject'), tag) meta = lambda n, c: factory('meta', name='calibre:'+n, content=c) + if getattr(mi, 'author_link_map', None) is not None: + meta('author_link_map', dump_dict(mi.author_link_map)) if mi.series: meta('series', mi.series) if mi.series_index is not None: @@ -1349,7 +1352,7 @@ def metadata_to_opf(mi, as_string=True): if mi.title_sort: meta('title_sort', mi.title_sort) if mi.user_categories: - meta('user_categories', dump_user_categories(mi.user_categories)) + meta('user_categories', dump_dict(mi.user_categories)) serialize_user_metadata(metadata, mi.get_all_user_metadata(False)) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 46505de4bd..1173b84266 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -957,7 +957,10 @@ def get_metadata(stream): return get_metadata(stream) from calibre.utils.logging import Log log = Log() - mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')]) + try: + mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')]) + except: + mi = MetaInformation(_('Unknown'), [_('Unknown')]) mh = MetadataHeader(stream, log) if mh.title and mh.title != _('Unknown'): mi.title = mh.title diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index 0b21502327..c9550dd01e 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -7,12 +7,13 @@ from urllib import unquote from PyQt4.Qt import (QVariant, QFileInfo, QObject, SIGNAL, QBuffer, Qt, QByteArray, QTranslator, QCoreApplication, QThread, QEvent, QTimer, pyqtSignal, QDate, 
QDesktopServices, - QFileDialog, QFileIconProvider, + QFileDialog, QFileIconProvider, QSettings, QIcon, QApplication, QDialog, QUrl, QFont) ORG_NAME = 'KovidsBrain' APP_UID = 'libprs500' -from calibre.constants import islinux, iswindows, isbsd, isfrozen, isosx +from calibre.constants import (islinux, iswindows, isbsd, isfrozen, isosx, + config_dir) from calibre.utils.config import Config, ConfigProxy, dynamic, JSONConfig from calibre.utils.localization import set_qt_translator from calibre.ebooks.metadata import MetaInformation @@ -82,13 +83,14 @@ gprefs.defaults['tags_browser_partition_method'] = 'first letter' gprefs.defaults['tags_browser_collapse_at'] = 100 gprefs.defaults['edit_metadata_single_layout'] = 'default' gprefs.defaults['book_display_fields'] = [ - ('title', False), ('authors', False), ('formats', True), + ('title', False), ('authors', True), ('formats', True), ('series', True), ('identifiers', True), ('tags', True), ('path', True), ('publisher', False), ('rating', False), ('author_sort', False), ('sort', False), ('timestamp', False), ('uuid', False), ('comments', True), ('id', False), ('pubdate', False), ('last_modified', False), ('size', False), ] +gprefs.defaults['default_author_link'] = 'http://en.wikipedia.org/w/index.php?search={author}' # }}} @@ -192,6 +194,11 @@ def _config(): # {{{ config = _config() # }}} +QSettings.setPath(QSettings.IniFormat, QSettings.UserScope, config_dir) +QSettings.setPath(QSettings.IniFormat, QSettings.SystemScope, + config_dir) +QSettings.setDefaultFormat(QSettings.IniFormat) + # Turn off DeprecationWarnings in windows GUI if iswindows: import warnings diff --git a/src/calibre/gui2/actions/show_quickview.py b/src/calibre/gui2/actions/show_quickview.py index 78352e6da8..4f7bbc0473 100644 --- a/src/calibre/gui2/actions/show_quickview.py +++ b/src/calibre/gui2/actions/show_quickview.py @@ -38,3 +38,6 @@ class ShowQuickviewAction(InterfaceAction): Quickview(self.gui, self.gui.library_view, index) 
self.current_instance.show() + def library_changed(self, db): + if self.current_instance and not self.current_instance.is_closed: + self.current_instance.set_database(db) diff --git a/src/calibre/gui2/book_details.py b/src/calibre/gui2/book_details.py index f94e179166..5d396e2e96 100644 --- a/src/calibre/gui2/book_details.py +++ b/src/calibre/gui2/book_details.py @@ -5,6 +5,7 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' +import urllib2 from PyQt4.Qt import (QPixmap, QSize, QWidget, Qt, pyqtSignal, QUrl, QPropertyAnimation, QEasingCurve, QApplication, QFontInfo, @@ -23,6 +24,7 @@ from calibre.library.comments import comments_to_html from calibre.gui2 import (config, open_local_file, open_url, pixmap_to_data, gprefs) from calibre.utils.icu import sort_key +from calibre.utils.formatter import EvalFormatter def render_html(mi, css, vertical, widget, all_fields=False): # {{{ table = render_data(mi, all_fields=all_fields, @@ -121,6 +123,27 @@ def render_data(mi, use_roman_numbers=True, all_fields=False): if links: ans.append((field, u'
%s%s%s%s