Sync to trunk.

This commit is contained in:
John Schember 2011-06-29 07:35:09 -04:00
commit 0c023835ac
50 changed files with 2664 additions and 906 deletions

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
www.eluniversal.com www.eluniversal.com
''' '''
@ -15,12 +15,20 @@ class ElUniversal(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
remove_empty_feeds = True
encoding = 'cp1252' encoding = 'cp1252'
publisher = 'El Universal' publisher = 'El Universal'
category = 'news, Caracas, Venezuela, world' category = 'news, Caracas, Venezuela, world'
language = 'es_VE' language = 'es_VE'
publication_type = 'newspaper'
cover_url = strftime('http://static.eluniversal.com/%Y/%m/%d/portada.jpg') cover_url = strftime('http://static.eluniversal.com/%Y/%m/%d/portada.jpg')
extra_css = """
.txt60{font-family: Tahoma,Geneva,sans-serif; font-size: small}
.txt29{font-family: Tahoma,Geneva,sans-serif; font-size: small; color: gray}
.txt38{font-family: Georgia,"Times New Roman",Times,serif; font-size: xx-large}
.txt35{font-family: Georgia,"Times New Roman",Times,serif; font-size: large}
body{font-family: Verdana,Arial,Helvetica,sans-serif}
"""
conversion_options = { conversion_options = {
'comments' : description 'comments' : description
,'tags' : category ,'tags' : category
@ -28,10 +36,11 @@ class ElUniversal(BasicNewsRecipe):
,'publisher' : publisher ,'publisher' : publisher
} }
keep_only_tags = [dict(name='div', attrs={'class':'Nota'})] remove_tags_before=dict(attrs={'class':'header-print MB10'})
remove_tags_after= dict(attrs={'id':'SizeText'})
remove_tags = [ remove_tags = [
dict(name=['object','link','script','iframe']) dict(name=['object','link','script','iframe','meta'])
,dict(name='div',attrs={'class':'Herramientas'}) ,dict(attrs={'class':'header-print MB10'})
] ]
feeds = [ feeds = [

View File

@ -1,32 +1,41 @@
#!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
ft.com www.ft.com
''' '''
import datetime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class FinancialTimes(BasicNewsRecipe): class FinancialTimes_rss(BasicNewsRecipe):
title = u'Financial Times' title = 'Financial Times'
__author__ = 'Darko Miletic and Sujata Raman' __author__ = 'Darko Miletic'
description = ('Financial world news. Available after 5AM ' description = "The Financial Times (FT) is one of the world's leading business news and information organisations, recognised internationally for its authority, integrity and accuracy."
'GMT, daily.') publisher = 'The Financial Times Ltd.'
category = 'news, finances, politics, World'
oldest_article = 2 oldest_article = 2
language = 'en' language = 'en'
max_articles_per_feed = 250
max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
needs_subscription = True needs_subscription = True
simultaneous_downloads= 1 encoding = 'utf8'
delay = 1 publication_type = 'newspaper'
masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
LOGIN = 'https://registration.ft.com/registration/barrier/login'
INDEX = 'http://www.ft.com'
LOGIN = 'https://registration.ft.com/registration/barrier/login' conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : True
}
def get_browser(self): def get_browser(self):
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
br.open(self.INDEX)
if self.username is not None and self.password is not None: if self.username is not None and self.password is not None:
br.open(self.LOGIN) br.open(self.LOGIN)
br.select_form(name='loginForm') br.select_form(name='loginForm')
@ -35,31 +44,63 @@ class FinancialTimes(BasicNewsRecipe):
br.submit() br.submit()
return br return br
keep_only_tags = [ dict(name='div', attrs={'id':'cont'}) ] keep_only_tags = [dict(name='div', attrs={'class':['fullstory fullstoryHeader','fullstory fullstoryBody','ft-story-header','ft-story-body','index-detail']})]
remove_tags_after = dict(name='p', attrs={'class':'copyright'})
remove_tags = [ remove_tags = [
dict(name='div', attrs={'id':'floating-con'}) dict(name='div', attrs={'id':'floating-con'})
,dict(name=['meta','iframe','base','object','embed','link'])
,dict(attrs={'class':['storyTools','story-package','screen-copy','story-package separator','expandable-image']})
] ]
remove_attributes = ['width','height','lang']
extra_css = ''' extra_css = """
body{font-family:Arial,Helvetica,sans-serif;} body{font-family: Georgia,Times,"Times New Roman",serif}
h2(font-size:large;} h2{font-size:large}
.ft-story-header(font-size:xx-small;} .ft-story-header{font-size: x-small}
.ft-story-body(font-size:small;}
a{color:#003399;}
.container{font-size:x-small;} .container{font-size:x-small;}
h3{font-size:x-small;color:#003399;} h3{font-size:x-small;color:#003399;}
''' .copyright{font-size: x-small}
img{margin-top: 0.8em; display: block}
.lastUpdated{font-family: Arial,Helvetica,sans-serif; font-size: x-small}
.byline,.ft-story-body,.ft-story-header{font-family: Arial,Helvetica,sans-serif}
"""
feeds = [ feeds = [
(u'UK' , u'http://www.ft.com/rss/home/uk' ) (u'UK' , u'http://www.ft.com/rss/home/uk' )
,(u'US' , u'http://www.ft.com/rss/home/us' ) ,(u'US' , u'http://www.ft.com/rss/home/us' )
,(u'Europe' , u'http://www.ft.com/rss/home/europe' )
,(u'Asia' , u'http://www.ft.com/rss/home/asia' ) ,(u'Asia' , u'http://www.ft.com/rss/home/asia' )
,(u'Middle East', u'http://www.ft.com/rss/home/middleeast') ,(u'Middle East', u'http://www.ft.com/rss/home/middleeast')
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):
content_type = soup.find('meta', {'http-equiv':'Content-Type'}) items = ['promo-box','promo-title',
if content_type: 'promo-headline','promo-image',
content_type['content'] = 'text/html; charset=utf-8' 'promo-intro','promo-link','subhead']
for item in items:
for it in soup.findAll(item):
it.name = 'div'
it.attrs = []
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('a'):
limg = item.find('img')
if item.string is not None:
str = item.string
item.replaceWith(str)
else:
if limg:
item.name = 'div'
item.attrs = []
else:
str = self.tag_to_string(item)
item.replaceWith(str)
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup return soup
def get_cover_url(self):
    """Return the address of the ``FRONT1_USA`` front-page PDF used as cover.

    The address embeds the current local date as ``ddmmyy``.  On a Sunday
    (ISO weekday 7) the date of the preceding day is used instead --
    presumably because no front page is published on Sundays.
    """
    template = 'http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_USA.pdf'
    issue_day = datetime.date.today()
    is_sunday = issue_day.isoweekday() == 7
    if is_sunday:
        # Step back one day to Saturday's front page.
        issue_day = issue_day - datetime.timedelta(days=1)
    return issue_day.strftime(template)

View File

@ -3,6 +3,8 @@ __copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
www.ft.com/uk-edition www.ft.com/uk-edition
''' '''
import datetime
from calibre import strftime from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
@ -20,7 +22,6 @@ class FinancialTimes(BasicNewsRecipe):
needs_subscription = True needs_subscription = True
encoding = 'utf8' encoding = 'utf8'
publication_type = 'newspaper' publication_type = 'newspaper'
cover_url = strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_LON.pdf')
masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg' masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
LOGIN = 'https://registration.ft.com/registration/barrier/login' LOGIN = 'https://registration.ft.com/registration/barrier/login'
INDEX = 'http://www.ft.com/uk-edition' INDEX = 'http://www.ft.com/uk-edition'
@ -128,3 +129,10 @@ class FinancialTimes(BasicNewsRecipe):
if not item.has_key('alt'): if not item.has_key('alt'):
item['alt'] = 'image' item['alt'] = 'image'
return soup return soup
def get_cover_url(self):
    """Return the address of the ``FRONT1_LON`` front-page PDF used as cover.

    The file name starts with the current local date formatted ``ddmmyy``.
    When today is a Sunday (ISO weekday 7) the previous day's date is
    substituted -- presumably because there is no Sunday front page.
    """
    one_day = datetime.timedelta(days=1)
    cover_date = datetime.date.today()
    if cover_date.isoweekday() == 7:
        # Sunday: fall back to Saturday's front page.
        cover_date = cover_date - one_day
    pattern = 'http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_LON.pdf'
    return cover_date.strftime(pattern)

View File

@ -1,5 +1,6 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import re import re
from datetime import date, timedelta
class HBR(BasicNewsRecipe): class HBR(BasicNewsRecipe):
@ -12,13 +13,14 @@ class HBR(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
LOGIN_URL = 'http://hbr.org/login?request_url=/' LOGIN_URL = 'http://hbr.org/login?request_url=/'
INDEX = 'http://hbr.org/current' INDEX = 'http://hbr.org/archive-toc/BR'
keep_only_tags = [dict(name='div', id='pageContainer')] keep_only_tags = [dict(name='div', id='pageContainer')]
remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline', remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn', 'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR', 'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
'mailingListTout', 'partnerCenter', 'pageFooter', 'mailingListTout', 'partnerCenter', 'pageFooter',
'superNavHeadContainer', 'hbrDisqus',
'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']), 'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
dict(name='iframe')] dict(name='iframe')]
extra_css = ''' extra_css = '''
@ -55,9 +57,14 @@ class HBR(BasicNewsRecipe):
def hbr_get_toc(self): def hbr_get_toc(self):
soup = self.index_to_soup(self.INDEX) today = date.today()
url = soup.find('a', text=lambda t:'Full Table of Contents' in t).parent.get('href') future = today + timedelta(days=30)
return self.index_to_soup('http://hbr.org'+url) for x in [x.strftime('%y%m') for x in (future, today)]:
url = self.INDEX + x
soup = self.index_to_soup(url)
if not soup.find(text='Issue Not Found'):
return soup
raise Exception('Could not find current issue')
def hbr_parse_section(self, container, feeds): def hbr_parse_section(self, container, feeds):
current_section = None current_section = None

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

View File

@ -1,17 +1,23 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010-2011, Eddie Lau' __copyright__ = '2010-2011, Eddie Lau'
# Region - Hong Kong, Vancouver, Toronto
__Region__ = 'Hong Kong'
# Users of Kindle 3 with limited system-level CJK support # Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False". # please replace the following "True" with "False".
__MakePeriodical__ = True __MakePeriodical__ = True
# Turn below to true if your device supports display of CJK titles # Turn below to true if your device supports display of CJK titles
__UseChineseTitle__ = False __UseChineseTitle__ = False
# Trun below to true if you wish to use life.mingpao.com as the main article source # Set it to False if you want to skip images
__KeepImages__ = True
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
__UseLife__ = True __UseLife__ = True
''' '''
Change Log: Change Log:
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
provide options to remove all images in the file
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages 2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
2011/03/06: add new articles for finance section, also a new section "Columns" 2011/03/06: add new articles for finance section, also a new section "Columns"
2011/02/28: rearrange the sections 2011/02/28: rearrange the sections
@ -34,21 +40,96 @@ Change Log:
import os, datetime, re import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
class MPHKRecipe(BasicNewsRecipe): # MAIN CLASS
title = 'Ming Pao - Hong Kong' class MPRecipe(BasicNewsRecipe):
if __Region__ == 'Hong Kong':
title = 'Ming Pao - Hong Kong'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
category = 'Chinese, News, Hong Kong'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
keep_only_tags = [dict(name='h1'),
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}),
dict(attrs={'class':['photo']}),
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
]
if __KeepImages__:
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
#dict(name='table') # for content fetched from life.mingpao.com
]
else:
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
dict(name='img'),
#dict(name='table') # for content fetched from life.mingpao.com
]
remove_attributes = ['width']
preprocess_regexps = [
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
lambda match: '<h1>'),
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
lambda match: '</h1>'),
(re.compile(r'<p><a href=.+?</a></p>', re.DOTALL|re.IGNORECASE), # for entertainment page
lambda match: ''),
# skip <br> after title in life.mingpao.com fetched article
(re.compile(r"<div id='newscontent'><br>", re.DOTALL|re.IGNORECASE),
lambda match: "<div id='newscontent'>"),
(re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
lambda match: "</b>")
]
elif __Region__ == 'Vancouver':
title = 'Ming Pao - Vancouver'
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
category = 'Chinese, News, Vancouver'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
masthead_url = 'http://www.mingpaovan.com/image/mainlogo2_VAN2.gif'
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
]
if __KeepImages__:
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
else:
remove_tags = [dict(name='img')]
remove_attributes = ['width']
preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
elif __Region__ == 'Toronto':
title = 'Ming Pao - Toronto'
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
category = 'Chinese, News, Toronto'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
masthead_url = 'http://www.mingpaotor.com/image/mainlogo2_TOR2.gif'
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
]
if __KeepImages__:
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
else:
remove_tags = [dict(name='img')]
remove_attributes = ['width']
preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
oldest_article = 1 oldest_article = 1
max_articles_per_feed = 100 max_articles_per_feed = 100
__author__ = 'Eddie Lau' __author__ = 'Eddie Lau'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
publisher = 'MingPao' publisher = 'MingPao'
category = 'Chinese, News, Hong Kong'
remove_javascript = True remove_javascript = True
use_embedded_content = False use_embedded_content = False
no_stylesheets = True no_stylesheets = True
@ -57,33 +138,6 @@ class MPHKRecipe(BasicNewsRecipe):
recursions = 0 recursions = 0
conversion_options = {'linearize_tables':True} conversion_options = {'linearize_tables':True}
timefmt = '' timefmt = ''
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
keep_only_tags = [dict(name='h1'),
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}),
dict(attrs={'class':['photo']}),
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
]
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='table')] # for content fetched from life.mingpao.com
remove_attributes = ['width']
preprocess_regexps = [
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
lambda match: '<h1>'),
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
lambda match: '</h1>'),
(re.compile(r'<p><a href=.+?</a></p>', re.DOTALL|re.IGNORECASE), # for entertainment page
lambda match: ''),
# skip <br> after title in life.mingpao.com fetched article
(re.compile(r"<div id='newscontent'><br>", re.DOTALL|re.IGNORECASE),
lambda match: "<div id='newscontent'>"),
(re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
lambda match: "</b>")
]
def image_url_processor(cls, baseurl, url): def image_url_processor(cls, baseurl, url):
# trick: break the url at the first occurrence of digit, add an additional # trick: break the url at the first occurrence of digit, add an additional
@ -124,8 +178,18 @@ class MPHKRecipe(BasicNewsRecipe):
def get_dtlocal(self): def get_dtlocal(self):
dt_utc = datetime.datetime.utcnow() dt_utc = datetime.datetime.utcnow()
# convert UTC to local hk time - at around HKT 6.00am, all news are available if __Region__ == 'Hong Kong':
dt_local = dt_utc - datetime.timedelta(-2.0/24) # convert UTC to local hk time - at HKT 5.30am, all news are available
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
elif __Region__ == 'Vancouver':
# convert UTC to local Vancouver time - at PST time 5.30am, all news are available
dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
#dt_local = dt_utc.astimezone(pytz.timezone('America/Vancouver')) - datetime.timedelta(5.5/24)
elif __Region__ == 'Toronto':
# convert UTC to local Toronto time - at EST time 8.30am, all news are available
dt_local = dt_utc + datetime.timedelta(-5.0/24) - datetime.timedelta(8.5/24)
#dt_local = dt_utc.astimezone(pytz.timezone('America/Toronto')) - datetime.timedelta(8.5/24)
return dt_local return dt_local
def get_fetchdate(self): def get_fetchdate(self):
@ -135,13 +199,15 @@ class MPHKRecipe(BasicNewsRecipe):
return self.get_dtlocal().strftime("%Y-%m-%d") return self.get_dtlocal().strftime("%Y-%m-%d")
def get_fetchday(self): def get_fetchday(self):
# dt_utc = datetime.datetime.utcnow()
# convert UTC to local hk time - at around HKT 6.00am, all news are available
# dt_local = dt_utc - datetime.timedelta(-2.0/24)
return self.get_dtlocal().strftime("%d") return self.get_dtlocal().strftime("%d")
def get_cover_url(self): def get_cover_url(self):
cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg' if __Region__ == 'Hong Kong':
cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
elif __Region__ == 'Vancouver':
cover = 'http://www.mingpaovan.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgva1s.jpg'
elif __Region__ == 'Toronto':
cover = 'http://www.mingpaotor.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgtas.jpg'
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
try: try:
br.open(cover) br.open(cover)
@ -153,76 +219,104 @@ class MPHKRecipe(BasicNewsRecipe):
feeds = [] feeds = []
dateStr = self.get_fetchdate() dateStr = self.get_fetchdate()
if __UseLife__: if __Region__ == 'Hong Kong':
for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'), if __UseLife__:
(u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'), for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'),
(u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'), (u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'),
(u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr', 'nal'), (u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'),
(u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalfa', 'nal'), (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr', 'nal'),
(u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalca', 'nal'), (u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalfa', 'nal'),
(u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'), (u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalca', 'nal'),
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'), (u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'), (u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'), (u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]: (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
articles = self.parse_section2(url, keystr) (u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
articles = self.parse_section2(url, keystr)
if articles:
feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
else:
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- editorial
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
if ed_articles:
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special - finance
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
if fin_articles:
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special - entertainment
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
if ent_articles:
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- columns
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
if col_articles:
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
elif __Region__ == 'Vancouver':
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
(u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
(u'\u793e\u5340 Local', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VDindex.htm'),
(u'\u6e2f\u805e Hong Kong', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-VGindex.htm'),
(u'\u570b\u969b World', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VTindex.htm'),
(u'\u4e2d\u570b China', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VCindex.htm'),
(u'\u7d93\u6fdf Economics', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VEindex.htm'),
(u'\u9ad4\u80b2 Sports', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VSindex.htm'),
(u'\u5f71\u8996 Film/TV', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
(u'\u526f\u520a Supplements', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/WWindex.htm'),]:
articles = self.parse_section3(url, 'http://www.mingpaovan.com/')
if articles: if articles:
feeds.append((title, articles)) feeds.append((title, articles))
elif __Region__ == 'Toronto':
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'), for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TAindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]: (u'\u52a0\u570b Canada', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TDindex.htm'),
articles = self.parse_section(url) (u'\u793e\u5340 Local', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TFindex.htm'),
(u'\u4e2d\u570b China', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TCAindex.htm'),
(u'\u570b\u969b World', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TTAindex.htm'),
(u'\u6e2f\u805e Hong Kong', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-GAindex.htm'),
(u'\u7d93\u6fdf Economics', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/THindex.htm'),
(u'\u9ad4\u80b2 Sports', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TSindex.htm'),
(u'\u5f71\u8996 Film/TV', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
(u'\u526f\u520a Supplements', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/WWindex.htm'),]:
articles = self.parse_section3(url, 'http://www.mingpaotor.com/')
if articles: if articles:
feeds.append((title, articles)) feeds.append((title, articles))
else:
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- editorial
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
if ed_articles:
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special - finance
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
if fin_articles:
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special - entertainment
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
if ent_articles:
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- columns
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
if col_articles:
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
return feeds return feeds
# parse from news.mingpao.com # parse from news.mingpao.com
@ -256,11 +350,30 @@ class MPHKRecipe(BasicNewsRecipe):
title = self.tag_to_string(i) title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False) url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1): if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
current_articles.append({'title': title, 'url': url, 'description': ''}) current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url) included_urls.append(url)
current_articles.reverse() current_articles.reverse()
return current_articles return current_articles
# parse from www.mingpaovan.com
def parse_section3(self, url, baseUrl):
    """Collect article links from a regional section index page.

    url     -- address of the section index page to fetch.
    baseUrl -- site root the relative article links are resolved against;
               a trailing slash is tolerated.

    Returns a list of article dicts ('title', 'url', 'description', 'date')
    in page order.  When the same article URL appears more than once, the
    last occurrence on the page is the one kept.
    """
    soup = self.index_to_soup(url)
    links = soup.findAll(attrs={'class': ['ListContentLargeLink']})
    # Normalise the root so 'http://host/' does not turn into 'http://host//...'.
    root = baseUrl.rstrip('/')
    current_articles = []
    included_urls = set()
    # Walk bottom-up so the last duplicate wins, then restore page order.
    for link in reversed(links):
        href = link.get('href', None)
        if not href:
            # The element has the CSS class but no link target to fetch.
            continue
        urlstr = root + '/' + href.replace('../../../', '')
        if urlstr in included_urls:
            continue
        included_urls.add(urlstr)
        current_articles.append({'title': self.tag_to_string(link), 'url': urlstr,
                                 'description': '', 'date': ''})
    current_articles.reverse()
    return current_articles
def parse_ed_section(self, url): def parse_ed_section(self, url):
self.get_fetchdate() self.get_fetchdate()
soup = self.index_to_soup(url) soup = self.index_to_soup(url)
@ -338,7 +451,12 @@ class MPHKRecipe(BasicNewsRecipe):
if dir is None: if dir is None:
dir = self.output_dir dir = self.output_dir
if __UseChineseTitle__ == True: if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u9999\u6e2f)' if __Region__ == 'Hong Kong':
title = u'\u660e\u5831 (\u9999\u6e2f)'
elif __Region__ == 'Vancouver':
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
elif __Region__ == 'Toronto':
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
else: else:
title = self.short_title() title = self.short_title()
# if not generating a periodical, force date to apply in title # if not generating a periodical, force date to apply in title

View File

@ -0,0 +1,594 @@
__license__ = 'GPL v3'
__copyright__ = '2010-2011, Eddie Lau'
# ---- User-editable switches; the recipe class below reads them by name ----
# Region - Hong Kong, Vancouver, Toronto
# The value must match one of those three spellings exactly: they are the only
# values the recipe compares __Region__ against.
__Region__ = 'Toronto'
# Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False".
# When False, create_opf() appends the fetch date to the title and omits the
# 'periodical:' prefix from the publication type.
__MakePeriodical__ = True
# Turn below to true if your device supports display of CJK titles
__UseChineseTitle__ = False
# Set it to False if you want to skip images
__KeepImages__ = True
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
__UseLife__ = True
'''
Change Log:
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
provide options to remove all images in the file
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
2011/03/06: add new articles for finance section, also a new section "Columns"
2011/02/28: rearrange the sections
[Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles
View] make it the same title if generating a periodical, so past issue will be automatically put into "Past Issues"
folder in Kindle 3
2011/02/20: skip duplicated links in finance section, put photos which may extend a whole page to the back of the articles
clean up the indentation
2010/12/07: add entertainment section, use newspaper front page as ebook cover, suppress date display in section list
(to avoid wrong date display in case the user generates the ebook in a time zone different from HKT)
2010/11/22: add English section, remove eco-news section which is not updated daily, correct
ordering of articles
2010/11/12: add news image and eco-news section
2010/11/08: add parsing of finance section
2010/11/06: temporary work-around for Kindle device having no capability to display unicode
in section/article list.
2010/10/31: skip repeated articles in section pages
'''
import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
# MAIN CLASS
# Recipe class for the Ming Pao newspaper.  The scraping settings below are
# chosen once, when the class body runs, from the module-level __Region__
# switch; exactly one of the three branches takes effect.
class MPRecipe(BasicNewsRecipe):
# ---- Hong Kong edition ----
if __Region__ == 'Hong Kong':
title = 'Ming Pao - Hong Kong'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
category = 'Chinese, News, Hong Kong'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
keep_only_tags = [dict(name='h1'),
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}),
dict(attrs={'class':['photo']}),
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
]
# The two remove_tags lists differ only by the extra dict(name='img') entry
# used when images are to be dropped.
if __KeepImages__:
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
#dict(name='table') # for content fetched from life.mingpao.com
]
else:
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
dict(name='img'),
#dict(name='table') # for content fetched from life.mingpao.com
]
remove_attributes = ['width']
# Textual rewrites applied to the raw page: <h5> becomes <h1>, link-only
# paragraphs are dropped, and stray <br> tags around the title are removed.
preprocess_regexps = [
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
lambda match: '<h1>'),
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
lambda match: '</h1>'),
(re.compile(r'<p><a href=.+?</a></p>', re.DOTALL|re.IGNORECASE), # for entertainment page
lambda match: ''),
# skip <br> after title in life.mingpao.com fetched article
(re.compile(r"<div id='newscontent'><br>", re.DOTALL|re.IGNORECASE),
lambda match: "<div id='newscontent'>"),
(re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
lambda match: "</b>")
]
# ---- Vancouver edition ----
# NOTE(review): this branch and the Toronto branch below differ only in
# title, description, category and masthead_url; every other setting is
# repeated verbatim.
elif __Region__ == 'Vancouver':
title = 'Ming Pao - Vancouver'
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
category = 'Chinese, News, Vancouver'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
masthead_url = 'http://www.mingpaovan.com/image/mainlogo2_VAN2.gif'
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
]
if __KeepImages__:
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
else:
remove_tags = [dict(name='img')]
remove_attributes = ['width']
# Every '&nbsp;' entity is deleted from the page source.
preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
# ---- Toronto edition ----
elif __Region__ == 'Toronto':
title = 'Ming Pao - Toronto'
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
category = 'Chinese, News, Toronto'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
masthead_url = 'http://www.mingpaotor.com/image/mainlogo2_TOR2.gif'
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
]
if __KeepImages__:
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
else:
remove_tags = [dict(name='img')]
remove_attributes = ['width']
preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
# ---- Settings that follow the region branches ----
oldest_article = 1
max_articles_per_feed = 100
__author__ = 'Eddie Lau'
publisher = 'MingPao'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'zh'
encoding = 'Big5-HKSCS'
recursions = 0
conversion_options = {'linearize_tables':True}
timefmt = ''
def image_url_processor(cls, baseurl, url):
    """Return the image ``url`` unchanged.

    ``baseurl`` is accepted but not used.

    An earlier attempt rewrote the URL here by splitting it at its first
    digit and inserting an extra '_'.  The author recorded that it did not
    work and might need to move to preprocess_html(), so that dead code has
    been removed rather than kept as comments.
    """
    return url
def get_dtlocal(self):
    """Return the naive datetime used to decide which day's edition to fetch.

    The result is the current UTC time shifted to the region's local time and
    then moved back by the local hour at which that day's news is fully
    available.  Its date therefore only rolls over once the new edition
    is complete.

    Raises ValueError if __Region__ is not one of the supported regions.
    """
    # region -> (UTC offset in hours, local hour at which all news is available)
    # NOTE(review): the offsets are fixed standard-time values (HKT, PST, EST);
    # daylight saving time is not accounted for.
    schedule = {
        'Hong Kong': (8.0, 5.5),
        'Vancouver': (-8.0, 5.5),
        'Toronto': (-5.0, 8.5),
    }
    if __Region__ not in schedule:
        raise ValueError('Unsupported __Region__: %r' % (__Region__,))
    utc_offset, release_hour = schedule[__Region__]
    dt_utc = datetime.datetime.utcnow()
    return dt_utc + datetime.timedelta(hours=utc_offset) - datetime.timedelta(hours=release_hour)
def get_fetchdate(self):
    """Return the edition date from get_dtlocal() as a 'YYYYMMDD' string."""
    edition_time = self.get_dtlocal()
    return edition_time.strftime("%Y%m%d")
def get_fetchformatteddate(self):
    """Return the edition date from get_dtlocal() as a 'YYYY-MM-DD' string."""
    edition_time = self.get_dtlocal()
    return edition_time.strftime("%Y-%m-%d")
def get_fetchday(self):
    """Return the zero-padded day of month of the edition date."""
    edition_time = self.get_dtlocal()
    return edition_time.strftime("%d")
def get_cover_url(self):
    """Return the URL of the edition's front-page image, or None.

    The URL is built from the fetch date for the configured region and then
    opened once to check that it can be fetched.  None is returned when the
    region is not recognised or the image cannot be opened.
    """
    # Read the date parts once so every segment of the URL agrees.
    fetchdate = self.get_fetchdate()
    fetchday = self.get_fetchday()
    if __Region__ == 'Hong Kong':
        cover = 'http://news.mingpao.com/' + fetchdate + '/' + fetchdate + '_' + fetchday + 'gacov.jpg'
    elif __Region__ == 'Vancouver':
        cover = 'http://www.mingpaovan.com/ftp/News/' + fetchdate + '/' + fetchday + 'pgva1s.jpg'
    elif __Region__ == 'Toronto':
        cover = 'http://www.mingpaotor.com/ftp/News/' + fetchdate + '/' + fetchday + 'pgtas.jpg'
    else:
        # No cover location is known for any other region.
        return None
    br = BasicNewsRecipe.get_browser()
    try:
        br.open(cover)
    except Exception:
        # Best effort: a missing or unreachable image just means "no cover".
        # KeyboardInterrupt / SystemExit are deliberately left to propagate.
        cover = None
    return cover
def parse_index(self):
    """Build the list of (section title, articles) feeds for the configured region.

    Each section index is fetched in a fixed order with the parser that suits
    its source site.  Sections that produce no articles are omitted.
    """
    feeds = []
    dateStr = self.get_fetchdate()

    def add_feed(title, articles):
        # Empty sections are left out of the index.
        if articles:
            feeds.append((title, articles))

    if __Region__ == 'Hong Kong':
        life = 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category='
        news = 'http://news.mingpao.com/' + dateStr + '/'
        if __UseLife__:
            # (title, life.mingpao.com category, key the article links must contain)
            life_sections = [
                (u'\u8981\u805e Headline', 'nalga', 'nal'),
                (u'\u6e2f\u805e Local', 'nalgb', 'nal'),
                (u'\u6559\u80b2 Education', 'nalgf', 'nal'),
                (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'nalmr', 'nal'),
                (u'\u8ad6\u58c7 Forum', 'nalfa', 'nal'),
                (u'\u4e2d\u570b China', 'nalca', 'nal'),
                (u'\u570b\u969b World', 'nalta', 'nal'),
                (u'\u7d93\u6fdf Finance', 'nalea', 'nal'),
                (u'\u9ad4\u80b2 Sport', 'nalsp', 'nal'),
                (u'\u5f71\u8996 Film/TV', 'nalma', 'nal'),
                (u'\u5c08\u6b04 Columns', 'ncolumn', 'ncl'),
            ]
            for title, category, keystr in life_sections:
                add_feed(title, self.parse_section2(life + category, keystr))
            news_sections = [
                (u'\u526f\u520a Supplement', 'jaindex.htm'),
                (u'\u82f1\u6587 English', 'emindex.htm'),
            ]
            for title, page in news_sections:
                add_feed(title, self.parse_section(news + page))
        else:
            # (title, parser, index URL), fetched strictly in this order.
            plan = [
                (u'\u8981\u805e Headline', self.parse_section, news + 'gaindex.htm'),
                (u'\u6e2f\u805e Local', self.parse_section, news + 'gbindex.htm'),
                (u'\u6559\u80b2 Education', self.parse_section, news + 'gfindex.htm'),
                # special- editorial
                (u'\u793e\u8a55/\u7b46\u9663 Editorial', self.parse_ed_section, life + 'nalmr'),
                (u'\u8ad6\u58c7 Forum', self.parse_section, news + 'faindex.htm'),
                (u'\u4e2d\u570b China', self.parse_section, news + 'caindex.htm'),
                (u'\u570b\u969b World', self.parse_section, news + 'taindex.htm'),
                # special - finance
                (u'\u7d93\u6fdf Finance', self.parse_fin_section, life + 'nalea'),
                ('Tech News', self.parse_section, news + 'naindex.htm'),
                (u'\u9ad4\u80b2 Sport', self.parse_section, news + 'spindex.htm'),
                # special - entertainment
                (u'\u5f71\u8996 Film/TV', self.parse_ent_section, 'http://ol.mingpao.com/cfm/star1.cfm'),
                (u'\u526f\u520a Supplement', self.parse_section, news + 'jaindex.htm'),
                (u'\u82f1\u6587 English', self.parse_section, news + 'emindex.htm'),
                # special- columns
                (u'\u5c08\u6b04 Columns', self.parse_col_section, life + 'ncolumn'),
            ]
            for title, parser, target in plan:
                add_feed(title, parser(target))
    elif __Region__ == 'Vancouver':
        site = 'http://www.mingpaovan.com/'
        van_sections = [
            (u'\u8981\u805e Headline', 'VA'),
            (u'\u52a0\u570b Canada', 'VB'),
            (u'\u793e\u5340 Local', 'VD'),
            (u'\u6e2f\u805e Hong Kong', 'HK-VG'),
            (u'\u570b\u969b World', 'VT'),
            (u'\u4e2d\u570b China', 'VC'),
            (u'\u7d93\u6fdf Economics', 'VE'),
            (u'\u9ad4\u80b2 Sports', 'VS'),
            (u'\u5f71\u8996 Film/TV', 'HK-MA'),
            (u'\u526f\u520a Supplements', 'WW'),
        ]
        for title, code in van_sections:
            index_url = site + 'htm/News/' + dateStr + '/' + code + 'index.htm'
            add_feed(title, self.parse_section3(index_url, site))
    elif __Region__ == 'Toronto':
        site = 'http://www.mingpaotor.com/'
        tor_sections = [
            (u'\u8981\u805e Headline', 'TA'),
            (u'\u52a0\u570b Canada', 'TD'),
            (u'\u793e\u5340 Local', 'TF'),
            (u'\u4e2d\u570b China', 'TCA'),
            (u'\u570b\u969b World', 'TTA'),
            (u'\u6e2f\u805e Hong Kong', 'HK-GA'),
            (u'\u7d93\u6fdf Economics', 'TH'),
            (u'\u9ad4\u80b2 Sports', 'TS'),
            (u'\u5f71\u8996 Film/TV', 'HK-MA'),
            (u'\u526f\u520a Supplements', 'WW'),
        ]
        for title, code in tor_sections:
            index_url = site + 'htm/News/' + dateStr + '/' + code + 'index.htm'
            add_feed(title, self.parse_section3(index_url, site))
    return feeds
# parse from news.mingpao.com
def parse_section(self, url):
    """Collect article links from a news.mingpao.com section index page.

    url -- address of the section index page to fetch.

    Returns a list of article dicts ('title', 'url', 'description', 'date')
    in page order.  Links whose URL contains 'Redirect' are skipped, and when
    an article URL is repeated the last occurrence on the page is kept.
    """
    dateStr = self.get_fetchdate()
    soup = self.index_to_soup(url)
    bullets = soup.findAll(attrs={'class': ['bullet', 'bullet_grey']})
    current_articles = []
    included_urls = set()
    # Walk bottom-up so the last duplicate wins, then restore page order.
    for bullet in reversed(bullets):
        a = bullet.find('a', href=True)
        if a is None:
            # A bullet without a link has no article to fetch.
            continue
        article_url = 'http://news.mingpao.com/' + dateStr + '/' + a.get('href', False)
        if article_url in included_urls or 'Redirect' in article_url:
            continue
        included_urls.add(article_url)
        current_articles.append({'title': self.tag_to_string(a), 'url': article_url,
                                 'description': '', 'date': ''})
    current_articles.reverse()
    return current_articles
# parse from life.mingpao.com
def parse_section2(self, url, keystr):
    """Collect article links from a life.mingpao.com section index page.

    url    -- address of the section index page to fetch.
    keystr -- substring an article URL must contain to belong to the section.

    Only links whose URL contains both '.txt' and keystr are kept.  Each kept
    URL is switched to the printed version of the article (dailynews3a.cfm).
    Returns a list of article dicts ('title', 'url', 'description') in page
    order.  When an article is linked more than once, the last occurrence on
    the page is kept.
    """
    soup = self.index_to_soup(url)
    current_articles = []
    included_urls = set()
    # Walk bottom-up so the last duplicate wins, then restore page order.
    for link in reversed(soup.findAll('a', href=True)):
        article_url = 'http://life.mingpao.com/cfm/' + link.get('href', False)
        if '.txt' not in article_url or keystr not in article_url:
            continue
        # Rewrite to the printed version BEFORE the duplicate check.  The set
        # holds rewritten URLs, so testing the un-rewritten URL against it
        # would never match and duplicates would slip through.
        article_url = article_url.replace('dailynews3.cfm', 'dailynews3a.cfm')
        if article_url in included_urls:
            continue
        included_urls.add(article_url)
        current_articles.append({'title': self.tag_to_string(link), 'url': article_url,
                                 'description': ''})
    current_articles.reverse()
    return current_articles
# parse from www.mingpaovan.com
def parse_section3(self, url, baseUrl):
    """Collect article links from a Vancouver/Toronto section index page.

    url     -- address of the section index page to fetch.
    baseUrl -- site root the relative article links are resolved against;
               a trailing slash is tolerated.

    Returns a list of article dicts ('title', 'url', 'description', 'date')
    in page order.  When the same article URL appears more than once, the
    last occurrence on the page is the one kept.
    """
    soup = self.index_to_soup(url)
    links = soup.findAll(attrs={'class': ['ListContentLargeLink']})
    # parse_index() passes roots ending in '/'; normalise so that the joined
    # URL does not contain 'http://host//...'.
    root = baseUrl.rstrip('/')
    current_articles = []
    included_urls = set()
    # Walk bottom-up so the last duplicate wins, then restore page order.
    for link in reversed(links):
        href = link.get('href', None)
        if not href:
            # The element has the CSS class but no link target to fetch.
            continue
        urlstr = root + '/' + href.replace('../../../', '')
        if urlstr in included_urls:
            continue
        included_urls.add(urlstr)
        current_articles.append({'title': self.tag_to_string(link), 'url': urlstr,
                                 'description': '', 'date': ''})
    current_articles.reverse()
    return current_articles
def parse_ed_section(self, url):
    """Collect editorial article links from a life.mingpao.com index page.

    Keeps links whose full URL contains both '.txt' and 'nal'.  Returns a
    list of article dicts ('title', 'url', 'description') in page order;
    for a repeated URL the last occurrence on the page is kept.
    """
    self.get_fetchdate()
    anchors = self.index_to_soup(url).findAll('a', href=True)
    seen = set()
    collected = []
    # Bottom-up traversal, undone by the final reversal.
    for anchor in reversed(anchors):
        title = self.tag_to_string(anchor)
        link = 'http://life.mingpao.com/cfm/' + anchor.get('href', False)
        wanted = '.txt' in link and 'nal' in link
        if wanted and link not in seen:
            collected.append({'title': title, 'url': link, 'description': ''})
            seen.add(link)
    return collected[::-1]
def parse_fin_section(self, url):
    """Collect finance article links from a life.mingpao.com index page.

    Keeps links whose full URL contains both 'txt' and 'nal'.  Returns a list
    of article dicts ('title', 'url', 'description') in page order; for a
    repeated URL the first occurrence on the page is kept.
    """
    self.get_fetchdate()
    anchors = self.index_to_soup(url).findAll('a', href=True)
    seen = set()
    collected = []
    for anchor in anchors:
        link = 'http://life.mingpao.com/cfm/' + anchor.get('href', False)
        if link in seen:
            continue
        if 'txt' not in link or 'nal' not in link:
            continue
        collected.append({'title': self.tag_to_string(anchor), 'url': link, 'description': ''})
        seen.add(link)
    return collected
def parse_ent_section(self, url):
    """Collect entertainment article links from an ol.mingpao.com index page.

    Keeps links whose full URL contains both '.txt' and 'star'.  Returns a
    list of article dicts ('title', 'url', 'description') in page order;
    for a repeated URL the last occurrence on the page is kept.
    """
    self.get_fetchdate()
    anchors = self.index_to_soup(url).findAll('a', href=True)
    seen = set()
    collected = []
    # Bottom-up traversal, undone by the final reversal.
    for anchor in reversed(anchors):
        title = self.tag_to_string(anchor)
        link = 'http://ol.mingpao.com/cfm/' + anchor.get('href', False)
        wanted = '.txt' in link and 'star' in link
        if wanted and link not in seen:
            collected.append({'title': title, 'url': link, 'description': ''})
            seen.add(link)
    return collected[::-1]
def parse_col_section(self, url):
    """Collect column article links from a life.mingpao.com index page.

    Keeps links whose full URL contains both '.txt' and 'ncl'.  Returns a
    list of article dicts ('title', 'url', 'description') in page order;
    for a repeated URL the last occurrence on the page is kept.
    """
    self.get_fetchdate()
    anchors = self.index_to_soup(url).findAll('a', href=True)
    seen = set()
    collected = []
    # Bottom-up traversal, undone by the final reversal.
    for anchor in reversed(anchors):
        title = self.tag_to_string(anchor)
        link = 'http://life.mingpao.com/cfm/' + anchor.get('href', False)
        wanted = '.txt' in link and 'ncl' in link
        if wanted and link not in seen:
            collected.append({'title': title, 'url': link, 'description': ''})
            seen.add(link)
    return collected[::-1]
def preprocess_html(self, soup):
    """Strip presentational attributes from a parsed article page.

    Removes every ``style`` and ``width`` attribute, and removes ``align``
    where its value is ``absmiddle``.  The soup is modified in place and
    returned.
    """
    for item in soup.findAll(style=True):
        del item['style']
    # Select on the attribute being removed.  The old selector was style=True,
    # which matches nothing once the loop above has run, so width was never
    # stripped here.
    for item in soup.findAll(width=True):
        del item['width']
    # 'absmiddle' is a value of the align attribute, not an attribute name,
    # and the old selector was the misspelt keyword 'stype'.
    for item in soup.findAll(align='absmiddle'):
        del item['align']
    return soup
# Write index.opf and index.ncx for the downloaded feeds.
#   feeds -- sequence of feeds; each is iterated for its articles and read
#            for title / author / description.
#   dir   -- output directory; defaults to self.output_dir.
# Raises Exception('All feeds are empty, aborting.') when feeds is empty.
def create_opf(self, feeds, dir=None):
if dir is None:
dir = self.output_dir
# Pick the book title: a Chinese title per region, or the recipe's short title.
# NOTE(review): with __UseChineseTitle__ True and a __Region__ outside the
# three handled values, no branch assigns title -- confirm that cannot happen.
if __UseChineseTitle__ == True:
if __Region__ == 'Hong Kong':
title = u'\u660e\u5831 (\u9999\u6e2f)'
elif __Region__ == 'Vancouver':
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
elif __Region__ == 'Toronto':
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
else:
title = self.short_title()
# if not generating a periodical, force date to apply in title
if __MakePeriodical__ == False:
title = title + ' ' + self.get_fetchformatteddate()
if True:
# Metadata record; timestamp and pubdate use the recipe's edition time
# (get_dtlocal) rather than the current wall-clock time.
mi = MetaInformation(title, [self.publisher])
mi.publisher = self.publisher
mi.author_sort = self.publisher
if __MakePeriodical__ == True:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
else:
mi.publication_type = self.publication_type+':'+self.short_title()
#mi.timestamp = nowf()
mi.timestamp = self.get_dtlocal()
mi.comments = self.description
if not isinstance(mi.comments, unicode):
mi.comments = mi.comments.decode('utf-8', 'replace')
#mi.pubdate = nowf()
mi.pubdate = self.get_dtlocal()
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
# Manifest: one directory per feed plus the top-level index files.
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
# NOTE(review): pf is opened here and not closed anywhere in this method;
# whether default_cover() closes it is not visible from here.
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
opf.create_manifest_from_files_in(manifest)
# Give the NCX and the masthead image fixed manifest ids.
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
# entries collects spine items as '/'-separated paths relative to dir.
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
# Add a TOC item under parent for every downloaded article of feed num,
# and append a navigation bar to the last page of each article.
def feed_index(num, parent):
f = feeds[num]
for j, a in enumerate(f):
if getattr(a, 'downloaded', False):
adir = 'feed_%d/article_%d/'%(num, j)
auth = a.author
if not auth:
auth = None
desc = a.text_summary
if not desc:
desc = None
else:
desc = self.description_limiter(desc)
entries.append('%sindex.html'%adir)
# Reuse a recorded play order for this entry, else take the next number.
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
play_order=po, author=auth, description=desc)
# 'last' ends up as the final sub page, or the article index if there are none.
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
for sp in a.sub_pages:
prefix = os.path.commonprefix([opf_path, sp])
relp = sp[len(prefix):]
entries.append(relp.replace(os.sep, '/'))
last = sp
if os.path.exists(last):
with open(last, 'rb') as fi:
src = fi.read().decode('utf-8')
soup = BeautifulSoup(src)
body = soup.find('body')
if body is not None:
# Two '..' path segments per 'link<N>' component found in the page path.
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
templ = self.navbar.generate(True, num, j, len(f),
not self.has_single_feed,
a.orig_url, self.publisher, prefix=prefix,
center=self.center_navbar)
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem)
with open(last, 'wb') as fi:
fi.write(unicode(soup).encode('utf-8'))
if len(feeds) == 0:
raise Exception('All feeds are empty, aborting.')
# Several feeds: each gets its own TOC node with its articles beneath it.
# A single feed: its articles hang directly off the TOC root.
if len(feeds) > 1:
for i, f in enumerate(feeds):
entries.append('feed_%d/index.html'%i)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
auth = getattr(f, 'author', None)
if not auth:
auth = None
desc = getattr(f, 'description', None)
if not desc:
desc = None
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
f.title, play_order=po, description=desc, author=auth))
else:
entries.append('feed_%d/index.html'%0)
feed_index(0, toc)
# Convert the relative '/'-separated entries to OS paths under dir.
for i, p in enumerate(entries):
entries[i] = os.path.join(dir, p.replace('/', os.sep))
opf.create_spine(entries)
opf.set_toc(toc)
# Render the OPF and NCX documents in one call, into their two files.
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
opf.render(opf_file, ncx_file)

View File

@ -0,0 +1,594 @@
__license__ = 'GPL v3'
__copyright__ = '2010-2011, Eddie Lau'
# ---- User-editable switches; the recipe class below reads them by name ----
# Region - Hong Kong, Vancouver, Toronto
# The value must match one of those three spellings exactly: the recipe class
# selects its settings by comparing __Region__ against them.
__Region__ = 'Vancouver'
# Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False".
__MakePeriodical__ = True
# Turn below to true if your device supports display of CJK titles
__UseChineseTitle__ = False
# Set it to False if you want to skip images
__KeepImages__ = True
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
__UseLife__ = True
'''
Change Log:
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
provide options to remove all images in the file
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
2011/03/06: add new articles for finance section, also a new section "Columns"
2011/02/28: rearrange the sections
[Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles
View] make it the same title if generating a periodical, so past issue will be automatically put into "Past Issues"
folder in Kindle 3
2011/02/20: skip duplicated links in finance section, put photos which may extend a whole page to the back of the articles
clean up the indentation
2010/12/07: add entertainment section, use newspaper front page as ebook cover, suppress date display in section list
(to avoid wrong date display in case the user generates the ebook in a time zone different from HKT)
2010/11/22: add English section, remove eco-news section which is not updated daily, correct
ordering of articles
2010/11/12: add news image and eco-news section
2010/11/08: add parsing of finance section
2010/11/06: temporary work-around for Kindle device having no capability to display unicode
in section/article list.
2010/10/31: skip repeated articles in section pages
'''
import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
# MAIN CLASS
# Recipe for the Ming Pao newspaper. The scraping rules below are selected per
# edition by the module-level __Region__ switch, and image handling by
# __KeepImages__; both switches are defined outside this part of the file.
class MPRecipe(BasicNewsRecipe):
# ---- Hong Kong edition ----
if __Region__ == 'Hong Kong':
title = 'Ming Pao - Hong Kong'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
category = 'Chinese, News, Hong Kong'
# centre images; render bold text nested directly in <font> at double size
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
# only these elements of an article page are kept
keep_only_tags = [dict(name='h1'),
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}),
dict(attrs={'class':['photo']}),
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
]
# the two remove_tags variants differ only in whether every <img> is dropped
if __KeepImages__:
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
#dict(name='table') # for content fetched from life.mingpao.com
]
else:
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
dict(name='img'),
#dict(name='table') # for content fetched from life.mingpao.com
]
remove_attributes = ['width']
# textual clean-up applied to the raw page source
preprocess_regexps = [
# promote <h5> headings to <h1> so they match keep_only_tags above
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
lambda match: '<h1>'),
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
lambda match: '</h1>'),
# drop paragraphs that consist of a single link
(re.compile(r'<p><a href=.+?</a></p>', re.DOTALL|re.IGNORECASE), # for entertainment page
lambda match: ''),
# skip <br> after title in life.mingpao.com fetched article
(re.compile(r"<div id='newscontent'><br>", re.DOTALL|re.IGNORECASE),
lambda match: "<div id='newscontent'>"),
(re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
lambda match: "</b>")
]
# ---- Vancouver edition ----
elif __Region__ == 'Vancouver':
title = 'Ming Pao - Vancouver'
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
category = 'Chinese, News, Vancouver'
# centre images; render <font> nested directly in <b> at double size
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
masthead_url = 'http://www.mingpaovan.com/image/mainlogo2_VAN2.gif'
# article pages are table based; the tables to keep are matched by their exact layout attributes
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
]
if __KeepImages__:
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
else:
remove_tags = [dict(name='img')]
remove_attributes = ['width']
# strip every &nbsp; entity from the page source
preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
# ---- Toronto edition (same rules as Vancouver apart from title, description, category and masthead) ----
elif __Region__ == 'Toronto':
title = 'Ming Pao - Toronto'
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
category = 'Chinese, News, Toronto'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
masthead_url = 'http://www.mingpaotor.com/image/mainlogo2_TOR2.gif'
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
]
if __KeepImages__:
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
else:
remove_tags = [dict(name='img')]
remove_attributes = ['width']
preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
# ---- settings shared by every edition ----
oldest_article = 1
max_articles_per_feed = 100
__author__ = 'Eddie Lau'
publisher = 'MingPao'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'zh'
encoding = 'Big5-HKSCS'
recursions = 0
conversion_options = {'linearize_tables':True}
# NOTE(review): presumably left empty so no date suffix is added automatically;
# create_opf() appends the issue date to the title itself - confirm
timefmt = ''
def image_url_processor(cls, baseurl, url):
    '''
    Return the URL that should be fetched for the image ``url``.

    This is currently an identity hook: ``url`` is handed back untouched and
    ``baseurl`` is ignored. An earlier experiment split the URL at the first
    occurrence of a digit and inserted an extra '_' there; it did not work
    and was disabled. If it is revived it may need to move to the
    preprocess_html() method.
    '''
    image_url = url
    return image_url
def get_dtlocal(self):
    '''
    Return the "issue clock" for the configured edition.

    The current UTC time is shifted to the edition's local time using a fixed
    hour offset, then moved back by the time of day at which that edition's
    news is fully available. The date part of the result is therefore the
    date of the most recent complete issue.
    '''
    now_utc = datetime.datetime.utcnow()
    if __Region__ == 'Hong Kong':
        # local hk time is UTC+8 - at HKT 5.30am, all news are available
        utc_offset_days, ready_after_days = 8.0/24, 5.5/24
    elif __Region__ == 'Vancouver':
        # local Vancouver time is taken as UTC-8 - at PST time 5.30am, all news are available
        utc_offset_days, ready_after_days = -8.0/24, 5.5/24
    elif __Region__ == 'Toronto':
        # local Toronto time is taken as UTC-5 - at EST time 8.30am, all news are available
        utc_offset_days, ready_after_days = -5.0/24, 8.5/24
    # timedelta's first positional argument is a (fractional) number of days
    return now_utc + datetime.timedelta(utc_offset_days) - datetime.timedelta(ready_after_days)
def get_fetchdate(self):
    '''Return the issue date from get_dtlocal() as a compact YYYYMMDD string.'''
    issue_clock = self.get_dtlocal()
    return issue_clock.strftime("%Y%m%d")
def get_fetchformatteddate(self):
    '''Return the issue date from get_dtlocal() as a dashed YYYY-MM-DD string.'''
    issue_clock = self.get_dtlocal()
    return issue_clock.strftime("%Y-%m-%d")
def get_fetchday(self):
    '''Return the zero-padded day of month of the issue date from get_dtlocal().'''
    issue_clock = self.get_dtlocal()
    return issue_clock.strftime("%d")
def get_cover_url(self):
    '''
    Return the URL of the front-page image for the issue being fetched.

    The URL is built from the issue date for the configured edition and then
    probed with a browser. Returns None when the image cannot be opened, or
    when __Region__ is not one of the known editions.
    '''
    # Read the clock once and derive the day from it, so the directory and
    # file name parts of the URL can never come from two different dates.
    issue_date = self.get_fetchdate()
    issue_day = issue_date[-2:]
    if __Region__ == 'Hong Kong':
        cover = 'http://news.mingpao.com/' + issue_date + '/' + issue_date + '_' + issue_day + 'gacov.jpg'
    elif __Region__ == 'Vancouver':
        cover = 'http://www.mingpaovan.com/ftp/News/' + issue_date + '/' + issue_day + 'pgva1s.jpg'
    elif __Region__ == 'Toronto':
        cover = 'http://www.mingpaotor.com/ftp/News/' + issue_date + '/' + issue_day + 'pgtas.jpg'
    else:
        # Unknown edition: there is no cover to look for. (Previously this
        # case only returned None because a bare except swallowed the
        # resulting UnboundLocalError.)
        return None
    br = BasicNewsRecipe.get_browser()
    try:
        br.open(cover)
    except Exception:
        # Best effort: a missing or unreachable cover must not abort the
        # download, but KeyboardInterrupt/SystemExit are no longer swallowed.
        cover = None
    return cover
def parse_index(self):
    '''
    Build the list of (section title, articles) feeds for the configured
    edition and issue date.

    Sections are visited in a fixed order; a section that yields no articles
    is left out. For Hong Kong, __UseLife__ selects between fetching the
    sections from life.mingpao.com and from news.mingpao.com.
    '''
    feeds = []
    issue_date = self.get_fetchdate()

    def add_feed(feed_title, feed_articles):
        # empty sections are not listed
        if feed_articles:
            feeds.append((feed_title, feed_articles))

    if __Region__ == 'Hong Kong':
        life_url = 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + issue_date + '&Category='
        news_url = 'http://news.mingpao.com/' + issue_date + '/'

        def add_news_sections(sections):
            # sections served as index pages on news.mingpao.com
            for feed_title, page in sections:
                add_feed(feed_title, self.parse_section(news_url + page))

        if __UseLife__:
            life_sections = [
                (u'\u8981\u805e Headline', 'nalga', 'nal'),
                (u'\u6e2f\u805e Local', 'nalgb', 'nal'),
                (u'\u6559\u80b2 Education', 'nalgf', 'nal'),
                (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'nalmr', 'nal'),
                (u'\u8ad6\u58c7 Forum', 'nalfa', 'nal'),
                (u'\u4e2d\u570b China', 'nalca', 'nal'),
                (u'\u570b\u969b World', 'nalta', 'nal'),
                (u'\u7d93\u6fdf Finance', 'nalea', 'nal'),
                (u'\u9ad4\u80b2 Sport', 'nalsp', 'nal'),
                (u'\u5f71\u8996 Film/TV', 'nalma', 'nal'),
                (u'\u5c08\u6b04 Columns', 'ncolumn', 'ncl'),
            ]
            for feed_title, life_category, keystr in life_sections:
                add_feed(feed_title, self.parse_section2(life_url + life_category, keystr))
            add_news_sections([
                (u'\u526f\u520a Supplement', 'jaindex.htm'),
                (u'\u82f1\u6587 English', 'emindex.htm'),
            ])
        else:
            add_news_sections([
                (u'\u8981\u805e Headline', 'gaindex.htm'),
                (u'\u6e2f\u805e Local', 'gbindex.htm'),
                (u'\u6559\u80b2 Education', 'gfindex.htm'),
            ])
            # special- editorial
            add_feed(u'\u793e\u8a55/\u7b46\u9663 Editorial',
                     self.parse_ed_section(life_url + 'nalmr'))
            add_news_sections([
                (u'\u8ad6\u58c7 Forum', 'faindex.htm'),
                (u'\u4e2d\u570b China', 'caindex.htm'),
                (u'\u570b\u969b World', 'taindex.htm'),
            ])
            # special - finance (taken from life.mingpao.com; the former
            # www.mpfinance.com index page is no longer used)
            add_feed(u'\u7d93\u6fdf Finance',
                     self.parse_fin_section(life_url + 'nalea'))
            add_news_sections([
                ('Tech News', 'naindex.htm'),
                (u'\u9ad4\u80b2 Sport', 'spindex.htm'),
            ])
            # special - entertainment
            add_feed(u'\u5f71\u8996 Film/TV',
                     self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm'))
            add_news_sections([
                (u'\u526f\u520a Supplement', 'jaindex.htm'),
                (u'\u82f1\u6587 English', 'emindex.htm'),
            ])
            # special- columns
            add_feed(u'\u5c08\u6b04 Columns',
                     self.parse_col_section(life_url + 'ncolumn'))
    elif __Region__ == 'Vancouver':
        site = 'http://www.mingpaovan.com/'
        index_dir = site + 'htm/News/' + issue_date + '/'
        for feed_title, page in [
                (u'\u8981\u805e Headline', 'VAindex.htm'),
                (u'\u52a0\u570b Canada', 'VBindex.htm'),
                (u'\u793e\u5340 Local', 'VDindex.htm'),
                (u'\u6e2f\u805e Hong Kong', 'HK-VGindex.htm'),
                (u'\u570b\u969b World', 'VTindex.htm'),
                (u'\u4e2d\u570b China', 'VCindex.htm'),
                (u'\u7d93\u6fdf Economics', 'VEindex.htm'),
                (u'\u9ad4\u80b2 Sports', 'VSindex.htm'),
                (u'\u5f71\u8996 Film/TV', 'HK-MAindex.htm'),
                (u'\u526f\u520a Supplements', 'WWindex.htm')]:
            add_feed(feed_title, self.parse_section3(index_dir + page, site))
    elif __Region__ == 'Toronto':
        site = 'http://www.mingpaotor.com/'
        index_dir = site + 'htm/News/' + issue_date + '/'
        for feed_title, page in [
                (u'\u8981\u805e Headline', 'TAindex.htm'),
                (u'\u52a0\u570b Canada', 'TDindex.htm'),
                (u'\u793e\u5340 Local', 'TFindex.htm'),
                (u'\u4e2d\u570b China', 'TCAindex.htm'),
                (u'\u570b\u969b World', 'TTAindex.htm'),
                (u'\u6e2f\u805e Hong Kong', 'HK-GAindex.htm'),
                (u'\u7d93\u6fdf Economics', 'THindex.htm'),
                (u'\u9ad4\u80b2 Sports', 'TSindex.htm'),
                (u'\u5f71\u8996 Film/TV', 'HK-MAindex.htm'),
                (u'\u526f\u520a Supplements', 'WWindex.htm')]:
            add_feed(feed_title, self.parse_section3(index_dir + page, site))
    return feeds
# parse from news.mingpao.com
def parse_section(self, url):
    '''
    Collect the articles listed on a news.mingpao.com section index page.

    url: address of the section index page.

    Returns a list of article dicts (title, url, description, date) in page
    order. Elements of class 'bullet' / 'bullet_grey' without a link are
    skipped, as are links whose URL contains 'Redirect'. When an article is
    listed more than once only its last occurrence on the page is kept.
    '''
    dateStr = self.get_fetchdate()
    soup = self.index_to_soup(url)
    bullets = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
    current_articles = []
    included_urls = set()
    # Walk the page bottom-up and flip the result afterwards, so a repeated
    # article keeps the position of its last occurrence.
    for bullet in reversed(bullets):
        link = bullet.find('a', href = True)
        if link is None:
            # a bullet without a link used to abort the whole section
            continue
        article_url = 'http://news.mingpao.com/' + dateStr + '/' + link.get('href', False)
        if 'Redirect' in article_url or article_url in included_urls:
            continue
        included_urls.add(article_url)
        current_articles.append({'title': self.tag_to_string(link), 'url': article_url, 'description':'', 'date':''})
    current_articles.reverse()
    return current_articles
# parse from life.mingpao.com
def parse_section2(self, url, keystr):
    '''
    Collect the articles listed on a life.mingpao.com section page.

    url: address of the section page.
    keystr: substring an article link must contain to belong to the section.

    Only links containing both '.txt' and keystr are taken. Each one is
    pointed at the printed version of the article (dailynews3a.cfm). Returns
    a list of article dicts (title, url, description) in page order; when an
    article is linked more than once only its last occurrence is kept.
    '''
    soup = self.index_to_soup(url)
    current_articles = []
    included_urls = set()
    # Walk the page bottom-up and flip the result afterwards, so a repeated
    # article keeps the position of its last occurrence.
    for link in reversed(soup.findAll('a', href=True)):
        article_url = 'http://life.mingpao.com/cfm/' + link.get('href', False)
        if '.txt' not in article_url or keystr not in article_url:
            continue
        article_url = article_url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
        # Compare the rewritten URL: the old check looked up the URL from
        # before the rewrite while storing the rewritten one, so repeated
        # articles were never recognised.
        if article_url in included_urls:
            continue
        included_urls.add(article_url)
        current_articles.append({'title': self.tag_to_string(link), 'url': article_url, 'description': ''})
    current_articles.reverse()
    return current_articles
# parse from www.mingpaovan.com
def parse_section3(self, url, baseUrl):
    '''
    Collect the articles listed on a section page of the Canadian editions.

    url: address of the section index page.
    baseUrl: site root the relative article links are resolved against; it
        may be given with or without a trailing '/'.

    Returns a list of article dicts (title, url, description, date) in page
    order, built from the elements of class 'ListContentLargeLink'. Elements
    without an href are skipped; when an article is listed more than once
    only its last occurrence is kept.
    '''
    soup = self.index_to_soup(url)
    links = soup.findAll(attrs={'class': ['ListContentLargeLink']})
    # callers pass the root with a trailing '/'; normalise it so the joined
    # URL does not contain '//' after the host name
    site_root = baseUrl.rstrip('/')
    current_articles = []
    included_urls = set()
    # Walk the page bottom-up and flip the result afterwards, so a repeated
    # article keeps the position of its last occurrence.
    for link in reversed(links):
        href = link.get('href', False)
        if not href:
            # nothing to fetch; previously this raised on False.replace()
            continue
        urlstr = site_root + '/' + href.replace('../../../', '')
        if urlstr in included_urls:
            continue
        included_urls.add(urlstr)
        current_articles.append({'title': self.tag_to_string(link), 'url': urlstr, 'description': '', 'date': ''})
    current_articles.reverse()
    return current_articles
def parse_ed_section(self, url):
    '''
    Collect the editorial articles linked from the life.mingpao.com page at
    url.

    Links are taken when their URL contains both '.txt' and 'nal'. Returns a
    list of article dicts (title, url, description) in page order; when an
    article is linked more than once only its last occurrence is kept.
    '''
    self.get_fetchdate()
    soup = self.index_to_soup(url)
    collected = []
    seen_urls = set()
    # bottom-up walk, flipped afterwards: the last occurrence of a repeated link wins
    for link in reversed(soup.findAll('a', href=True)):
        link_title = self.tag_to_string(link)
        link_url = 'http://life.mingpao.com/cfm/' + link.get('href', False)
        if link_url in seen_urls:
            continue
        if '.txt' in link_url and 'nal' in link_url:
            collected.append({'title': link_title, 'url': link_url, 'description': ''})
            seen_urls.add(link_url)
    collected.reverse()
    return collected
def parse_fin_section(self, url):
    '''
    Collect the finance articles linked from the life.mingpao.com page at
    url.

    Links are taken when their URL contains both 'txt' and 'nal'. Returns a
    list of article dicts (title, url, description) in page order; when an
    article is linked more than once only its first occurrence is kept.
    '''
    self.get_fetchdate()
    soup = self.index_to_soup(url)
    collected = []
    seen_urls = set()
    for link in soup.findAll('a', href= True):
        # links are resolved against life.mingpao.com (www.mpfinance.com is no longer used)
        link_url = 'http://life.mingpao.com/cfm/' + link.get('href', False)
        if link_url in seen_urls:
            continue
        if 'txt' in link_url and 'nal' in link_url:
            collected.append({'title': self.tag_to_string(link), 'url': link_url, 'description':''})
            seen_urls.add(link_url)
    return collected
def parse_ent_section(self, url):
    '''
    Collect the entertainment articles linked from the ol.mingpao.com page at
    url.

    Links are taken when their URL contains both '.txt' and 'star'. Returns a
    list of article dicts (title, url, description) in page order; when an
    article is linked more than once only its last occurrence is kept.
    '''
    self.get_fetchdate()
    soup = self.index_to_soup(url)
    collected = []
    seen_urls = set()
    # bottom-up walk, flipped afterwards: the last occurrence of a repeated link wins
    for link in reversed(soup.findAll('a', href=True)):
        link_title = self.tag_to_string(link)
        link_url = 'http://ol.mingpao.com/cfm/' + link.get('href', False)
        if link_url in seen_urls:
            continue
        if '.txt' in link_url and 'star' in link_url:
            collected.append({'title': link_title, 'url': link_url, 'description': ''})
            seen_urls.add(link_url)
    collected.reverse()
    return collected
def parse_col_section(self, url):
    '''
    Collect the column articles linked from the life.mingpao.com page at url.

    Links are taken when their URL contains both '.txt' and 'ncl'. Returns a
    list of article dicts (title, url, description) in page order; when an
    article is linked more than once only its last occurrence is kept.
    '''
    self.get_fetchdate()
    soup = self.index_to_soup(url)
    collected = []
    seen_urls = set()
    # bottom-up walk, flipped afterwards: the last occurrence of a repeated link wins
    for link in reversed(soup.findAll('a', href=True)):
        link_title = self.tag_to_string(link)
        link_url = 'http://life.mingpao.com/cfm/' + link.get('href', False)
        if link_url in seen_urls:
            continue
        if '.txt' in link_url and 'ncl' in link_url:
            collected.append({'title': link_title, 'url': link_url, 'description': ''})
            seen_urls.add(link_url)
    collected.reverse()
    return collected
def preprocess_html(self, soup):
    '''
    Strip presentational attributes from a parsed article page.

    soup: the parsed page; it is modified in place.

    Removes the 'style' attribute and the 'width' attribute from every tag
    that carries one, and returns the same soup object.
    '''
    for item in soup.findAll(style=True):
        del item['style']
    # This loop used to query style=True again, which can no longer match
    # anything after the loop above, so widths were never removed here.
    for item in soup.findAll(width=True):
        del item['width']
    # NOTE(review): 'stype' and 'absmiddle' look like slips for an align
    # attribute with the value 'absmiddle'; left unchanged because the
    # intended clean-up cannot be confirmed from this file.
    for item in soup.findAll(stype=True):
        del item['absmiddle']
    return soup
def create_opf(self, feeds, dir=None):
    '''
    Write index.opf and index.ncx describing the downloaded feeds.

    feeds: the downloaded feeds; each feed is iterated for its articles.
    dir: directory the two files are written to; defaults to self.output_dir.

    The book title is the Chinese edition name when __UseChineseTitle__ is
    set and self.short_title() otherwise, with the issue date appended when
    no periodical is being generated. Timestamp and publication date are
    taken from get_dtlocal(). A navigation bar is appended to the last page
    of every downloaded article.

    Raises Exception when feeds is empty.
    '''
    if dir is None:
        dir = self.output_dir
    if __UseChineseTitle__ == True:
        if __Region__ == 'Hong Kong':
            title = u'\u660e\u5831 (\u9999\u6e2f)'
        elif __Region__ == 'Vancouver':
            title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
        elif __Region__ == 'Toronto':
            title = u'\u660e\u5831 (\u591a\u502b\u591a)'
    else:
        title = self.short_title()
    # if not generating a periodical, force date to apply in title
    if __MakePeriodical__ == False:
        title = title + ' ' + self.get_fetchformatteddate()

    mi = MetaInformation(title, [self.publisher])
    mi.publisher = self.publisher
    mi.author_sort = self.publisher
    if __MakePeriodical__ == True:
        mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
    else:
        mi.publication_type = self.publication_type+':'+self.short_title()
    # date the book with the issue clock rather than the current time
    mi.timestamp = self.get_dtlocal()
    mi.comments = self.description
    if not isinstance(mi.comments, unicode):
        mi.comments = mi.comments.decode('utf-8', 'replace')
    mi.pubdate = self.get_dtlocal()
    opf_path = os.path.join(dir, 'index.opf')
    ncx_path = os.path.join(dir, 'index.ncx')
    opf = OPFCreator(dir, mi)

    # Add mastheadImage entry to <guide> section
    mp = getattr(self, 'masthead_path', None)
    if mp is not None and os.access(mp, os.R_OK):
        from calibre.ebooks.metadata.opf2 import Guide
        ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
        ref.type = 'masthead'
        ref.title = 'Masthead Image'
        opf.guide.append(ref)

    manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
    manifest.append(os.path.join(dir, 'index.html'))
    manifest.append(os.path.join(dir, 'index.ncx'))

    # Get cover
    cpath = getattr(self, 'cover_path', None)
    if cpath is None:
        # Close the fallback cover file as soon as it has been written; the
        # handle used to be left open for the rest of the run.
        with open(os.path.join(dir, 'cover.jpg'), 'wb') as pf:
            if self.default_cover(pf):
                cpath = pf.name
    if cpath is not None and os.access(cpath, os.R_OK):
        opf.cover = cpath
        manifest.append(cpath)

    # Get masthead
    mpath = getattr(self, 'masthead_path', None)
    if mpath is not None and os.access(mpath, os.R_OK):
        manifest.append(mpath)

    opf.create_manifest_from_files_in(manifest)
    for mani in opf.manifest:
        if mani.path.endswith('.ncx'):
            mani.id = 'ncx'
        if mani.path.endswith('mastheadImage.jpg'):
            mani.id = 'masthead-image'

    entries = ['index.html']
    toc = TOC(base_path=dir)
    self.play_order_counter = 0
    self.play_order_map = {}

    def feed_index(num, parent):
        # Add every downloaded article of feed number `num` to the spine and
        # below `parent` in the TOC, and append a navbar to its last page.
        f = feeds[num]
        for j, a in enumerate(f):
            if getattr(a, 'downloaded', False):
                adir = 'feed_%d/article_%d/'%(num, j)
                auth = a.author
                if not auth:
                    auth = None
                desc = a.text_summary
                if not desc:
                    desc = None
                else:
                    desc = self.description_limiter(desc)
                entries.append('%sindex.html'%adir)
                po = self.play_order_map.get(entries[-1], None)
                if po is None:
                    self.play_order_counter += 1
                    po = self.play_order_counter
                parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
                                play_order=po, author=auth, description=desc)
                last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                for sp in a.sub_pages:
                    prefix = os.path.commonprefix([opf_path, sp])
                    relp = sp[len(prefix):]
                    entries.append(relp.replace(os.sep, '/'))
                    last = sp

                # the navbar goes on the article's final page
                if os.path.exists(last):
                    with open(last, 'rb') as fi:
                        src = fi.read().decode('utf-8')
                    soup = BeautifulSoup(src)
                    body = soup.find('body')
                    if body is not None:
                        # two '..' path components per 'link<N>' part found in the page's path
                        prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
                        templ = self.navbar.generate(True, num, j, len(f),
                                        not self.has_single_feed,
                                        a.orig_url, self.publisher, prefix=prefix,
                                        center=self.center_navbar)
                        elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                        body.insert(len(body.contents), elem)
                        with open(last, 'wb') as fi:
                            fi.write(unicode(soup).encode('utf-8'))

    if len(feeds) == 0:
        raise Exception('All feeds are empty, aborting.')

    if len(feeds) > 1:
        for i, f in enumerate(feeds):
            entries.append('feed_%d/index.html'%i)
            po = self.play_order_map.get(entries[-1], None)
            if po is None:
                self.play_order_counter += 1
                po = self.play_order_counter
            auth = getattr(f, 'author', None)
            if not auth:
                auth = None
            desc = getattr(f, 'description', None)
            if not desc:
                desc = None
            feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                f.title, play_order=po, description=desc, author=auth))
    else:
        entries.append('feed_%d/index.html'%0)
        feed_index(0, toc)

    for i, p in enumerate(entries):
        entries[i] = os.path.join(dir, p.replace('/', os.sep))
    opf.create_spine(entries)
    opf.set_toc(toc)

    with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
        opf.render(opf_file, ncx_file)

View File

@ -2,90 +2,92 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, matek09, matek09@gmail.com' __copyright__ = '2010, matek09, matek09@gmail.com'
__copyright__ = 'Modified 2011, Mariusz Wolek <mariusz_dot_wolek @ gmail dot com>'
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import re import re
class Wprost(BasicNewsRecipe): class Wprost(BasicNewsRecipe):
EDITION = 0 EDITION = 0
FIND_LAST_FULL_ISSUE = True FIND_LAST_FULL_ISSUE = True
EXCLUDE_LOCKED = True EXCLUDE_LOCKED = True
ICO_BLOCKED = 'http://www.wprost.pl/G/icons/ico_blocked.gif' ICO_BLOCKED = 'http://www.wprost.pl/G/icons/ico_blocked.gif'
title = u'Wprost' title = u'Wprost'
__author__ = 'matek09' __author__ = 'matek09'
description = 'Weekly magazine' description = 'Weekly magazine'
encoding = 'ISO-8859-2' encoding = 'ISO-8859-2'
no_stylesheets = True no_stylesheets = True
language = 'pl' language = 'pl'
remove_javascript = True remove_javascript = True
remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'})) remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'})) remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
'''keep_only_tags =[] '''keep_only_tags =[]
keep_only_tags.append(dict(name = 'table', attrs = {'id' : 'title-table'})) keep_only_tags.append(dict(name = 'table', attrs = {'id' : 'title-table'}))
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-header'})) keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-header'}))
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-content'})) keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-content'}))
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'def element-autor'}))''' keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'def element-autor'}))'''
preprocess_regexps = [(re.compile(r'style="display: none;"'), lambda match: ''), preprocess_regexps = [(re.compile(r'style="display: none;"'), lambda match: ''),
(re.compile(r'display: block;'), lambda match: '')] (re.compile(r'display: block;'), lambda match: ''),
(re.compile(r'\<td\>\<tr\>\<\/table\>'), lambda match: ''),
(re.compile(r'\<table .*?\>'), lambda match: ''),
(re.compile(r'\<tr>'), lambda match: ''),
(re.compile(r'\<td .*?\>'), lambda match: '')]
remove_tags =[]
remove_tags.append(dict(name = 'div', attrs = {'class' : 'def element-date'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'def silver'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'content-main-column-right'}))
remove_tags =[] extra_css = '''
remove_tags.append(dict(name = 'div', attrs = {'class' : 'def element-date'})) .div-header {font-size: x-small; font-weight: bold}
remove_tags.append(dict(name = 'div', attrs = {'class' : 'def silver'})) '''
remove_tags.append(dict(name = 'div', attrs = {'id' : 'content-main-column-right'}))
extra_css = '''
.div-header {font-size: x-small; font-weight: bold}
'''
#h2 {font-size: x-large; font-weight: bold} #h2 {font-size: x-large; font-weight: bold}
def is_blocked(self, a): def is_blocked(self, a):
if a.findNextSibling('img') is None: if a.findNextSibling('img') is None:
return False return False
else: else:
return True return True
def find_last_issue(self): def find_last_issue(self):
soup = self.index_to_soup('http://www.wprost.pl/archiwum/') soup = self.index_to_soup('http://www.wprost.pl/archiwum/')
a = 0 a = 0
if self.FIND_LAST_FULL_ISSUE: if self.FIND_LAST_FULL_ISSUE:
ico_blocked = soup.findAll('img', attrs={'src' : self.ICO_BLOCKED}) ico_blocked = soup.findAll('img', attrs={'src' : self.ICO_BLOCKED})
a = ico_blocked[-1].findNext('a', attrs={'title' : re.compile('Zobacz spis tre.ci')}) a = ico_blocked[-1].findNext('a', attrs={'title' : re.compile('Zobacz spis tre.ci')})
else: else:
a = soup.find('a', attrs={'title' : re.compile('Zobacz spis tre.ci')}) a = soup.find('a', attrs={'title' : re.compile('Zobacz spis tre.ci')})
self.EDITION = a['href'].replace('/tygodnik/?I=', '') self.EDITION = a['href'].replace('/tygodnik/?I=', '')
self.cover_url = a.img['src'] self.cover_url = a.img['src']
def parse_index(self): def parse_index(self):
self.find_last_issue() self.find_last_issue()
soup = self.index_to_soup('http://www.wprost.pl/tygodnik/?I=' + self.EDITION) soup = self.index_to_soup('http://www.wprost.pl/tygodnik/?I=' + self.EDITION)
feeds = [] feeds = []
for main_block in soup.findAll(attrs={'class':'main-block-s3 s3-head head-red3'}): for main_block in soup.findAll(attrs={'class':'main-block-s3 s3-head head-red3'}):
articles = list(self.find_articles(main_block)) articles = list(self.find_articles(main_block))
if len(articles) > 0: if len(articles) > 0:
section = self.tag_to_string(main_block) section = self.tag_to_string(main_block)
feeds.append((section, articles)) feeds.append((section, articles))
return feeds return feeds
def find_articles(self, main_block):
for a in main_block.findAllNext( attrs={'style':['','padding-top: 15px;']}):
if a.name in "td":
break
if self.EXCLUDE_LOCKED & self.is_blocked(a):
continue
yield {
'title' : self.tag_to_string(a),
'url' : 'http://www.wprost.pl' + a['href'],
'date' : '',
'description' : ''
}
def find_articles(self, main_block):
for a in main_block.findAllNext( attrs={'style':['','padding-top: 15px;']}):
if a.name in "td":
break
if self.EXCLUDE_LOCKED & self.is_blocked(a):
continue
yield {
'title' : self.tag_to_string(a),
'url' : 'http://www.wprost.pl' + a['href'],
'date' : '',
'description' : ''
}

View File

@ -1,6 +1,7 @@
CREATE TABLE authors ( id INTEGER PRIMARY KEY, CREATE TABLE authors ( id INTEGER PRIMARY KEY,
name TEXT NOT NULL COLLATE NOCASE, name TEXT NOT NULL COLLATE NOCASE,
sort TEXT COLLATE NOCASE, sort TEXT COLLATE NOCASE,
link TEXT NOT NULL DEFAULT "",
UNIQUE(name) UNIQUE(name)
); );
CREATE TABLE books ( id INTEGER PRIMARY KEY AUTOINCREMENT, CREATE TABLE books ( id INTEGER PRIMARY KEY AUTOINCREMENT,
@ -545,4 +546,4 @@ CREATE TRIGGER series_update_trg
BEGIN BEGIN
UPDATE series SET sort=NEW.name WHERE id=NEW.id; UPDATE series SET sort=NEW.name WHERE id=NEW.id;
END; END;
pragma user_version=20; pragma user_version=21;

View File

@ -106,10 +106,12 @@ def sanitize_file_name(name, substitute='_', as_unicode=False):
name = name.encode(filesystem_encoding, 'ignore') name = name.encode(filesystem_encoding, 'ignore')
one = _filename_sanitize.sub(substitute, name) one = _filename_sanitize.sub(substitute, name)
one = re.sub(r'\s', ' ', one).strip() one = re.sub(r'\s', ' ', one).strip()
one = re.sub(r'^\.+$', '_', one) bname, ext = os.path.splitext(one)
one = re.sub(r'^\.+$', '_', bname)
if as_unicode: if as_unicode:
one = one.decode(filesystem_encoding) one = one.decode(filesystem_encoding)
one = one.replace('..', substitute) one = one.replace('..', substitute)
one += ext
# Windows doesn't like path components that end with a period # Windows doesn't like path components that end with a period
if one and one[-1] in ('.', ' '): if one and one[-1] in ('.', ' '):
one = one[:-1]+'_' one = one[:-1]+'_'
@ -132,8 +134,10 @@ def sanitize_file_name_unicode(name, substitute='_'):
name] name]
one = u''.join(chars) one = u''.join(chars)
one = re.sub(r'\s', ' ', one).strip() one = re.sub(r'\s', ' ', one).strip()
one = re.sub(r'^\.+$', '_', one) bname, ext = os.path.splitext(one)
one = re.sub(r'^\.+$', '_', bname)
one = one.replace('..', substitute) one = one.replace('..', substitute)
one += ext
# Windows doesn't like path components that end with a period or space # Windows doesn't like path components that end with a period or space
if one and one[-1] in ('.', ' '): if one and one[-1] in ('.', ' '):
one = one[:-1]+'_' one = one[:-1]+'_'

View File

@ -105,11 +105,13 @@ class ManyToManyTable(ManyToOneTable):
class AuthorsTable(ManyToManyTable): class AuthorsTable(ManyToManyTable):
def read_id_maps(self, db): def read_id_maps(self, db):
self.alink_map = {}
for row in db.conn.execute( for row in db.conn.execute(
'SELECT id, name, sort FROM authors'): 'SELECT id, name, sort, link FROM authors'):
self.id_map[row[0]] = row[1] self.id_map[row[0]] = row[1]
self.extra_map[row[0]] = (row[2] if row[2] else self.extra_map[row[0]] = (row[2] if row[2] else
author_to_author_sort(row[1])) author_to_author_sort(row[1]))
self.alink_map[row[0]] = row[3]
class FormatsTable(ManyToManyTable): class FormatsTable(ManyToManyTable):

View File

@ -19,10 +19,11 @@ class ANDROID(USBMS):
VENDOR_ID = { VENDOR_ID = {
# HTC # HTC
0x0bb4 : { 0x0c02 : [0x100, 0x0227, 0x0226, 0x222], 0x0bb4 : { 0xc02 : [0x100, 0x0227, 0x0226, 0x222],
0x0c01 : [0x100, 0x0227, 0x0226], 0xc01 : [0x100, 0x0227, 0x0226],
0x0ff9 : [0x0100, 0x0227, 0x0226], 0xff9 : [0x0100, 0x0227, 0x0226],
0x0c87 : [0x0100, 0x0227, 0x0226], 0xc87 : [0x0100, 0x0227, 0x0226],
0xc91 : [0x0100, 0x0227, 0x0226],
0xc92 : [0x100], 0xc92 : [0x100],
0xc97 : [0x226], 0xc97 : [0x226],
0xc99 : [0x0100], 0xc99 : [0x0100],

View File

@ -5,7 +5,7 @@ __copyright__ = '2010, Gregory Riker'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import cStringIO, ctypes, datetime, os, re, sys, tempfile, time import cStringIO, ctypes, datetime, os, re, shutil, sys, tempfile, time
from calibre.constants import __appname__, __version__, DEBUG from calibre.constants import __appname__, __version__, DEBUG
from calibre import fit_image, confirm_config_name from calibre import fit_image, confirm_config_name
from calibre.constants import isosx, iswindows from calibre.constants import isosx, iswindows
@ -119,11 +119,17 @@ class DriverBase(DeviceConfig, DevicePlugin):
'iBooks Category'), 'iBooks Category'),
_('Cache covers from iTunes/iBooks') + _('Cache covers from iTunes/iBooks') +
':::' + ':::' +
_('Enable to cache and display covers from iTunes/iBooks') _('Enable to cache and display covers from iTunes/iBooks'),
_("'Copy files to iTunes Media folder" u"\u2026" "' is enabled in iTunes Preferences|Advanced") +
':::' +
_("<p>This setting should match your iTunes <i>Preferences</i>|<i>Advanced</i> setting.</p>"
"<p>Disabling will store copies of books transferred to iTunes in your calibre configuration directory.</p>"
"<p>Enabling indicates that iTunes is configured to store copies in your iTunes Media folder.</p>")
] ]
EXTRA_CUSTOMIZATION_DEFAULT = [ EXTRA_CUSTOMIZATION_DEFAULT = [
True, True,
True, True,
False,
] ]
@ -193,6 +199,7 @@ class ITUNES(DriverBase):
# EXTRA_CUSTOMIZATION_MESSAGE indexes # EXTRA_CUSTOMIZATION_MESSAGE indexes
USE_SERIES_AS_CATEGORY = 0 USE_SERIES_AS_CATEGORY = 0
CACHE_COVERS = 1 CACHE_COVERS = 1
USE_ITUNES_STORAGE = 2
OPEN_FEEDBACK_MESSAGE = _( OPEN_FEEDBACK_MESSAGE = _(
'Apple device detected, launching iTunes, please wait ...') 'Apple device detected, launching iTunes, please wait ...')
@ -281,6 +288,7 @@ class ITUNES(DriverBase):
description_prefix = "added by calibre" description_prefix = "added by calibre"
ejected = False ejected = False
iTunes= None iTunes= None
iTunes_local_storage = None
library_orphans = None library_orphans = None
log = Log() log = Log()
manual_sync_mode = False manual_sync_mode = False
@ -825,7 +833,7 @@ class ITUNES(DriverBase):
# Confirm/create thumbs archive # Confirm/create thumbs archive
if not os.path.exists(self.cache_dir): if not os.path.exists(self.cache_dir):
if DEBUG: if DEBUG:
self.log.info(" creating thumb cache '%s'" % self.cache_dir) self.log.info(" creating thumb cache at '%s'" % self.cache_dir)
os.makedirs(self.cache_dir) os.makedirs(self.cache_dir)
if not os.path.exists(self.archive_path): if not os.path.exists(self.archive_path):
@ -837,6 +845,17 @@ class ITUNES(DriverBase):
if DEBUG: if DEBUG:
self.log.info(" existing thumb cache at '%s'" % self.archive_path) self.log.info(" existing thumb cache at '%s'" % self.archive_path)
# If enabled in config options, create/confirm an iTunes storage folder
if not self.settings().extra_customization[self.USE_ITUNES_STORAGE]:
self.iTunes_local_storage = os.path.join(config_dir,'iTunes storage')
if not os.path.exists(self.iTunes_local_storage):
if DEBUG:
self.log(" creating iTunes_local_storage at '%s'" % self.iTunes_local_storage)
os.mkdir(self.iTunes_local_storage)
else:
if DEBUG:
self.log(" existing iTunes_local_storage at '%s'" % self.iTunes_local_storage)
def remove_books_from_metadata(self, paths, booklists): def remove_books_from_metadata(self, paths, booklists):
''' '''
Remove books from the metadata list. This function must not communicate Remove books from the metadata list. This function must not communicate
@ -1281,50 +1300,27 @@ class ITUNES(DriverBase):
if DEBUG: if DEBUG:
self.log.info(" ITUNES._add_new_copy()") self.log.info(" ITUNES._add_new_copy()")
def _save_last_known_iTunes_storage(lb_added):
if isosx:
fp = lb_added.location().path
index = fp.rfind('/Books') + len('/Books')
last_known_iTunes_storage = fp[:index]
elif iswindows:
fp = lb_added.Location
index = fp.rfind('\Books') + len('\Books')
last_known_iTunes_storage = fp[:index]
dynamic['last_known_iTunes_storage'] = last_known_iTunes_storage
self.log.warning(" last_known_iTunes_storage: %s" % last_known_iTunes_storage)
db_added = None db_added = None
lb_added = None lb_added = None
# If using iTunes_local_storage, copy the file, redirect iTunes to use local copy
if not self.settings().extra_customization[self.USE_ITUNES_STORAGE]:
local_copy = os.path.join(self.iTunes_local_storage, str(metadata.uuid) + os.path.splitext(fpath)[1])
shutil.copyfile(fpath,local_copy)
fpath = local_copy
if self.manual_sync_mode: if self.manual_sync_mode:
''' '''
This is the unsupported direct-connect mode. Unsupported direct-connect mode.
In an attempt to avoid resetting the iTunes library Media folder, don't try to
add the book to iTunes if the last_known_iTunes_storage path is inaccessible.
This means that the path has to be set at least once, probably by using
'Connect to iTunes' and doing a transfer.
''' '''
self.log.warning(" unsupported direct connect mode") self.log.warning(" unsupported direct connect mode")
db_added = self._add_device_book(fpath, metadata) db_added = self._add_device_book(fpath, metadata)
last_known_iTunes_storage = dynamic.get('last_known_iTunes_storage', None) lb_added = self._add_library_book(fpath, metadata)
if last_known_iTunes_storage is not None:
if os.path.exists(last_known_iTunes_storage):
if DEBUG:
self.log.warning(" iTunes storage online, adding to library")
lb_added = self._add_library_book(fpath, metadata)
else:
if DEBUG:
self.log.warning(" iTunes storage not online, can't add to library")
if lb_added:
_save_last_known_iTunes_storage(lb_added)
if not lb_added and DEBUG: if not lb_added and DEBUG:
self.log.warn(" failed to add '%s' to iTunes, iTunes Media folder inaccessible" % metadata.title) self.log.warn(" failed to add '%s' to iTunes, iTunes Media folder inaccessible" % metadata.title)
else: else:
lb_added = self._add_library_book(fpath, metadata) lb_added = self._add_library_book(fpath, metadata)
if lb_added: if not lb_added:
_save_last_known_iTunes_storage(lb_added)
else:
raise UserFeedback("iTunes Media folder inaccessible", raise UserFeedback("iTunes Media folder inaccessible",
details="Failed to add '%s' to iTunes" % metadata.title, details="Failed to add '%s' to iTunes" % metadata.title,
level=UserFeedback.WARN) level=UserFeedback.WARN)
@ -1520,7 +1516,7 @@ class ITUNES(DriverBase):
else: else:
self.log.error(" book_playlist not found") self.log.error(" book_playlist not found")
if len(dev_books): if dev_books is not None and len(dev_books):
first_book = dev_books[0] first_book = dev_books[0]
if False: if False:
self.log.info(" determing manual mode by modifying '%s' by %s" % (first_book.name(), first_book.artist())) self.log.info(" determing manual mode by modifying '%s' by %s" % (first_book.name(), first_book.artist()))
@ -1551,7 +1547,7 @@ class ITUNES(DriverBase):
dev_books = pl.Tracks dev_books = pl.Tracks
break break
if dev_books.Count: if dev_books is not None and dev_books.Count:
first_book = dev_books.Item(1) first_book = dev_books.Item(1)
#if DEBUG: #if DEBUG:
#self.log.info(" determing manual mode by modifying '%s' by %s" % (first_book.Name, first_book.Artist)) #self.log.info(" determing manual mode by modifying '%s' by %s" % (first_book.Name, first_book.Artist))
@ -2526,7 +2522,15 @@ class ITUNES(DriverBase):
self.log.info(" processing %s" % fp) self.log.info(" processing %s" % fp)
if fp.startswith(prefs['library_path']): if fp.startswith(prefs['library_path']):
self.log.info(" '%s' stored in calibre database, not removed" % cached_book['title']) self.log.info(" '%s' stored in calibre database, not removed" % cached_book['title'])
elif not self.settings().extra_customization[self.USE_ITUNES_STORAGE] and \
fp.startswith(self.iTunes_local_storage) and \
os.path.exists(fp):
# Delete the copy in iTunes_local_storage
os.remove(fp)
if DEBUG:
self.log(" removing from iTunes_local_storage")
else: else:
# Delete from iTunes Media folder
if os.path.exists(fp): if os.path.exists(fp):
os.remove(fp) os.remove(fp)
if DEBUG: if DEBUG:
@ -2544,12 +2548,6 @@ class ITUNES(DriverBase):
os.rmdir(author_storage_path) os.rmdir(author_storage_path)
if DEBUG: if DEBUG:
self.log.info(" removing empty author directory") self.log.info(" removing empty author directory")
'''
else:
if DEBUG:
self.log.info(" author_storage_path not empty:")
self.log.info(" %s" % '\n'.join(author_files))
'''
else: else:
self.log.info(" '%s' does not exist at storage location" % cached_book['title']) self.log.info(" '%s' does not exist at storage location" % cached_book['title'])
@ -2586,7 +2584,15 @@ class ITUNES(DriverBase):
self.log.info(" processing %s" % fp) self.log.info(" processing %s" % fp)
if fp.startswith(prefs['library_path']): if fp.startswith(prefs['library_path']):
self.log.info(" '%s' stored in calibre database, not removed" % cached_book['title']) self.log.info(" '%s' stored in calibre database, not removed" % cached_book['title'])
elif not self.settings().extra_customization[self.USE_ITUNES_STORAGE] and \
fp.startswith(self.iTunes_local_storage) and \
os.path.exists(fp):
# Delete the copy in iTunes_local_storage
os.remove(fp)
if DEBUG:
self.log(" removing from iTunes_local_storage")
else: else:
# Delete from iTunes Media folder
if os.path.exists(fp): if os.path.exists(fp):
os.remove(fp) os.remove(fp)
if DEBUG: if DEBUG:
@ -3234,6 +3240,17 @@ class ITUNES_ASYNC(ITUNES):
if DEBUG: if DEBUG:
self.log.info(" existing thumb cache at '%s'" % self.archive_path) self.log.info(" existing thumb cache at '%s'" % self.archive_path)
# If enabled in config options, create/confirm an iTunes storage folder
if not self.settings().extra_customization[self.USE_ITUNES_STORAGE]:
self.iTunes_local_storage = os.path.join(config_dir,'iTunes storage')
if not os.path.exists(self.iTunes_local_storage):
if DEBUG:
self.log(" creating iTunes_local_storage at '%s'" % self.iTunes_local_storage)
os.mkdir(self.iTunes_local_storage)
else:
if DEBUG:
self.log(" existing iTunes_local_storage at '%s'" % self.iTunes_local_storage)
def sync_booklists(self, booklists, end_session=True): def sync_booklists(self, booklists, end_session=True):
''' '''
Update metadata on device. Update metadata on device.

View File

@ -20,11 +20,11 @@ class IRIVER_STORY(USBMS):
FORMATS = ['epub', 'fb2', 'pdf', 'djvu', 'txt'] FORMATS = ['epub', 'fb2', 'pdf', 'djvu', 'txt']
VENDOR_ID = [0x1006] VENDOR_ID = [0x1006]
PRODUCT_ID = [0x4023, 0x4024, 0x4025] PRODUCT_ID = [0x4023, 0x4024, 0x4025, 0x4034]
BCD = [0x0323] BCD = [0x0323, 0x0326]
VENDOR_NAME = 'IRIVER' VENDOR_NAME = 'IRIVER'
WINDOWS_MAIN_MEM = ['STORY', 'STORY_EB05', 'STORY_WI-FI'] WINDOWS_MAIN_MEM = ['STORY', 'STORY_EB05', 'STORY_WI-FI', 'STORY_EB07']
WINDOWS_CARD_A_MEM = ['STORY', 'STORY_SD'] WINDOWS_CARD_A_MEM = ['STORY', 'STORY_SD']
#OSX_MAIN_MEM = 'Kindle Internal Storage Media' #OSX_MAIN_MEM = 'Kindle Internal Storage Media'

View File

@ -64,14 +64,24 @@ int do_mount(const char *dev, const char *mp) {
snprintf(options, 1000, "rw,noexec,nosuid,sync,nodev"); snprintf(options, 1000, "rw,noexec,nosuid,sync,nodev");
snprintf(uids, 100, "%d", getuid()); snprintf(uids, 100, "%d", getuid());
snprintf(gids, 100, "%d", getgid()); snprintf(gids, 100, "%d", getgid());
#else
#ifdef __FreeBSD__
snprintf(options, 1000, "rw,noexec,nosuid,sync,-u=%d,-g=%d",getuid(),getgid());
#else #else
snprintf(options, 1000, "rw,noexec,nosuid,sync,nodev,quiet,shortname=mixed,uid=%d,gid=%d,umask=077,fmask=0177,dmask=0077,utf8,iocharset=iso8859-1", getuid(), getgid()); snprintf(options, 1000, "rw,noexec,nosuid,sync,nodev,quiet,shortname=mixed,uid=%d,gid=%d,umask=077,fmask=0177,dmask=0077,utf8,iocharset=iso8859-1", getuid(), getgid());
#endif #endif
#endif
ensure_root(); ensure_root();
#ifdef __NetBSD__ #ifdef __NetBSD__
execlp("mount_msdos", "mount_msdos", "-u", uids, "-g", gids, "-o", options, dev, mp, NULL); execlp("mount_msdos", "mount_msdos", "-u", uids, "-g", gids, "-o", options, dev, mp, NULL);
#else
#ifdef __FreeBSD__
execlp("mount", "mount", "-t", "msdosfs", "-o", options, dev, mp, NULL);
#else #else
execlp("mount", "mount", "-t", "auto", "-o", options, dev, mp, NULL); execlp("mount", "mount", "-t", "auto", "-o", options, dev, mp, NULL);
#endif
#endif #endif
errsv = errno; errsv = errno;
fprintf(stderr, "Failed to mount with error: %s\n", strerror(errsv)); fprintf(stderr, "Failed to mount with error: %s\n", strerror(errsv));
@ -91,8 +101,12 @@ int call_eject(const char *dev, const char *mp) {
ensure_root(); ensure_root();
#ifdef __NetBSD__ #ifdef __NetBSD__
execlp("eject", "eject", dev, NULL); execlp("eject", "eject", dev, NULL);
#else
#ifdef __FreeBSD__
execlp("umount", "umount", dev, NULL);
#else #else
execlp("eject", "eject", "-s", dev, NULL); execlp("eject", "eject", "-s", dev, NULL);
#endif
#endif #endif
/* execlp failed */ /* execlp failed */
errsv = errno; errsv = errno;
@ -121,7 +135,11 @@ int call_umount(const char *dev, const char *mp) {
if (pid == 0) { /* Child process */ if (pid == 0) { /* Child process */
ensure_root(); ensure_root();
#ifdef __FreeBSD__
execlp("umount", "umount", mp, NULL);
#else
execlp("umount", "umount", "-l", mp, NULL); execlp("umount", "umount", "-l", mp, NULL);
#endif
/* execlp failed */ /* execlp failed */
errsv = errno; errsv = errno;
fprintf(stderr, "Failed to umount with error: %s\n", strerror(errsv)); fprintf(stderr, "Failed to umount with error: %s\n", strerror(errsv));

View File

@ -14,7 +14,7 @@ from calibre.constants import preferred_encoding
from calibre import isbytestring, force_unicode from calibre import isbytestring, force_unicode
from calibre.utils.config import prefs, tweaks from calibre.utils.config import prefs, tweaks
from calibre.utils.icu import strcmp from calibre.utils.icu import strcmp
from calibre.utils.formatter import eval_formatter from calibre.utils.formatter import EvalFormatter
class Book(Metadata): class Book(Metadata):
def __init__(self, prefix, lpath, size=None, other=None): def __init__(self, prefix, lpath, size=None, other=None):
@ -116,7 +116,7 @@ class CollectionsBookList(BookList):
field_name = field_meta['name'] field_name = field_meta['name']
else: else:
field_name = '' field_name = ''
cat_name = eval_formatter.safe_format( cat_name = EvalFormatter().safe_format(
fmt=tweaks['sony_collection_name_template'], fmt=tweaks['sony_collection_name_template'],
kwargs={'category':field_name, 'value':field_value}, kwargs={'category':field_name, 'value':field_value},
error_value='GET_CATEGORY', book=None) error_value='GET_CATEGORY', book=None)

View File

@ -17,7 +17,7 @@ from itertools import repeat
from calibre.devices.interface import DevicePlugin from calibre.devices.interface import DevicePlugin
from calibre.devices.errors import DeviceError, FreeSpaceError from calibre.devices.errors import DeviceError, FreeSpaceError
from calibre.devices.usbms.deviceconfig import DeviceConfig from calibre.devices.usbms.deviceconfig import DeviceConfig
from calibre.constants import iswindows, islinux, isosx, plugins from calibre.constants import iswindows, islinux, isosx, isfreebsd, plugins
from calibre.utils.filenames import ascii_filename as sanitize, shorten_components_to from calibre.utils.filenames import ascii_filename as sanitize, shorten_components_to
if isosx: if isosx:
@ -701,7 +701,152 @@ class Device(DeviceConfig, DevicePlugin):
self._card_a_prefix = self._card_b_prefix self._card_a_prefix = self._card_b_prefix
self._card_b_prefix = None self._card_b_prefix = None
# ------------------------------------------------------
#
# open for FreeBSD
# find the device node or nodes that match the S/N we already have from the scanner
# and attempt to mount each one
# 1. get list of disk devices from sysctl
# 2. compare that list with the one from camcontrol
# 3. and see if it has a matching s/n
# 4. find any partitions/slices associated with each node
# 5. attempt to mount, using calibre-mount-helper, each one
# 6. when finished, we have a list of mount points and associated device nodes
#
def open_freebsd(self):
    '''
    Locate and mount the FreeBSD device nodes that belong to the detected reader.

    Disk names reported by ``sysctl kern.disks`` that also appear in the
    output of ``camcontrol devlist`` are asked for their serial number.
    Those whose serial matches the detected device are sorted and, for each
    one, the nodes under /dev whose name starts with the disk name are
    handed to calibre-mount-helper until one of them mounts.  The first
    three matched disks fill in the main memory, card A and card B
    prefix/device attributes respectively; a fourth is mounted but not
    recorded.

    Returns True if at least one filesystem was mounted, False otherwise
    (including when no disk matched the serial number).

    Raises DeviceError if the detected device has no serial number or the
    mount helper cannot be executed.
    '''
    import glob
    import os

    # this gives us access to the S/N, etc. of the reader that the scanner has found
    # and the match routines for some of that data, like s/n, vendor ID, etc.
    d = self.detected_device
    if not d.serial:
        raise DeviceError("Device has no S/N. Can't continue")

    ndevs = 4  # number of possible devices per reader (main, carda, cardb, launcher)

    def output_of(args):
        # Run a command to completion and return everything it wrote to stdout.
        return subprocess.Popen(args, stdout=subprocess.PIPE).communicate()[0]

    # get list of disk devices; -n prints only the value, without the
    # "kern.disks: " prefix
    kdsks = output_of(["sysctl", "-n", "kern.disks"])
    # the cam device list does not depend on the disk being examined, so
    # fetch it once
    devlist = output_of(["camcontrol", "devlist"])

    matched = []
    for dvc in kdsks.split():
        # for each one that's also in the list of cam devices ...
        if dvc not in devlist:
            continue
        # ... see if we can get a S/N from the actual device node
        sn = output_of(["camcontrol", "inquiry", dvc, "-S"])
        sn = sn[0:-1]  # drop the trailing newline
        if sn and d.match_serial(sn):
            # we have a matching s/n, record this device node
            matched.append(dvc)

    # Only the first ndevs matches are considered, in sorted order.  A list
    # is used so that readers exposing fewer than ndevs nodes are handled
    # instead of failing on a missing index.
    nodes = sorted(matched[:ndevs])

    # attribute pairs filled in by the 1st, 2nd and 3rd matched disk
    targets = (
        ('_main_prefix', '_main_dev'),
        ('_card_a_prefix', '_card_a_dev'),
        ('_card_b_prefix', '_card_b_dev'),
    )

    # now we need to see if any of these have slices/partitions
    mtd = 0
    label = "READER"  # could use something more unique, like S/N or productID...
    cmd = ['/usr/local/bin/calibre-mount-helper', 'mount']
    for i, node in enumerate(nodes):
        candidates = sorted(os.path.basename(path)
                            for path in glob.glob('/dev/' + node + '*'))
        # try all the nodes to see what we can mount
        for dev in candidates:
            mp = '/media/' + label + '-' + dev
            try:
                p = subprocess.Popen(cmd + ["/dev/" + dev, mp])
            except OSError:
                raise DeviceError(_('Could not find mount helper: %s.')%cmd[0])
            if p.wait() == 0:
                if i < len(targets):
                    prefix_attr, dev_attr = targets[i]
                    setattr(self, prefix_attr, mp)
                    setattr(self, dev_attr, "/dev/" + dev)
                mtd += 1
                # one mounted filesystem per disk is enough
                break

    return mtd > 0
#
# ------------------------------------------------------
#
# this one is pretty simple:
# just umount each of the previously
# mounted filesystems, using the mount helper
#
def eject_freebsd(self):
    '''
    Unmount the main memory, card A and card B filesystems, in that order,
    by running calibre-mount-helper in eject mode for every one whose
    mount prefix is set, then reset all three prefixes to None.

    Raises DeviceError if the mount helper cannot be executed.
    '''
    helper = ['/usr/local/bin/calibre-mount-helper', 'eject']
    mounts = (
        ('_main_prefix', '_main_dev'),
        ('_card_a_prefix', '_card_a_dev'),
        ('_card_b_prefix', '_card_b_dev'),
    )

    for prefix_attr, dev_attr in mounts:
        mount_point = getattr(self, prefix_attr)
        if not mount_point:
            continue
        # the device attribute is only looked up for mounted filesystems
        try:
            proc = subprocess.Popen(helper + [getattr(self, dev_attr), mount_point])
        except OSError:
            raise DeviceError(
                _('Could not find mount helper: %s.')%helper[0])
        # wait for the helper to finish before moving on
        while proc.poll() is None:
            time.sleep(0.1)

    self._main_prefix = None
    self._card_a_prefix = None
    self._card_b_prefix = None
# ------------------------------------------------------
def open(self, library_uuid): def open(self, library_uuid):
time.sleep(5) time.sleep(5)
@ -712,6 +857,14 @@ class Device(DeviceConfig, DevicePlugin):
except DeviceError: except DeviceError:
time.sleep(7) time.sleep(7)
self.open_linux() self.open_linux()
if isfreebsd:
self._main_dev = self._card_a_dev = self._card_b_dev = None
try:
self.open_freebsd()
except DeviceError:
subprocess.Popen(["camcontrol", "rescan", "all"])
time.sleep(2)
self.open_freebsd()
if iswindows: if iswindows:
try: try:
self.open_windows() self.open_windows()
@ -800,6 +953,11 @@ class Device(DeviceConfig, DevicePlugin):
self.eject_linux() self.eject_linux()
except: except:
pass pass
if isfreebsd:
try:
self.eject_freebsd()
except:
pass
if iswindows: if iswindows:
try: try:
self.eject_windows() self.eject_windows()

View File

@ -54,7 +54,7 @@ cpalmdoc_decompress(PyObject *self, PyObject *args) {
// Map chars to bytes // Map chars to bytes
for (j = 0; j < input_len; j++) for (j = 0; j < input_len; j++)
input[j] = (_input[j] < 0) ? _input[j]+256 : _input[j]; input[j] = (_input[j] < 0) ? _input[j]+256 : _input[j];
output = (char *)PyMem_Malloc(sizeof(char)*(MAX(BUFFER, 5*input_len))); output = (char *)PyMem_Malloc(sizeof(char)*(MAX(BUFFER, 8*input_len)));
if (output == NULL) return PyErr_NoMemory(); if (output == NULL) return PyErr_NoMemory();
while (i < input_len) { while (i < input_len) {

View File

@ -86,6 +86,8 @@ CALIBRE_METADATA_FIELDS = frozenset([
# a dict of user category names, where the value is a list of item names # a dict of user category names, where the value is a list of item names
# from the book that are in that category # from the book that are in that category
'user_categories', 'user_categories',
# a dict of author to an associated hyperlink
'author_link_map',
] ]
) )

View File

@ -34,6 +34,7 @@ NULL_VALUES = {
'authors' : [_('Unknown')], 'authors' : [_('Unknown')],
'title' : _('Unknown'), 'title' : _('Unknown'),
'user_categories' : {}, 'user_categories' : {},
'author_link_map' : {},
'language' : 'und' 'language' : 'und'
} }
@ -70,6 +71,7 @@ class SafeFormat(TemplateFormatter):
return '' return ''
return v return v
# DEPRECATED. This is not thread safe. Do not use.
composite_formatter = SafeFormat() composite_formatter = SafeFormat()
class Metadata(object): class Metadata(object):
@ -110,6 +112,7 @@ class Metadata(object):
# List of strings or [] # List of strings or []
self.author = list(authors) if authors else []# Needed for backward compatibility self.author = list(authors) if authors else []# Needed for backward compatibility
self.authors = list(authors) if authors else [] self.authors = list(authors) if authors else []
self.formatter = SafeFormat()
def is_null(self, field): def is_null(self, field):
''' '''
@ -146,7 +149,7 @@ class Metadata(object):
return val return val
if val is None: if val is None:
d['#value#'] = 'RECURSIVE_COMPOSITE FIELD (Metadata) ' + field d['#value#'] = 'RECURSIVE_COMPOSITE FIELD (Metadata) ' + field
val = d['#value#'] = composite_formatter.safe_format( val = d['#value#'] = self.formatter.safe_format(
d['display']['composite_template'], d['display']['composite_template'],
self, self,
_('TEMPLATE ERROR'), _('TEMPLATE ERROR'),
@ -423,11 +426,12 @@ class Metadata(object):
''' '''
if not ops: if not ops:
return return
formatter = SafeFormat()
for op in ops: for op in ops:
try: try:
src = op[0] src = op[0]
dest = op[1] dest = op[1]
val = composite_formatter.safe_format\ val = formatter.safe_format\
(src, other, 'PLUGBOARD TEMPLATE ERROR', other) (src, other, 'PLUGBOARD TEMPLATE ERROR', other)
if dest == 'tags': if dest == 'tags':
self.set(dest, [f.strip() for f in val.split(',') if f.strip()]) self.set(dest, [f.strip() for f in val.split(',') if f.strip()])

View File

@ -474,7 +474,7 @@ def serialize_user_metadata(metadata_elem, all_user_metadata, tail='\n'+(' '*8))
metadata_elem.append(meta) metadata_elem.append(meta)
def dump_user_categories(cats): def dump_dict(cats):
if not cats: if not cats:
cats = {} cats = {}
from calibre.ebooks.metadata.book.json_codec import object_to_unicode from calibre.ebooks.metadata.book.json_codec import object_to_unicode
@ -537,8 +537,9 @@ class OPF(object): # {{{
formatter=parse_date, renderer=isoformat) formatter=parse_date, renderer=isoformat)
user_categories = MetadataField('user_categories', is_dc=False, user_categories = MetadataField('user_categories', is_dc=False,
formatter=json.loads, formatter=json.loads,
renderer=dump_user_categories) renderer=dump_dict)
author_link_map = MetadataField('author_link_map', is_dc=False,
formatter=json.loads, renderer=dump_dict)
def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True, def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True,
populate_spine=True): populate_spine=True):
@ -1039,7 +1040,7 @@ class OPF(object): # {{{
for attr in ('title', 'authors', 'author_sort', 'title_sort', for attr in ('title', 'authors', 'author_sort', 'title_sort',
'publisher', 'series', 'series_index', 'rating', 'publisher', 'series', 'series_index', 'rating',
'isbn', 'tags', 'category', 'comments', 'isbn', 'tags', 'category', 'comments',
'pubdate', 'user_categories'): 'pubdate', 'user_categories', 'author_link_map'):
val = getattr(mi, attr, None) val = getattr(mi, attr, None)
if val is not None and val != [] and val != (None, None): if val is not None and val != [] and val != (None, None):
setattr(self, attr, val) setattr(self, attr, val)
@ -1336,6 +1337,8 @@ def metadata_to_opf(mi, as_string=True):
for tag in mi.tags: for tag in mi.tags:
factory(DC('subject'), tag) factory(DC('subject'), tag)
meta = lambda n, c: factory('meta', name='calibre:'+n, content=c) meta = lambda n, c: factory('meta', name='calibre:'+n, content=c)
if getattr(mi, 'author_link_map', None) is not None:
meta('author_link_map', dump_dict(mi.author_link_map))
if mi.series: if mi.series:
meta('series', mi.series) meta('series', mi.series)
if mi.series_index is not None: if mi.series_index is not None:
@ -1349,7 +1352,7 @@ def metadata_to_opf(mi, as_string=True):
if mi.title_sort: if mi.title_sort:
meta('title_sort', mi.title_sort) meta('title_sort', mi.title_sort)
if mi.user_categories: if mi.user_categories:
meta('user_categories', dump_user_categories(mi.user_categories)) meta('user_categories', dump_dict(mi.user_categories))
serialize_user_metadata(metadata, mi.get_all_user_metadata(False)) serialize_user_metadata(metadata, mi.get_all_user_metadata(False))

View File

@ -957,7 +957,10 @@ def get_metadata(stream):
return get_metadata(stream) return get_metadata(stream)
from calibre.utils.logging import Log from calibre.utils.logging import Log
log = Log() log = Log()
mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')]) try:
mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
except:
mi = MetaInformation(_('Unknown'), [_('Unknown')])
mh = MetadataHeader(stream, log) mh = MetadataHeader(stream, log)
if mh.title and mh.title != _('Unknown'): if mh.title and mh.title != _('Unknown'):
mi.title = mh.title mi.title = mh.title

View File

@ -7,12 +7,13 @@ from urllib import unquote
from PyQt4.Qt import (QVariant, QFileInfo, QObject, SIGNAL, QBuffer, Qt, from PyQt4.Qt import (QVariant, QFileInfo, QObject, SIGNAL, QBuffer, Qt,
QByteArray, QTranslator, QCoreApplication, QThread, QByteArray, QTranslator, QCoreApplication, QThread,
QEvent, QTimer, pyqtSignal, QDate, QDesktopServices, QEvent, QTimer, pyqtSignal, QDate, QDesktopServices,
QFileDialog, QFileIconProvider, QFileDialog, QFileIconProvider, QSettings,
QIcon, QApplication, QDialog, QUrl, QFont) QIcon, QApplication, QDialog, QUrl, QFont)
ORG_NAME = 'KovidsBrain' ORG_NAME = 'KovidsBrain'
APP_UID = 'libprs500' APP_UID = 'libprs500'
from calibre.constants import islinux, iswindows, isbsd, isfrozen, isosx from calibre.constants import (islinux, iswindows, isbsd, isfrozen, isosx,
config_dir)
from calibre.utils.config import Config, ConfigProxy, dynamic, JSONConfig from calibre.utils.config import Config, ConfigProxy, dynamic, JSONConfig
from calibre.utils.localization import set_qt_translator from calibre.utils.localization import set_qt_translator
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
@ -82,13 +83,14 @@ gprefs.defaults['tags_browser_partition_method'] = 'first letter'
gprefs.defaults['tags_browser_collapse_at'] = 100 gprefs.defaults['tags_browser_collapse_at'] = 100
gprefs.defaults['edit_metadata_single_layout'] = 'default' gprefs.defaults['edit_metadata_single_layout'] = 'default'
gprefs.defaults['book_display_fields'] = [ gprefs.defaults['book_display_fields'] = [
('title', False), ('authors', False), ('formats', True), ('title', False), ('authors', True), ('formats', True),
('series', True), ('identifiers', True), ('tags', True), ('series', True), ('identifiers', True), ('tags', True),
('path', True), ('publisher', False), ('rating', False), ('path', True), ('publisher', False), ('rating', False),
('author_sort', False), ('sort', False), ('timestamp', False), ('author_sort', False), ('sort', False), ('timestamp', False),
('uuid', False), ('comments', True), ('id', False), ('pubdate', False), ('uuid', False), ('comments', True), ('id', False), ('pubdate', False),
('last_modified', False), ('size', False), ('last_modified', False), ('size', False),
] ]
gprefs.defaults['default_author_link'] = 'http://en.wikipedia.org/w/index.php?search={author}'
# }}} # }}}
@ -192,6 +194,11 @@ def _config(): # {{{
config = _config() config = _config()
# }}} # }}}
QSettings.setPath(QSettings.IniFormat, QSettings.UserScope, config_dir)
QSettings.setPath(QSettings.IniFormat, QSettings.SystemScope,
config_dir)
QSettings.setDefaultFormat(QSettings.IniFormat)
# Turn off DeprecationWarnings in windows GUI # Turn off DeprecationWarnings in windows GUI
if iswindows: if iswindows:
import warnings import warnings

View File

@ -38,3 +38,6 @@ class ShowQuickviewAction(InterfaceAction):
Quickview(self.gui, self.gui.library_view, index) Quickview(self.gui, self.gui.library_view, index)
self.current_instance.show() self.current_instance.show()
def library_changed(self, db):
    '''
    Pass the new database *db* to the current Quickview instance, provided
    one exists and it has not been closed.
    '''
    quickview = self.current_instance
    if not quickview or quickview.is_closed:
        return
    quickview.set_database(db)

View File

@ -5,6 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import urllib2
from PyQt4.Qt import (QPixmap, QSize, QWidget, Qt, pyqtSignal, QUrl, from PyQt4.Qt import (QPixmap, QSize, QWidget, Qt, pyqtSignal, QUrl,
QPropertyAnimation, QEasingCurve, QApplication, QFontInfo, QPropertyAnimation, QEasingCurve, QApplication, QFontInfo,
@ -23,6 +24,7 @@ from calibre.library.comments import comments_to_html
from calibre.gui2 import (config, open_local_file, open_url, pixmap_to_data, from calibre.gui2 import (config, open_local_file, open_url, pixmap_to_data,
gprefs) gprefs)
from calibre.utils.icu import sort_key from calibre.utils.icu import sort_key
from calibre.utils.formatter import EvalFormatter
def render_html(mi, css, vertical, widget, all_fields=False): # {{{ def render_html(mi, css, vertical, widget, all_fields=False): # {{{
table = render_data(mi, all_fields=all_fields, table = render_data(mi, all_fields=all_fields,
@ -121,6 +123,27 @@ def render_data(mi, use_roman_numbers=True, all_fields=False):
if links: if links:
ans.append((field, u'<td class="title">%s</td><td>%s</td>'%( ans.append((field, u'<td class="title">%s</td><td>%s</td>'%(
_('Ids')+':', links))) _('Ids')+':', links)))
elif field == 'authors' and not isdevice:
authors = []
formatter = EvalFormatter()
for aut in mi.authors:
if mi.author_link_map[aut]:
link = mi.author_link_map[aut]
elif gprefs.get('default_author_link'):
vals = {'author': aut.replace(' ', '+')}
try:
vals['author_sort'] = mi.author_sort_map[aut].replace(' ', '+')
except:
vals['author_sort'] = aut.replace(' ', '+')
link = formatter.safe_format(
gprefs.get('default_author_link'), vals, '', vals)
if link:
link = prepare_string_for_xml(link)
authors.append(u'<a href="%s">%s</a>'%(link, aut))
else:
authors.append(aut)
ans.append((field, u'<td class="title">%s</td><td>%s</td>'%(name,
u' & '.join(authors))))
else: else:
val = mi.format_field(field)[-1] val = mi.format_field(field)[-1]
if val is None: if val is None:

View File

@ -4,10 +4,11 @@ __docformat__ = 'restructuredtext en'
__license__ = 'GPL v3' __license__ = 'GPL v3'
from PyQt4.Qt import (Qt, QDialog, QTableWidgetItem, QAbstractItemView, QIcon, from PyQt4.Qt import (Qt, QDialog, QTableWidgetItem, QAbstractItemView, QIcon,
QDialogButtonBox, QFrame, QLabel, QTimer, QMenu, QApplication) QDialogButtonBox, QFrame, QLabel, QTimer, QMenu, QApplication,
QByteArray)
from calibre.ebooks.metadata import author_to_author_sort from calibre.ebooks.metadata import author_to_author_sort
from calibre.gui2 import error_dialog from calibre.gui2 import error_dialog, gprefs
from calibre.gui2.dialogs.edit_authors_dialog_ui import Ui_EditAuthorsDialog from calibre.gui2.dialogs.edit_authors_dialog_ui import Ui_EditAuthorsDialog
from calibre.utils.icu import sort_key from calibre.utils.icu import sort_key
@ -20,7 +21,7 @@ class tableItem(QTableWidgetItem):
class EditAuthorsDialog(QDialog, Ui_EditAuthorsDialog): class EditAuthorsDialog(QDialog, Ui_EditAuthorsDialog):
def __init__(self, parent, db, id_to_select, select_sort): def __init__(self, parent, db, id_to_select, select_sort, select_link):
QDialog.__init__(self, parent) QDialog.__init__(self, parent)
Ui_EditAuthorsDialog.__init__(self) Ui_EditAuthorsDialog.__init__(self)
self.setupUi(self) self.setupUi(self)
@ -29,11 +30,19 @@ class EditAuthorsDialog(QDialog, Ui_EditAuthorsDialog):
self.setWindowFlags(self.windowFlags()&(~Qt.WindowContextHelpButtonHint)) self.setWindowFlags(self.windowFlags()&(~Qt.WindowContextHelpButtonHint))
self.setWindowIcon(icon) self.setWindowIcon(icon)
try:
self.table_column_widths = \
gprefs.get('manage_authors_table_widths', None)
geom = gprefs.get('manage_authors_dialog_geometry', bytearray(''))
self.restoreGeometry(QByteArray(geom))
except:
pass
self.buttonBox.accepted.connect(self.accepted) self.buttonBox.accepted.connect(self.accepted)
# Set up the column headings # Set up the column headings
self.table.setSelectionMode(QAbstractItemView.SingleSelection) self.table.setSelectionMode(QAbstractItemView.SingleSelection)
self.table.setColumnCount(2) self.table.setColumnCount(3)
self.down_arrow_icon = QIcon(I('arrow-down.png')) self.down_arrow_icon = QIcon(I('arrow-down.png'))
self.up_arrow_icon = QIcon(I('arrow-up.png')) self.up_arrow_icon = QIcon(I('arrow-up.png'))
self.blank_icon = QIcon(I('blank.png')) self.blank_icon = QIcon(I('blank.png'))
@ -43,26 +52,35 @@ class EditAuthorsDialog(QDialog, Ui_EditAuthorsDialog):
self.aus_col = QTableWidgetItem(_('Author sort')) self.aus_col = QTableWidgetItem(_('Author sort'))
self.table.setHorizontalHeaderItem(1, self.aus_col) self.table.setHorizontalHeaderItem(1, self.aus_col)
self.aus_col.setIcon(self.up_arrow_icon) self.aus_col.setIcon(self.up_arrow_icon)
self.aul_col = QTableWidgetItem(_('Link'))
self.table.setHorizontalHeaderItem(2, self.aul_col)
self.aus_col.setIcon(self.blank_icon)
# Add the data # Add the data
self.authors = {} self.authors = {}
auts = db.get_authors_with_ids() auts = db.get_authors_with_ids()
self.table.setRowCount(len(auts)) self.table.setRowCount(len(auts))
select_item = None select_item = None
for row, (id, author, sort) in enumerate(auts): for row, (id, author, sort, link) in enumerate(auts):
author = author.replace('|', ',') author = author.replace('|', ',')
self.authors[id] = (author, sort) self.authors[id] = (author, sort, link)
aut = tableItem(author) aut = tableItem(author)
aut.setData(Qt.UserRole, id) aut.setData(Qt.UserRole, id)
sort = tableItem(sort) sort = tableItem(sort)
link = tableItem(link)
self.table.setItem(row, 0, aut) self.table.setItem(row, 0, aut)
self.table.setItem(row, 1, sort) self.table.setItem(row, 1, sort)
self.table.setItem(row, 2, link)
if id == id_to_select: if id == id_to_select:
if select_sort: if select_sort:
select_item = sort select_item = sort
elif select_link:
select_item = link
else: else:
select_item = aut select_item = aut
self.table.resizeColumnsToContents() self.table.resizeColumnsToContents()
if self.table.columnWidth(2) < 200:
self.table.setColumnWidth(2, 200)
# set up the cellChanged signal only after the table is filled # set up the cellChanged signal only after the table is filled
self.table.cellChanged.connect(self.cell_changed) self.table.cellChanged.connect(self.cell_changed)
@ -115,6 +133,28 @@ class EditAuthorsDialog(QDialog, Ui_EditAuthorsDialog):
self.table.setContextMenuPolicy(Qt.CustomContextMenu) self.table.setContextMenuPolicy(Qt.CustomContextMenu)
self.table.customContextMenuRequested .connect(self.show_context_menu) self.table.customContextMenuRequested .connect(self.show_context_menu)
def save_state(self):
    '''
    Record the current width of every table column and the dialog geometry
    in gprefs, under the same keys that __init__ reads them back from.
    '''
    table = self.table
    widths = [table.columnWidth(col) for col in range(table.columnCount())]
    self.table_column_widths = widths
    gprefs['manage_authors_table_widths'] = widths
    gprefs['manage_authors_dialog_geometry'] = bytearray(self.saveGeometry())
def resizeEvent(self, *args):
    '''
    Reapply the column widths whenever the dialog is resized.

    If widths are known (loaded from gprefs or stored by save_state) they
    are applied as-is; otherwise the available width is split evenly between
    the columns. The resulting state is saved in either case.
    '''
    QDialog.resizeEvent(self, *args)
    # __init__ loads the widths inside a try/except that swallows every
    # error, so the attribute may not exist yet: treat that like "no widths".
    widths = getattr(self, 'table_column_widths', None)
    if widths is not None:
        for c, w in enumerate(widths):
            self.table.setColumnWidth(c, w)
    else:
        # the vertical scroll bar might not be rendered, so might not yet
        # have a width. Assume 25. Not a problem because user-changed column
        # widths will be remembered
        ncols = self.table.columnCount()
        w = self.table.width() - 25 - self.table.verticalHeader().width()
        # Floor division: the width must stay an integer even when true
        # division is in effect.
        w //= ncols
        for c in range(ncols):
            self.table.setColumnWidth(c, w)
    self.save_state()
def show_context_menu(self, point): def show_context_menu(self, point):
self.context_item = self.table.itemAt(point) self.context_item = self.table.itemAt(point)
case_menu = QMenu(_('Change Case')) case_menu = QMenu(_('Change Case'))
@ -231,14 +271,16 @@ class EditAuthorsDialog(QDialog, Ui_EditAuthorsDialog):
self.auth_col.setIcon(self.blank_icon) self.auth_col.setIcon(self.blank_icon)
def accepted(self):
    '''
    Save the dialog layout, then build self.result: one
    (id, original author, author, author sort, link) tuple for every row
    whose author, sort or link text differs from the values loaded into
    self.authors.
    '''
    self.save_state()
    self.result = []
    for row in range(self.table.rowCount()):
        cells = [self.table.item(row, col) for col in range(3)]
        # The author id was stored on the first column's item as UserRole data
        author_id = cells[0].data(Qt.UserRole).toInt()[0]
        author, author_sort, author_link = \
                [unicode(cell.text()).strip() for cell in cells]
        old_author, old_sort, old_link = self.authors[author_id]
        unchanged = (old_author == author and old_sort == author_sort
                     and old_link == author_link)
        if not unchanged:
            self.result.append(
                (author_id, old_author, author, author_sort, author_link))
def do_recalc_author_sort(self): def do_recalc_author_sort(self):
self.table.cellChanged.disconnect() self.table.cellChanged.disconnect()
@ -276,6 +318,6 @@ class EditAuthorsDialog(QDialog, Ui_EditAuthorsDialog):
c.setText(author_to_author_sort(aut)) c.setText(author_to_author_sort(aut))
item = c item = c
else: else:
item = self.table.item(row, 1) item = self.table.item(row, col)
self.table.setCurrentItem(item) self.table.setCurrentItem(item)
self.table.scrollToItem(item) self.table.scrollToItem(item)

View File

@ -12,7 +12,7 @@ from PyQt4.Qt import Qt, QDialog, QGridLayout, QVBoxLayout, QFont, QLabel, \
from calibre.gui2.dialogs.metadata_bulk_ui import Ui_MetadataBulkDialog from calibre.gui2.dialogs.metadata_bulk_ui import Ui_MetadataBulkDialog
from calibre.gui2.dialogs.tag_editor import TagEditor from calibre.gui2.dialogs.tag_editor import TagEditor
from calibre.ebooks.metadata import string_to_authors, authors_to_string, title_sort from calibre.ebooks.metadata import string_to_authors, authors_to_string, title_sort
from calibre.ebooks.metadata.book.base import composite_formatter from calibre.ebooks.metadata.book.base import SafeFormat
from calibre.gui2.custom_column_widgets import populate_metadata_page from calibre.gui2.custom_column_widgets import populate_metadata_page
from calibre.gui2 import error_dialog, ResizableDialog, UNDEFINED_QDATE, \ from calibre.gui2 import error_dialog, ResizableDialog, UNDEFINED_QDATE, \
gprefs, question_dialog gprefs, question_dialog
@ -499,7 +499,7 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
def s_r_get_field(self, mi, field): def s_r_get_field(self, mi, field):
if field: if field:
if field == '{template}': if field == '{template}':
v = composite_formatter.safe_format\ v = SafeFormat().safe_format\
(unicode(self.s_r_template.text()), mi, _('S/R TEMPLATE ERROR'), mi) (unicode(self.s_r_template.text()), mi, _('S/R TEMPLATE ERROR'), mi)
return [v] return [v]
fm = self.db.metadata_for_field(field) fm = self.db.metadata_for_field(field)

View File

@ -18,16 +18,29 @@ class TableItem(QTableWidgetItem):
A QTableWidgetItem that sorts on a separate string and uses ICU rules A QTableWidgetItem that sorts on a separate string and uses ICU rules
''' '''
def __init__(self, val, sort, idx=0):
    # val:  text displayed in the cell
    # sort: string the item is ordered by (passed through sort_key when
    #       items are compared)
    # idx:  secondary key used by the comparisons when two items have equal
    #       sort strings
    self.sort = sort
    self.sort_idx = idx
    QTableWidgetItem.__init__(self, val)
    # Items can be selected but carry no editable flag
    self.setFlags(Qt.ItemIsEnabled|Qt.ItemIsSelectable)
def __ge__(self, other):
    '''
    Return True if this item sorts at or after other.

    Items are ordered by the sort key of their sort string first and by
    sort_idx second; comparing the two (key, idx) tuples expresses exactly
    that and always yields a real bool.
    '''
    return ((sort_key(self.sort), self.sort_idx) >=
            (sort_key(other.sort), other.sort_idx))
def __lt__(self, other):
    '''
    Return True if this item sorts strictly before other.

    Items are ordered by the sort key of their sort string first and by
    sort_idx second; comparing the two (key, idx) tuples expresses exactly
    that and always yields a real bool.
    '''
    return ((sort_key(self.sort), self.sort_idx) <
            (sort_key(other.sort), other.sort_idx))
class Quickview(QDialog, Ui_Quickview): class Quickview(QDialog, Ui_Quickview):
@ -95,6 +108,15 @@ class Quickview(QDialog, Ui_Quickview):
self.search_button.clicked.connect(self.do_search) self.search_button.clicked.connect(self.do_search)
view.model().new_bookdisplay_data.connect(self.book_was_changed) view.model().new_bookdisplay_data.connect(self.book_was_changed)
def set_database(self, db):
    '''
    Switch to a new database and empty both the item list and the books
    table. Signals of the two widgets are blocked while they are cleared.
    '''
    self.db = db
    widgets = (self.items, self.books_table)
    for widget in widgets:
        widget.blockSignals(True)
    self.items.clear()
    self.books_table.setRowCount(0)
    # Unblock in the reverse order of blocking
    for widget in reversed(widgets):
        widget.blockSignals(False)
# search button # search button
def do_search(self): def do_search(self):
if self.last_search is not None: if self.last_search is not None:
@ -185,7 +207,7 @@ class Quickview(QDialog, Ui_Quickview):
series = mi.format_field('series')[1] series = mi.format_field('series')[1]
if series is None: if series is None:
series = '' series = ''
a = TableItem(series, series) a = TableItem(series, mi.series, mi.series_index)
a.setToolTip(tt) a.setToolTip(tt)
self.books_table.setItem(row, 2, a) self.books_table.setItem(row, 2, a)
self.books_table.setRowHeight(row, self.books_table_row_height) self.books_table.setRowHeight(row, self.books_table_row_height)

View File

@ -57,19 +57,6 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="2" column="1">
<spacer>
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>0</width>
<height>0</height>
</size>
</property>
</spacer>
</item>
<item row="3" column="0" colspan="2"> <item row="3" column="0" colspan="2">
<layout class="QHBoxLayout"> <layout class="QHBoxLayout">
<item> <item>

View File

@ -54,7 +54,7 @@ class DBRestore(QDialog):
def reject(self):
    '''
    Handle the dialog being rejected: flag it, silence further progress
    reports from the restorer and close through the base class.
    '''
    self.rejected = True
    # Replace the callback with one that does nothing with its arguments
    self.restorer.progress_callback = lambda x, y: x
    # Must be QDialog.reject; a misspelt name here raises AttributeError
    # instead of closing the dialog.
    QDialog.reject(self)
def update(self): def update(self):
if self.restorer.is_alive(): if self.restorer.is_alive():

View File

@ -11,7 +11,7 @@ from PyQt4.Qt import (Qt, QDialog, QDialogButtonBox, QSyntaxHighlighter, QFont,
from calibre.gui2 import error_dialog from calibre.gui2 import error_dialog
from calibre.gui2.dialogs.template_dialog_ui import Ui_TemplateDialog from calibre.gui2.dialogs.template_dialog_ui import Ui_TemplateDialog
from calibre.utils.formatter_functions import formatter_functions from calibre.utils.formatter_functions import formatter_functions
from calibre.ebooks.metadata.book.base import composite_formatter, Metadata from calibre.ebooks.metadata.book.base import SafeFormat, Metadata
from calibre.library.coloring import (displayable_columns) from calibre.library.coloring import (displayable_columns)
@ -270,7 +270,7 @@ class TemplateDialog(QDialog, Ui_TemplateDialog):
self.highlighter.regenerate_paren_positions() self.highlighter.regenerate_paren_positions()
self.text_cursor_changed() self.text_cursor_changed()
self.template_value.setText( self.template_value.setText(
composite_formatter.safe_format(cur_text, self.mi, SafeFormat().safe_format(cur_text, self.mi,
_('EXCEPTION: '), self.mi)) _('EXCEPTION: '), self.mi))
def text_cursor_changed(self): def text_cursor_changed(self):

View File

@ -14,7 +14,7 @@ from PyQt4.Qt import (QAbstractTableModel, Qt, pyqtSignal, QIcon, QImage,
from calibre.gui2 import NONE, UNDEFINED_QDATE from calibre.gui2 import NONE, UNDEFINED_QDATE
from calibre.utils.pyparsing import ParseException from calibre.utils.pyparsing import ParseException
from calibre.ebooks.metadata import fmt_sidx, authors_to_string, string_to_authors from calibre.ebooks.metadata import fmt_sidx, authors_to_string, string_to_authors
from calibre.ebooks.metadata.book.base import composite_formatter from calibre.ebooks.metadata.book.base import SafeFormat
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.config import tweaks, prefs from calibre.utils.config import tweaks, prefs
from calibre.utils.date import dt_factory, qt_to_dt from calibre.utils.date import dt_factory, qt_to_dt
@ -91,6 +91,7 @@ class BooksModel(QAbstractTableModel): # {{{
self.current_highlighted_idx = None self.current_highlighted_idx = None
self.highlight_only = False self.highlight_only = False
self.colors = frozenset([unicode(c) for c in QColor.colorNames()]) self.colors = frozenset([unicode(c) for c in QColor.colorNames()])
self.formatter = SafeFormat()
self.read_config() self.read_config()
def change_alignment(self, colname, alignment): def change_alignment(self, colname, alignment):
@ -711,7 +712,7 @@ class BooksModel(QAbstractTableModel): # {{{
try: try:
if mi is None: if mi is None:
mi = self.db.get_metadata(id_, index_is_id=True) mi = self.db.get_metadata(id_, index_is_id=True)
color = composite_formatter.safe_format(fmt, mi, '', mi) color = self.formatter.safe_format(fmt, mi, '', mi)
if color in self.colors: if color in self.colors:
color = QColor(color) color = QColor(color)
if color.isValid(): if color.isValid():

View File

@ -1092,11 +1092,12 @@ class IdentifiersEdit(QLineEdit): # {{{
for x in parts: for x in parts:
c = x.split(':') c = x.split(':')
if len(c) > 1: if len(c) > 1:
if c[0] == 'isbn': itype = c[0].lower()
if itype == 'isbn':
v = check_isbn(c[1]) v = check_isbn(c[1])
if v is not None: if v is not None:
c[1] = v c[1] = v
ans[c[0]] = c[1] ans[itype] = c[1]
return ans return ans
def fset(self, val): def fset(self, val):
if not val: if not val:
@ -1112,7 +1113,7 @@ class IdentifiersEdit(QLineEdit): # {{{
if v is not None: if v is not None:
val[k] = v val[k] = v
ids = sorted(val.iteritems(), key=keygen) ids = sorted(val.iteritems(), key=keygen)
txt = ', '.join(['%s:%s'%(k, v) for k, v in ids]) txt = ', '.join(['%s:%s'%(k.lower(), v) for k, v in ids])
self.setText(txt.strip()) self.setText(txt.strip())
self.setCursorPosition(0) self.setCursorPosition(0)
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)

View File

@ -138,6 +138,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
(_('Partitioned'), 'partition')] (_('Partitioned'), 'partition')]
r('tags_browser_partition_method', gprefs, choices=choices) r('tags_browser_partition_method', gprefs, choices=choices)
r('tags_browser_collapse_at', gprefs) r('tags_browser_collapse_at', gprefs)
r('default_author_link', gprefs)
choices = set([k for k in db.field_metadata.all_field_keys() choices = set([k for k in db.field_metadata.all_field_keys()
if db.field_metadata[k]['is_category'] and if db.field_metadata[k]['is_category'] and

View File

@ -192,7 +192,7 @@
<string>Book Details</string> <string>Book Details</string>
</attribute> </attribute>
<layout class="QGridLayout" name="gridLayout_12"> <layout class="QGridLayout" name="gridLayout_12">
<item row="0" column="0" rowspan="2"> <item row="1" column="0" rowspan="2">
<widget class="QGroupBox" name="groupBox"> <widget class="QGroupBox" name="groupBox">
<property name="title"> <property name="title">
<string>Select displayed metadata</string> <string>Select displayed metadata</string>
@ -243,6 +243,31 @@
</layout> </layout>
</widget> </widget>
</item> </item>
<item row="0" column="0">
<layout class="QHBoxLayout">
<item>
<widget class="QLabel" name="label">
<property name="text">
<string>Default author link template:</string>
</property>
<property name="buddy">
<cstring>opt_default_author_link</cstring>
</property>
</widget>
</item>
<item>
<widget class="QLineEdit" name="opt_default_author_link">
<property name="toolTip">
<string>&lt;p&gt;Enter a template to be used to create a link for
an author in the books information dialog. This template will
be used when no link has been provided for the author using
Manage Authors. You can use the values {author} and
{author_sort}, and any template function.</string>
</property>
</widget>
</item>
</layout>
</item>
<item row="0" column="1"> <item row="0" column="1">
<widget class="QCheckBox" name="opt_use_roman_numerals_for_series_number"> <widget class="QCheckBox" name="opt_use_roman_numerals_for_series_number">
<property name="text"> <property name="text">

View File

@ -357,7 +357,6 @@ class Preferences(QMainWindow):
bytearray(self.saveGeometry())) bytearray(self.saveGeometry()))
if self.committed: if self.committed:
self.gui.must_restart_before_config = self.must_restart self.gui.must_restart_before_config = self.must_restart
self.gui.tags_view.set_new_model() # in case columns changed
self.gui.tags_view.recount() self.gui.tags_view.recount()
self.gui.create_device_menu() self.gui.create_device_menu()
self.gui.set_device_menu_items_state(bool(self.gui.device_connected)) self.gui.set_device_menu_items_state(bool(self.gui.device_connected))

View File

@ -173,7 +173,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
def refresh_gui(self, gui): def refresh_gui(self, gui):
gui.set_highlight_only_button_icon() gui.set_highlight_only_button_icon()
if self.muc_changed: if self.muc_changed:
gui.tags_view.set_new_model() gui.tags_view.recount()
gui.search.search_as_you_type(config['search_as_you_type']) gui.search.search_as_you_type(config['search_as_you_type'])
gui.search.do_search() gui.search.do_search()

View File

@ -6,11 +6,7 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>' __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from contextlib import closing
from lxml import html
from calibre import browser
from calibre.gui2.store.basic_config import BasicStoreConfig from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.opensearch_store import OpenSearchStore from calibre.gui2.store.opensearch_store import OpenSearchStore
from calibre.gui2.store.search_result import SearchResult from calibre.gui2.store.search_result import SearchResult
@ -19,9 +15,9 @@ class ArchiveOrgStore(BasicStoreConfig, OpenSearchStore):
open_search_url = 'http://bookserver.archive.org/catalog/opensearch.xml' open_search_url = 'http://bookserver.archive.org/catalog/opensearch.xml'
web_url = 'http://www.archive.org/details/texts' web_url = 'http://www.archive.org/details/texts'
# http://bookserver.archive.org/catalog/ # http://bookserver.archive.org/catalog/
def search(self, query, max_results=10, timeout=60): def search(self, query, max_results=10, timeout=60):
for s in OpenSearchStore.search(self, query, max_results, timeout): for s in OpenSearchStore.search(self, query, max_results, timeout):
s.detail_item = 'http://www.archive.org/details/' + s.detail_item.split(':')[-1] s.detail_item = 'http://www.archive.org/details/' + s.detail_item.split(':')[-1]
@ -39,5 +35,5 @@ class ArchiveOrgStore(BasicStoreConfig, OpenSearchStore):
idata = html.fromstring(nf.read()) idata = html.fromstring(nf.read())
formats = ', '.join(idata.xpath('//p[@id="dl" and @class="content"]//a/text()')) formats = ', '.join(idata.xpath('//p[@id="dl" and @class="content"]//a/text()'))
search_result.formats = formats.upper() search_result.formats = formats.upper()
return True return True

View File

@ -20,7 +20,7 @@ from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog from calibre.gui2.store.web_store_dialog import WebStoreDialog
class ManyBooksStore(BasicStoreConfig, StorePlugin): class ManyBooksStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False): def open(self, parent=None, detail_item=None, external=False):
@ -29,7 +29,7 @@ class ManyBooksStore(BasicStoreConfig, StorePlugin):
detail_url = None detail_url = None
if detail_item: if detail_item:
detail_url = url + detail_item detail_url = url + detail_item
if external or self.config.get('open_external', False): if external or self.config.get('open_external', False):
open_url(QUrl(url_slash_cleaner(detail_url if detail_url else url))) open_url(QUrl(url_slash_cleaner(detail_url if detail_url else url)))
else: else:
@ -44,16 +44,16 @@ class ManyBooksStore(BasicStoreConfig, StorePlugin):
# secondary titles. Google is also faster. # secondary titles. Google is also faster.
# Using a google search so we can search on both fields at once. # Using a google search so we can search on both fields at once.
url = 'http://www.google.com/xhtml?q=site:manybooks.net+' + urllib.quote_plus(query) url = 'http://www.google.com/xhtml?q=site:manybooks.net+' + urllib.quote_plus(query)
br = browser() br = browser()
counter = max_results counter = max_results
with closing(br.open(url, timeout=timeout)) as f: with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read()) doc = html.fromstring(f.read())
for data in doc.xpath('//div[@class="edewpi"]//div[@class="r ld"]'): for data in doc.xpath('//div[@class="edewpi"]//div[@class="r ld"]'):
if counter <= 0: if counter <= 0:
break break
url = '' url = ''
url_a = data.xpath('div[@class="jd"]/a') url_a = data.xpath('div[@class="jd"]/a')
if url_a: if url_a:
@ -65,13 +65,13 @@ class ManyBooksStore(BasicStoreConfig, StorePlugin):
continue continue
id = url.split('/')[-1] id = url.split('/')[-1]
id = id.strip() id = id.strip()
url_a = html.fromstring(html.tostring(url_a)) url_a = html.fromstring(html.tostring(url_a))
heading = ''.join(url_a.xpath('//text()')) heading = ''.join(url_a.xpath('//text()'))
title, _, author = heading.rpartition('by ') title, _, author = heading.rpartition('by ')
author = author.split('-')[0] author = author.split('-')[0]
price = '$0.00' price = '$0.00'
cover_url = '' cover_url = ''
mo = re.match('^\D+', id) mo = re.match('^\D+', id)
if mo: if mo:
@ -79,10 +79,9 @@ class ManyBooksStore(BasicStoreConfig, StorePlugin):
cover_name = cover_name.replace('etext', '') cover_name = cover_name.replace('etext', '')
cover_id = id.split('.')[0] cover_id = id.split('.')[0]
cover_url = 'http://www.manybooks.net/images/' + id[0] + '/' + cover_name + '/' + cover_id + '-thumb.jpg' cover_url = 'http://www.manybooks.net/images/' + id[0] + '/' + cover_name + '/' + cover_id + '-thumb.jpg'
print(cover_url)
counter -= 1 counter -= 1
s = SearchResult() s = SearchResult()
s.cover_url = cover_url s.cover_url = cover_url
s.title = title.strip() s.title = title.strip()
@ -91,5 +90,5 @@ class ManyBooksStore(BasicStoreConfig, StorePlugin):
s.detail_item = '/titles/' + id s.detail_item = '/titles/' + id
s.drm = SearchResult.DRM_UNLOCKED s.drm = SearchResult.DRM_UNLOCKED
s.formts = 'EPUB, PDB (eReader, PalmDoc, zTXT, Plucker, iSilo), FB2, ZIP, AZW, MOBI, PRC, LIT, PKG, PDF, TXT, RB, RTF, LRF, TCR, JAR' s.formts = 'EPUB, PDB (eReader, PalmDoc, zTXT, Plucker, iSilo), FB2, ZIP, AZW, MOBI, PRC, LIT, PKG, PDF, TXT, RB, RTF, LRF, TCR, JAR'
yield s yield s

File diff suppressed because it is too large Load Diff

View File

@ -91,10 +91,10 @@ class TagBrowserMixin(object): # {{{
# Add the new category # Add the new category
user_cats[new_cat] = [] user_cats[new_cat] = []
db.prefs.set('user_categories', user_cats) db.prefs.set('user_categories', user_cats)
self.tags_view.set_new_model() self.tags_view.recount()
m = self.tags_view.model() m = self.tags_view.model()
idx = m.index_for_path(m.find_category_node('@' + new_cat)) idx = m.index_for_path(m.find_category_node('@' + new_cat))
m.show_item_at_index(idx) self.tags_view.show_item_at_index(idx)
# Open the editor on the new item to rename it # Open the editor on the new item to rename it
if new_category_name is None: if new_category_name is None:
self.tags_view.edit(idx) self.tags_view.edit(idx)
@ -111,7 +111,7 @@ class TagBrowserMixin(object): # {{{
for k in d.categories: for k in d.categories:
db.field_metadata.add_user_category('@' + k, k) db.field_metadata.add_user_category('@' + k, k)
db.data.change_search_locations(db.field_metadata.get_search_terms()) db.data.change_search_locations(db.field_metadata.get_search_terms())
self.tags_view.set_new_model() self.tags_view.recount()
def do_delete_user_category(self, category_name): def do_delete_user_category(self, category_name):
''' '''
@ -144,7 +144,7 @@ class TagBrowserMixin(object): # {{{
elif k.startswith(category_name + '.'): elif k.startswith(category_name + '.'):
del user_cats[k] del user_cats[k]
db.prefs.set('user_categories', user_cats) db.prefs.set('user_categories', user_cats)
self.tags_view.set_new_model() self.tags_view.recount()
def do_del_item_from_user_cat(self, user_cat, item_name, item_category): def do_del_item_from_user_cat(self, user_cat, item_name, item_category):
''' '''
@ -262,20 +262,22 @@ class TagBrowserMixin(object): # {{{
self.library_view.select_rows(ids) self.library_view.select_rows(ids)
# refreshing the tags view happens at the emit()/call() site # refreshing the tags view happens at the emit()/call() site
def do_author_sort_edit(self, parent, id, select_sort=True): def do_author_sort_edit(self, parent, id, select_sort=True, select_link=False):
''' '''
Open the manage authors dialog Open the manage authors dialog
''' '''
db = self.library_view.model().db db = self.library_view.model().db
editor = EditAuthorsDialog(parent, db, id, select_sort) editor = EditAuthorsDialog(parent, db, id, select_sort, select_link)
d = editor.exec_() d = editor.exec_()
if d: if d:
for (id, old_author, new_author, new_sort) in editor.result: for (id, old_author, new_author, new_sort, new_link) in editor.result:
if old_author != new_author: if old_author != new_author:
# The id might change if the new author already exists # The id might change if the new author already exists
id = db.rename_author(id, new_author) id = db.rename_author(id, new_author)
db.set_sort_field_for_author(id, unicode(new_sort), db.set_sort_field_for_author(id, unicode(new_sort),
commit=False, notify=False) commit=False, notify=False)
db.set_link_field_for_author(id, unicode(new_link),
commit=False, notify=False)
db.commit() db.commit()
self.library_view.model().refresh() self.library_view.model().refresh()
self.tags_view.recount() self.tags_view.recount()
@ -413,13 +415,14 @@ class TagBrowserWidget(QWidget): # {{{
txt = unicode(self.item_search.currentText()).strip() txt = unicode(self.item_search.currentText()).strip()
if txt.startswith('*'): if txt.startswith('*'):
self.tags_view.set_new_model(filter_categories_by=txt[1:]) model.filter_categories_by = txt[1:]
self.tags_view.recount()
self.current_find_position = None self.current_find_position = None
return return
if model.get_filter_categories_by(): if model.filter_categories_by:
self.tags_view.set_new_model(filter_categories_by=None) model.filter_categories_by = None
self.tags_view.recount()
self.current_find_position = None self.current_find_position = None
model = self.tags_view.model()
if not txt: if not txt:
return return
@ -437,8 +440,9 @@ class TagBrowserWidget(QWidget): # {{{
self.current_find_position = \ self.current_find_position = \
model.find_item_node(key, txt, self.current_find_position) model.find_item_node(key, txt, self.current_find_position)
if self.current_find_position: if self.current_find_position:
model.show_item_at_path(self.current_find_position, box=True) self.tags_view.show_item_at_path(self.current_find_position, box=True)
elif self.item_search.text(): elif self.item_search.text():
self.not_found_label.setVisible(True) self.not_found_label.setVisible(True)
if self.tags_view.verticalScrollBar().isVisible(): if self.tags_view.verticalScrollBar().isVisible():

View File

@ -7,11 +7,12 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import cPickle, traceback import cPickle
from functools import partial from functools import partial
from itertools import izip
from PyQt4.Qt import (QItemDelegate, Qt, QTreeView, pyqtSignal, QSize, QIcon, from PyQt4.Qt import (QItemDelegate, Qt, QTreeView, pyqtSignal, QSize, QIcon,
QApplication, QMenu, QPoint) QApplication, QMenu, QPoint, QModelIndex, QToolTip, QCursor)
from calibre.gui2.tag_browser.model import (TagTreeItem, TAG_SEARCH_STATES, from calibre.gui2.tag_browser.model import (TagTreeItem, TAG_SEARCH_STATES,
TagsModel) TagsModel)
@ -65,7 +66,7 @@ class TagsView(QTreeView): # {{{
tag_list_edit = pyqtSignal(object, object) tag_list_edit = pyqtSignal(object, object)
saved_search_edit = pyqtSignal(object) saved_search_edit = pyqtSignal(object)
rebuild_saved_searches = pyqtSignal() rebuild_saved_searches = pyqtSignal()
author_sort_edit = pyqtSignal(object, object) author_sort_edit = pyqtSignal(object, object, object, object)
tag_item_renamed = pyqtSignal() tag_item_renamed = pyqtSignal()
search_item_renamed = pyqtSignal() search_item_renamed = pyqtSignal()
drag_drop_finished = pyqtSignal(object) drag_drop_finished = pyqtSignal(object)
@ -90,55 +91,59 @@ class TagsView(QTreeView): # {{{
self.setDropIndicatorShown(True) self.setDropIndicatorShown(True)
self.setAutoExpandDelay(500) self.setAutoExpandDelay(500)
self.pane_is_visible = False self.pane_is_visible = False
if gprefs['tags_browser_collapse_at'] == 0:
self.collapse_model = 'disable'
else:
self.collapse_model = gprefs['tags_browser_partition_method']
self.search_icon = QIcon(I('search.png')) self.search_icon = QIcon(I('search.png'))
self.user_category_icon = QIcon(I('tb_folder.png')) self.user_category_icon = QIcon(I('tb_folder.png'))
self.delete_icon = QIcon(I('list_remove.png')) self.delete_icon = QIcon(I('list_remove.png'))
self.rename_icon = QIcon(I('edit-undo.png')) self.rename_icon = QIcon(I('edit-undo.png'))
self._model = TagsModel(self)
self._model.search_item_renamed.connect(self.search_item_renamed)
self._model.refresh_required.connect(self.refresh_required,
type=Qt.QueuedConnection)
self._model.tag_item_renamed.connect(self.tag_item_renamed)
self._model.restriction_error.connect(self.restriction_error)
self._model.user_categories_edited.connect(self.user_categories_edited,
type=Qt.QueuedConnection)
self._model.drag_drop_finished.connect(self.drag_drop_finished)
@property
def hidden_categories(self):
    # Read-only alias for the hidden categories held by the tags model
    return self._model.hidden_categories
@property
def db(self):
    # Read-only alias for the database held by the tags model
    return self._model.db
@property
def collapse_model(self):
    # Read-only alias for the tags model's collapse_model setting; to change
    # it, assign to self._model.collapse_model instead
    return self._model.collapse_model
def set_pane_is_visible(self, to_what):
    '''
    Remember whether the pane is visible and recount when it changes from
    not visible to visible.
    '''
    became_visible = to_what and not self.pane_is_visible
    self.pane_is_visible = to_what
    if became_visible:
        self.recount()
def get_state(self):
    '''
    Return (expanded_categories, state_map).

    expanded_categories lists the category_key of every top-level category
    node that is currently expanded in the view. state_map maps each
    category_key to a dict of {(tag name, tag category): tag state} for the
    node's child tags.
    '''
    model = self._model
    expanded = []
    states_by_category = {}
    for row, node in enumerate(model.category_nodes):
        key = node.category_key
        if self.isExpanded(model.index(row, 0, QModelIndex())):
            expanded.append(key)
        states_by_category[key] = dict(
            ((child.tag.name, child.tag.category), child.tag.state)
            for child in node.child_tags())
    return expanded, states_by_category
def reread_collapse_parameters(self):
    '''
    Ask the model to reread its collapse settings, passing it the current
    state map from get_state().
    '''
    state_map = self.get_state()[1]
    self._model.reread_collapse_model(state_map)
def set_database(self, db, tag_match, sort_by): def set_database(self, db, tag_match, sort_by):
hidden_cats = db.prefs.get('tag_browser_hidden_categories', None) self._model.set_database(db)
self.hidden_categories = []
# migrate from config to db prefs
if hidden_cats is None:
hidden_cats = config['tag_browser_hidden_categories']
# strip out any non-existence field keys
for cat in hidden_cats:
if cat in db.field_metadata:
self.hidden_categories.append(cat)
db.prefs.set('tag_browser_hidden_categories', list(self.hidden_categories))
self.hidden_categories = set(self.hidden_categories)
old = getattr(self, '_model', None) self.pane_is_visible = True # because TagsModel.set_database did a recount
if old is not None:
old.break_cycles()
self._model = TagsModel(db, parent=self,
hidden_categories=self.hidden_categories,
search_restriction=None,
drag_drop_finished=self.drag_drop_finished,
collapse_model=self.collapse_model,
state_map={})
self.pane_is_visible = True # because TagsModel.init did a recount
self.sort_by = sort_by self.sort_by = sort_by
self.tag_match = tag_match self.tag_match = tag_match
self.db = db
self.search_restriction = None
self.setModel(self._model) self.setModel(self._model)
self.setContextMenuPolicy(Qt.CustomContextMenu) self.setContextMenuPolicy(Qt.CustomContextMenu)
pop = config['sort_tags_by'] pop = config['sort_tags_by']
@ -164,6 +169,13 @@ class TagsView(QTreeView): # {{{
self.refresh_signal_processed = False self.refresh_signal_processed = False
self.refresh_required.emit() self.refresh_required.emit()
def user_categories_edited(self, user_cats, nkey):
    '''
    Store user_cats under the 'user_categories' database preference,
    rebuild the model's node tree and then show the category node whose
    key is '@' + nkey.
    '''
    # Capture the tag states before anything changes so that they can be
    # handed to the rebuild
    state_map = self.get_state()[1]
    self.db.prefs.set('user_categories', user_cats)
    self._model.rebuild_node_tree(state_map=state_map)
    node_path = self._model.find_category_node('@'+nkey)
    self.show_item_at_path(node_path)
@property
def match_all(self):
    '''
    True when a tag_match widget is set and its current index is greater
    than zero. If tag_match is falsy, that falsy value itself is returned.
    '''
    return self.tag_match and self.tag_match.currentIndex() > 0
@ -179,11 +191,8 @@ class TagsView(QTreeView): # {{{
pass pass
def set_search_restriction(self, s):
    '''
    Pass the search restriction s on to the model. Any falsy value (for
    example an empty string) is normalized to None first.
    '''
    self._model.set_search_restriction(s if s else None)
def mouseReleaseEvent(self, event): def mouseReleaseEvent(self, event):
# Swallow everything except leftButton so context menus work correctly # Swallow everything except leftButton so context menus work correctly
@ -268,23 +277,29 @@ class TagsView(QTreeView): # {{{
self.saved_search_edit.emit(category) self.saved_search_edit.emit(category)
return return
if action == 'edit_author_sort': if action == 'edit_author_sort':
self.author_sort_edit.emit(self, index) self.author_sort_edit.emit(self, index, True, False)
return
if action == 'edit_author_link':
self.author_sort_edit.emit(self, index, False, True)
return return
reset_filter_categories = True
if action == 'hide': if action == 'hide':
self.hidden_categories.add(category) self.hidden_categories.add(category)
elif action == 'show': elif action == 'show':
self.hidden_categories.discard(category) self.hidden_categories.discard(category)
elif action == 'categorization': elif action == 'categorization':
changed = self.collapse_model != category changed = self.collapse_model != category
self.collapse_model = category self._model.collapse_model = category
if changed: if changed:
self.set_new_model(self._model.get_filter_categories_by()) reset_filter_categories = False
gprefs['tags_browser_partition_method'] = category gprefs['tags_browser_partition_method'] = category
elif action == 'defaults': elif action == 'defaults':
self.hidden_categories.clear() self.hidden_categories.clear()
self.db.prefs.set('tag_browser_hidden_categories', list(self.hidden_categories)) self.db.prefs.set('tag_browser_hidden_categories', list(self.hidden_categories))
self.set_new_model() if reset_filter_categories:
self._model.filter_categories_by = None
self._model.rebuild_node_tree()
except: except:
return return
@ -334,6 +349,9 @@ class TagsView(QTreeView): # {{{
self.context_menu.addAction(_('Edit sort for %s')%display_name(tag), self.context_menu.addAction(_('Edit sort for %s')%display_name(tag),
partial(self.context_menu_handler, partial(self.context_menu_handler,
action='edit_author_sort', index=tag.id)) action='edit_author_sort', index=tag.id))
self.context_menu.addAction(_('Edit link for %s')%display_name(tag),
partial(self.context_menu_handler,
action='edit_author_link', index=tag.id))
# is_editable is also overloaded to mean 'can be added # is_editable is also overloaded to mean 'can be added
# to a user category' # to a user category'
@ -475,10 +493,25 @@ class TagsView(QTreeView): # {{{
pa.setCheckable(True) pa.setCheckable(True)
pa.setChecked(True) pa.setChecked(True)
if config['sort_tags_by'] != "name":
fla.setEnabled(False)
m.hovered.connect(self.collapse_menu_hovered)
fla.setToolTip(_('First letter is usable only when sorting by name'))
# Apparently one cannot set a tooltip to empty, so use a star and
# deal with it in the hover method
da.setToolTip('*')
pa.setToolTip('*')
if not self.context_menu.isEmpty(): if not self.context_menu.isEmpty():
self.context_menu.popup(self.mapToGlobal(point)) self.context_menu.popup(self.mapToGlobal(point))
return True return True
def collapse_menu_hovered(self, action):
    '''
    Show the tooltip of the hovered menu action at the current cursor
    position.
    '''
    tip = action.toolTip()
    # A tooltip of '*' is a placeholder meaning "no tooltip"; show an empty
    # text for it
    QToolTip.showText(QCursor.pos(), '' if tip == '*' else tip)
def dragMoveEvent(self, event): def dragMoveEvent(self, event):
QTreeView.dragMoveEvent(self, event) QTreeView.dragMoveEvent(self, event)
self.setDropIndicatorShown(False) self.setDropIndicatorShown(False)
@ -487,6 +520,8 @@ class TagsView(QTreeView): # {{{
return return
src_is_tb = event.mimeData().hasFormat('application/calibre+from_tag_browser') src_is_tb = event.mimeData().hasFormat('application/calibre+from_tag_browser')
item = index.data(Qt.UserRole).toPyObject() item = index.data(Qt.UserRole).toPyObject()
if item.type == TagTreeItem.ROOT:
return
flags = self._model.flags(index) flags = self._model.flags(index)
if item.type == TagTreeItem.TAG and flags & Qt.ItemIsDropEnabled: if item.type == TagTreeItem.TAG and flags & Qt.ItemIsDropEnabled:
self.setDropIndicatorShown(not src_is_tb) self.setDropIndicatorShown(not src_is_tb)
@ -537,11 +572,33 @@ class TagsView(QTreeView): # {{{
if not ci.isValid(): if not ci.isValid():
ci = self.indexAt(QPoint(10, 10)) ci = self.indexAt(QPoint(10, 10))
path = self.model().path_for_index(ci) if self.is_visible(ci) else None path = self.model().path_for_index(ci) if self.is_visible(ci) else None
expanded_categories, state_map = self.model().get_state() expanded_categories, state_map = self.get_state()
self.set_new_model(state_map=state_map) self._model.rebuild_node_tree(state_map=state_map)
for category in expanded_categories: for category in expanded_categories:
self.expand(self.model().index_for_category(category)) self.expand(self._model.index_for_category(category))
self._model.show_item_at_path(path) self.show_item_at_path(path)
def show_item_at_path(self, path, box=False,
                      position=QTreeView.PositionAtCenter):
    '''
    Scroll the browser and open categories to show the item referenced by
    path. If possible, the item is placed in the center. If box=True, a
    box is drawn around the item. Nothing is done when path is empty or
    None.
    '''
    if not path:
        return
    idx = self._model.index_for_path(path)
    self.show_item_at_index(idx, box=box, position=position)
def show_item_at_index(self, idx, box=False,
                       position=QTreeView.PositionAtCenter):
    '''
    Make idx the current item and scroll to it using the scroll hint
    position. If box=True, the model is asked to box the item. Invalid
    indexes and the index of the model's root item are ignored.
    '''
    if not idx.isValid():
        return
    if idx.data(Qt.UserRole).toPyObject() is self._model.root_item:
        return
    # Expanding the parent is needed, otherwise Qt sometimes segfaults if
    # the node is buried in a collapsed, off screen hierarchy
    self.expand(self._model.parent(idx))
    self.setCurrentIndex(idx)
    self.scrollTo(idx, position)
    if box:
        self._model.set_boxed(idx)
def item_expanded(self, idx): def item_expanded(self, idx):
''' '''
@ -549,30 +606,6 @@ class TagsView(QTreeView): # {{{
''' '''
self.setCurrentIndex(idx) self.setCurrentIndex(idx)
def set_new_model(self, filter_categories_by=None, state_map=None):
    '''
    There are cases where we need to rebuild the category tree without
    attempting to reposition the current node.

    A new TagsModel is built from the view's db, hidden categories, search
    restriction and collapse model and installed with setModel(). When
    state_map is None an empty dict is used. If building the model fails,
    the traceback is printed and the model is set to None.
    '''
    # A fresh dict per call: a {} default in the signature would be one
    # object shared by every call that omits state_map
    if state_map is None:
        state_map = {}
    try:
        old = getattr(self, '_model', None)
        if old is not None:
            old.break_cycles()
        self._model = TagsModel(self.db, parent=self,
                                hidden_categories=self.hidden_categories,
                                search_restriction=self.search_restriction,
                                drag_drop_finished=self.drag_drop_finished,
                                filter_categories_by=filter_categories_by,
                                collapse_model=self.collapse_model,
                                state_map=state_map)
        self.setModel(self._model)
    except Exception:
        # The DB must be gone. Set the model to None and hope that someone
        # will call set_database later. I don't know if this in fact works.
        # But perhaps a Bad Thing Happened, so print the exception.
        # KeyboardInterrupt and SystemExit are deliberately not caught here.
        traceback.print_exc()
        self._model = None
        self.setModel(None)
# }}} # }}}

View File

@ -367,7 +367,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
'uuid', 'uuid',
'has_cover', 'has_cover',
('au_map', 'authors', 'author', ('au_map', 'authors', 'author',
'aum_sortconcat(link.id, authors.name, authors.sort)'), 'aum_sortconcat(link.id, authors.name, authors.sort, authors.link)'),
'last_modified', 'last_modified',
'(SELECT identifiers_concat(type, val) FROM identifiers WHERE identifiers.book=books.id) identifiers', '(SELECT identifiers_concat(type, val) FROM identifiers WHERE identifiers.book=books.id) identifiers',
] ]
@ -894,13 +894,17 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
aut_list = [] aut_list = []
aum = [] aum = []
aus = {} aus = {}
for (author, author_sort) in aut_list: aul = {}
aum.append(author.replace('|', ',')) for (author, author_sort, link) in aut_list:
aus[author] = author_sort.replace('|', ',') aut = author.replace('|', ',')
aum.append(aut)
aus[aut] = author_sort.replace('|', ',')
aul[aut] = link
mi.title = row[fm['title']] mi.title = row[fm['title']]
mi.authors = aum mi.authors = aum
mi.author_sort = row[fm['author_sort']] mi.author_sort = row[fm['author_sort']]
mi.author_sort_map = aus mi.author_sort_map = aus
mi.author_link_map = aul
mi.comments = row[fm['comments']] mi.comments = row[fm['comments']]
mi.publisher = row[fm['publisher']] mi.publisher = row[fm['publisher']]
mi.timestamp = row[fm['timestamp']] mi.timestamp = row[fm['timestamp']]
@ -1245,6 +1249,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
ret = tempfile.SpooledTemporaryFile(max_size=SPOOL_SIZE) ret = tempfile.SpooledTemporaryFile(max_size=SPOOL_SIZE)
shutil.copyfileobj(f, ret) shutil.copyfileobj(f, ret)
ret.seek(0) ret.seek(0)
# Various bits of code try to use the name as the default
# title when reading metadata, so set it
ret.name = f.name
else: else:
ret = f.read() ret = f.read()
return ret return ret
@ -1442,7 +1449,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
raise ValueError('sort ' + sort + ' not a valid value') raise ValueError('sort ' + sort + ' not a valid value')
self.books_list_filter.change([] if not ids else ids) self.books_list_filter.change([] if not ids else ids)
id_filter = None if not ids else frozenset(ids) id_filter = None if ids is None else frozenset(ids)
tb_cats = self.field_metadata tb_cats = self.field_metadata
tcategories = {} tcategories = {}
@ -1520,7 +1527,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
rating_dex = self.FIELD_MAP['rating'] rating_dex = self.FIELD_MAP['rating']
tag_class = LibraryDatabase2.TCat_Tag tag_class = LibraryDatabase2.TCat_Tag
for book in self.data.iterall(): for book in self.data.iterall():
if id_filter and book[id_dex] not in id_filter: if id_filter is not None and book[id_dex] not in id_filter:
continue continue
rating = book[rating_dex] rating = book[rating_dex]
# We kept track of all possible category field_map positions above # We kept track of all possible category field_map positions above
@ -2038,13 +2045,13 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
def authors_with_sort_strings(self, id, index_is_id=False):
    '''
    Return a list of (author id, author name, author sort, author link)
    tuples for the authors of one book, in the order given by the
    books_authors_link ids. '|' in an author name is replaced by ','.

    id is a book id when index_is_id is True; otherwise it is converted
    to one with self.id().
    '''
    id = id if index_is_id else self.id(id)
    aut_strings = self.conn.get('''
            SELECT authors.id, authors.name, authors.sort, authors.link
            FROM authors, books_authors_link as bl
            WHERE bl.book=? and authors.id=bl.author
            ORDER BY bl.id''', (id,))
    # Treat a falsy query result (no rows) as an empty list
    return [(id_, author.replace('|', ','), sort, link)
            for (id_, author, sort, link) in (aut_strings or [])]
# Given a book, return the author_sort string for authors of the book # Given a book, return the author_sort string for authors of the book
@ -2084,7 +2091,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
aum = self.authors_with_sort_strings(id_, index_is_id=True) aum = self.authors_with_sort_strings(id_, index_is_id=True)
self.data.set(id_, self.FIELD_MAP['au_map'], self.data.set(id_, self.FIELD_MAP['au_map'],
':#:'.join([':::'.join((au.replace(',', '|'), aus)) for (_, au, aus) in aum]), ':#:'.join([':::'.join((au.replace(',', '|'), aus, aul))
for (_, au, aus, aul) in aum]),
row_is_id=True) row_is_id=True)
def _set_authors(self, id, authors, allow_case_change=False): def _set_authors(self, id, authors, allow_case_change=False):
@ -2435,7 +2443,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.conn.commit() self.conn.commit()
def get_authors_with_ids(self):
    '''
    Return the (id, name, sort, link) rows of the authors table, or an
    empty list when the query yields a falsy result.
    '''
    result = self.conn.get('SELECT id,name,sort,link FROM authors')
    return result if result else []
@ -2446,6 +2454,13 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
(author,), all=False) (author,), all=False)
return result return result
def set_link_field_for_author(self, aid, link, commit=True, notify=False):
    '''
    Set the link column of the author row with id aid. A falsy link
    (None, '') is stored as the empty string; any other value is stored
    with surrounding whitespace stripped. The change is committed when
    commit is True. notify is accepted but not used here.
    '''
    link = link.strip() if link else ''
    self.conn.execute('UPDATE authors SET link=? WHERE id=?', (link, aid))
    if commit:
        self.conn.commit()
def set_sort_field_for_author(self, old_id, new_sort, commit=True, notify=False): def set_sort_field_for_author(self, old_id, new_sort, commit=True, notify=False):
self.conn.execute('UPDATE authors SET sort=? WHERE id=?', \ self.conn.execute('UPDATE authors SET sort=? WHERE id=?', \
(new_sort.strip(), old_id)) (new_sort.strip(), old_id))

View File

@ -53,6 +53,7 @@ class Restore(Thread):
self.mismatched_dirs = [] self.mismatched_dirs = []
self.successes = 0 self.successes = 0
self.tb = None self.tb = None
self.authors_links = {}
@property @property
def errors_occurred(self): def errors_occurred(self):
@ -160,6 +161,12 @@ class Restore(Thread):
else: else:
self.mismatched_dirs.append(dirpath) self.mismatched_dirs.append(dirpath)
alm = mi.get('author_link_map', {})
for author, link in alm.iteritems():
existing_link, timestamp = self.authors_links.get(author, (None, None))
if existing_link is None or existing_link != link and timestamp < mi.timestamp:
self.authors_links[author] = (link, mi.timestamp)
def create_cc_metadata(self): def create_cc_metadata(self):
self.books.sort(key=itemgetter('timestamp')) self.books.sort(key=itemgetter('timestamp'))
self.custom_columns = {} self.custom_columns = {}
@ -206,6 +213,11 @@ class Restore(Thread):
self.failed_restores.append((book, traceback.format_exc())) self.failed_restores.append((book, traceback.format_exc()))
self.progress_callback(book['mi'].title, i+1) self.progress_callback(book['mi'].title, i+1)
for author in self.authors_links.iterkeys():
link, ign = self.authors_links[author]
db.conn.execute('UPDATE authors SET link=? WHERE name=?',
(link, author.replace(',', '|')))
db.conn.commit()
db.conn.close() db.conn.close()
def restore_book(self, book, db): def restore_book(self, book, db):

View File

@ -600,4 +600,14 @@ class SchemaUpgrade(object):
with open(os.path.join(bdir, fname), 'wb') as f: with open(os.path.join(bdir, fname), 'wb') as f:
f.write(script) f.write(script)
def upgrade_version_20(self):
    '''
    Add a link column to the authors table.

    The column is TEXT NOT NULL with the empty string as its default, so
    existing author rows get an empty link.
    '''
    # The default is written as a single-quoted SQL string literal. Double
    # quotes denote identifiers in SQL and are only accepted as strings by
    # SQLite as a legacy fallback.
    script = '''
    ALTER TABLE authors ADD COLUMN link TEXT NOT NULL DEFAULT '';
    '''
    self.conn.executescript(script)

View File

@ -144,9 +144,9 @@ class AumSortedConcatenate(object):
def __init__(self): def __init__(self):
self.ans = {} self.ans = {}
def step(self, ndx, author, sort, link):
    '''
    Record one row of the aggregate: store 'author:::sort:::link' in
    self.ans under the key ndx. Rows whose author is None are skipped.
    A sort or link of None is joined as the empty string so that a NULL
    column does not raise TypeError in the join.
    '''
    if author is not None:
        self.ans[ndx] = ':::'.join(
                (author,
                 '' if sort is None else sort,
                 '' if link is None else link))
def finalize(self): def finalize(self):
keys = self.ans.keys() keys = self.ans.keys()
@ -229,7 +229,7 @@ class DBThread(Thread):
load_c_extensions(self.conn) load_c_extensions(self.conn)
self.conn.row_factory = sqlite.Row if self.row_factory else lambda cursor, row : list(row) self.conn.row_factory = sqlite.Row if self.row_factory else lambda cursor, row : list(row)
self.conn.create_aggregate('concat', 1, Concatenate) self.conn.create_aggregate('concat', 1, Concatenate)
self.conn.create_aggregate('aum_sortconcat', 3, AumSortedConcatenate) self.conn.create_aggregate('aum_sortconcat', 4, AumSortedConcatenate)
self.conn.create_collation('PYNOCASE', partial(pynocase, self.conn.create_collation('PYNOCASE', partial(pynocase,
encoding=encoding)) encoding=encoding))
self.conn.create_function('title_sort', 1, title_sort) self.conn.create_function('title_sort', 1, title_sort)

View File

@ -558,11 +558,16 @@ Most readers do not support this. You should complain to the manufacturer about
Another alternative is to create a catalog in ebook form containing a listing of all the books in your calibre library, with their metadata. Click the arrow next to the convert button to access the catalog creation tool. And before you ask, no you cannot have the catalog "link directly to" books on your reader. Another alternative is to create a catalog in ebook form containing a listing of all the books in your calibre library, with their metadata. Click the arrow next to the convert button to access the catalog creation tool. And before you ask, no you cannot have the catalog "link directly to" books on your reader.
How do I get |app| to use my HTTP proxy?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
By default, |app| uses whatever proxy settings are set in your OS. Sometimes these are incorrect; for example, on Windows, if you don't use Internet Explorer, the proxy settings may not be up to date. You can tell |app| to use a particular proxy server by setting the http_proxy environment variable. The format of the variable is: ``http://username:password@servername``. You should ask your network admin to give you the correct value for this variable. Note that |app| only supports HTTP proxies, not SOCKS proxies. You can see the current proxies used by |app| in Preferences->Miscellaneous.
I want some feature added to |app|. What can I do? I want some feature added to |app|. What can I do?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
You have two choices: You have two choices:
1. Create a patch by hacking on |app| and send it to me for review and inclusion. See `Development <http://calibre-ebook.com/get-involved>`_. 1. Create a patch by hacking on |app| and send it to me for review and inclusion. See `Development <http://calibre-ebook.com/get-involved>`_.
2. `Open a ticket <http://calibre-ebook.com/bugs>`_ (you have to register and login first). Remember that |app| development is done by volunteers, so if you get no response to your feature request, it means no one feels like implementing it. 2. `Open a bug requesting the feature <http://calibre-ebook.com/bugs>`_ . Remember that |app| development is done by volunteers, so if you get no response to your feature request, it means no one feels like implementing it.
Why doesn't |app| have an automatic update? Why doesn't |app| have an automatic update?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -347,5 +347,6 @@ class EvalFormatter(TemplateFormatter):
key = key.lower() key = key.lower()
return kwargs.get(key, _('No such variable ') + key) return kwargs.get(key, _('No such variable ') + key)
# DEPRECATED. This is not thread safe. Do not use.
eval_formatter = EvalFormatter() eval_formatter = EvalFormatter()

View File

@ -202,9 +202,9 @@ class BuiltinEval(BuiltinFormatterFunction):
'results from local variables.') 'results from local variables.')
def evaluate(self, formatter, kwargs, mi, locals, template):
    '''
    Evaluate template against the local variables and return the result.

    '[[' and ']]' in template are first rewritten to '{' and '}'. The
    rewritten template is formatted with locals as its values by a new
    EvalFormatter instance created for this call.
    '''
    from formatter import EvalFormatter
    template = template.replace('[[', '{').replace(']]', '}')
    return EvalFormatter().safe_format(template, locals, 'EVAL', None)
class BuiltinAssign(BuiltinFormatterFunction): class BuiltinAssign(BuiltinFormatterFunction):
name = 'assign' name = 'assign'