merge from trunk

This commit is contained in:
Lee 2011-05-14 21:39:08 +08:00
commit 11fb6cca1b
175 changed files with 91136 additions and 91392 deletions

View File

@ -30,3 +30,4 @@ nbproject/
.project
.pydevproject
.settings/
*.DS_Store

View File

@ -19,6 +19,90 @@
# new recipes:
# - title:
- version: 0.8.1
date: 2011-05-13
new features:
- title: "Add Amazon DE, Beam EBooks, Beam DE, Weightless Books, Wizards Tower Books to the list of ebook stores searched by Get Books"
- title: "TXT output: All new Textile output with much greater preservation of formatting from the input document"
- title: "Migrate metadata plugin for Douban Books to the 0.8 API"
- title: "Driver for Dell Streak on windows"
- title: "Add menu items to Get Books action to search by title and author of current book"
- title: "Add title_sort as available field to CSV/XML catalogs"
- title: "Add a context menu to the manage authors dialog"
- title: "Add a button to paste isbn into the identifiers field in the edit metadata dialog automatically"
bug fixes:
- title: "Amazon metadata download plugin: Fix links being stripped from comments. Also fix ratings/isbn not being parsed from kindle edition pages."
tickets: [782012]
- title: "Fix one source of segfaults on shutdown in the linux binary builds."
- title: "Allow the use of condensed/expanded fonts as interface fonts"
- title: "EPUB Input: Ignore missing cover file when converting, instead of erroring out."
tickets: [781848]
- title: "Fix custom identifier being erased by metadata download"
tickets: [781759]
- title: "Fix regression that broke various things when using Japanese language calibre on windows"
tickets: [780804]
- title: "RTF Input: Handle null color codes correctly"
tickets: [780728]
- title: "ODT Input: Handle inline special styles defined on <text:span> tags."
tickets: [780250]
- title: "Fix error when pressing next previous button with an empty search in the Plugins preferences"
tickets: [781135]
- title: "Ignore 'Unknown' author when downloading metadata."
tickets: [779348]
- title: "Fix timezone bug when setting dates in the edit metadata dialog"
tickets: [779497]
- title: "Fix ebook-convert not recognizing output paths starting with .."
tickets: [779322]
improved recipes:
- "Strategy+Business"
- Readers Digest
- Ming Pao
- Telepolis
- Fronda
- Rzeczpospolita
new recipes:
- title: "Various Taiwanese news sources"
author: Eddie Lau
- title: Replica Vedetelor, Ziua Veche
author: Silviu Cotoara
- title: Welt der Physik
author: schuster
- title: Korea Herald
author: Seongkyoun Yoo
- version: 0.8.0
date: 2011-05-06
new features:
- title: "Go to http://calibre-ebook.com/new-in/eight to see what's new in 0.8.0"
type: major
- version: 0.7.59
date: 2011-04-30

View File

@ -93,7 +93,7 @@ class Arcamax(BasicNewsRecipe):
for page in pages:
page_soup = self.index_to_soup(url)
if page_soup:
title = page_soup.find(name='div', attrs={'class':'comics-header'}).h1.contents[0]
title = self.tag_to_string(page_soup.find(name='div', attrs={'class':'comics-header'}).h1.contents[0])
page_url = url
# orig prev_page_url = 'http://www.arcamax.com' + page_soup.find('a', attrs={'class':'prev'}, text='Previous').parent['href']
prev_page_url = 'http://www.arcamax.com' + page_soup.find('span', text='Previous').parent.parent['href']
@ -127,4 +127,3 @@ class Arcamax(BasicNewsRecipe):
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

View File

@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
# dug from http://www.mobileread.com/forums/showthread.php?p=1012294
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277443634(BasicNewsRecipe):
    # Recipe for China Times (Taiwan); originally shared on MobileRead
    # (see the forum link at the top of this file).
    title = u'中時電子報'
    __author__ = 'einstuerzende, updated by Eddie Lau'
    __version__ = '1.0'
    description = 'China Times (Taiwan)'
    publisher = 'China Times Group'
    category = 'News, Chinese, Taiwan'
    language = 'zh'
    encoding = 'big5'

    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    conversion_options = {'linearize_tables': True}

    masthead_url = 'http://www.fcuaa.org/gif/chinatimeslogo.gif'
    cover_url = 'http://www.fcuaa.org/gif/chinatimeslogo.gif'

    # Keep only the article-body container; drop the "focus news" box.
    keep_only_tags = [dict(name='div', attrs={'class': ['articlebox', 'articlebox clearfix']})]
    remove_tags = [dict(name='div', attrs={'class': ['focus-news']})]

    feeds = [
        (u'焦點', u'http://rss.chinatimes.com/rss/focus-u.rss'),
        (u'政治', u'http://rss.chinatimes.com/rss/Politic-u.rss'),
        (u'社會', u'http://rss.chinatimes.com/rss/social-u.rss'),
        (u'國際', u'http://rss.chinatimes.com/rss/international-u.rss'),
        (u'兩岸', u'http://rss.chinatimes.com/rss/mainland-u.rss'),
        (u'地方', u'http://rss.chinatimes.com/rss/local-u.rss'),
        (u'言論', u'http://rss.chinatimes.com/rss/comment-u.rss'),
        (u'科技', u'http://rss.chinatimes.com/rss/technology-u.rss'),
        (u'運動', u'http://rss.chinatimes.com/rss/sport-u.rss'),
        (u'藝文', u'http://rss.chinatimes.com/rss/philology-u.rss'),
        # (u'旺報', u'http://rss.chinatimes.com/rss/want-u.rss'),
        # (u'財經', u'http://rss.chinatimes.com/rss/finance-u.rss'),  # broken links
        # (u'股市', u'http://rss.chinatimes.com/rss/stock-u.rss')  # broken links
    ]

53
recipes/divahair.recipe Normal file
View File

@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
divahair.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class DivaHair(BasicNewsRecipe):
    """Recipe for divahair.ro (Romanian hairstyle / lifestyle news)."""
    title = u'Diva Hair'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Coafuri, frizuri, tunsori ..'
    publisher = u'Diva Hair'
    category = u'Ziare,Stiri,Coafuri,Femei'
    language = 'ro'
    encoding = 'utf-8'

    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True
    cover_url = 'http://www.divahair.ro/imgs/logo.jpg'

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Article body lives in the 'spatiuart' cell/div.
    keep_only_tags = [
        dict(name='td', attrs={'class': 'spatiuart'}),
        dict(name='div', attrs={'class': 'spatiuart'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': 'categorie'}),
        dict(name='div', attrs={'class': 'gri gri2 detaliiart'}),
        dict(name='div', attrs={'class': 'articol_box_bottom'}),
    ]
    remove_tags_after = [
        dict(name='div', attrs={'class': 'articol_box_bottom'}),
    ]

    feeds = [(u'\u0218tiri', u'http://www.divahair.ro/feed')]

    def preprocess_html(self, soup):
        # Normalize <img> tags for Adobe Digital Editions compatibility.
        return self.adeify_images(soup)

View File

@ -0,0 +1,64 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.financialsense.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class FinancialSense(BasicNewsRecipe):
    """Recipe for www.financialsense.com.

    Fix vs. original: the local variable ``str`` in ``preprocess_html``
    shadowed the builtin; renamed to ``text`` (no behavior change).
    """
    title                 = 'Financial Sense'
    __author__            = 'Darko Miletic'
    description           = 'Uncommon News & Views for the Wise Investor'
    publisher             = 'Financial Sense'
    category              = 'news, finances, politics, USA'
    oldest_article        = 2
    max_articles_per_feed = 200
    no_stylesheets        = True
    encoding              = 'utf8'
    use_embedded_content  = False
    language              = 'en'
    remove_empty_feeds    = True
    publication_type      = 'newsportal'
    masthead_url          = 'http://www.financialsense.com/sites/default/files/logo.jpg'
    extra_css             = """
        body{font-family: Arial,"Helvetica Neue",Helvetica,sans-serif }
        img{margin-bottom: 0.4em; display:block}
        h2{color: gray}
        .name{margin-right: 5em}
    """

    conversion_options = {
        'comment'   : description
        , 'tags'      : category
        , 'publisher' : publisher
        , 'language'  : language
    }

    remove_tags       = [dict(name=['meta', 'link', 'base', 'object', 'embed', 'iframe'])]
    remove_tags_after = dict(attrs={'class': 'vcard'})
    keep_only_tags    = [dict(attrs={'class': ['title', 'post-meta', 'content', 'item-title', 'vcard']})]
    remove_attributes = ['lang', 'type']

    feeds = [(u'Articles', u'http://feeds.feedburner.com/fso')]

    def preprocess_html(self, soup):
        """Clean article markup: drop inline styles, flatten anchors and
        guarantee every image has an ``alt`` attribute."""
        # Drop all inline style attributes.
        for item in soup.findAll(style=True):
            del item['style']
        # Flatten anchors: text-only links become bare text; links wrapping
        # an image become a <div> so the image itself survives conversion.
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                text = item.string  # renamed from 'str' (shadowed builtin)
                item.replaceWith(text)
            else:
                if limg:
                    item.name = 'div'
                    item.attrs = []
                else:
                    text = self.tag_to_string(item)
                    item.replaceWith(text)
        # Some e-book checkers require an alt attribute on every image.
        # has_key() is the BeautifulSoup 3 Tag API used throughout this file.
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup

View File

@ -21,14 +21,19 @@ class Fronda(BasicNewsRecipe):
feeds = [(u'Infformacje', u'http://fronda.pl/news/feed')]
keep_only_tags = [dict(name='h1', attrs={'class':'big'}),
dict(name='ul', attrs={'class':'about clear'}),
dict(name='div', attrs={'class':'content'})]
keep_only_tags = [dict(name='h2', attrs={'class':'news_title'}),
dict(name='div', attrs={'class':'naglowek_tresc'}),
dict(name='div', attrs={'id':'czytaj'}) ]
remove_tags = [dict(name='a', attrs={'class':'print'})]
preprocess_regexps = [
(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
[ (r'<a href="#" class="print">Drukuj</a>', lambda match: ''),
(r'<p><a href="http://fronda.pl/sklepy">.*</a></p>', lambda match: ''),
[ (r'<p><a href="http://fronda.pl/sklepy">.*</a></p>', lambda match: ''),
(r'<p><a href="http://fronda.pl/pasaz">.*</a></p>', lambda match: ''),
(r'<h3><strong>W.* lektury.*</a></p></div>', lambda match: '</div>'),
(r'<h3>Zobacz t.*?</div>', lambda match: '</div>') ]
(r'<h3>Zobacz t.*?</div>', lambda match: '</div>'),
(r'<p[^>]*>&nbsp;</p>', lambda match: ''),
(r'<p><span style=".*?"><br /></span></p> ', lambda match: ''),
(r'<a style=\'float:right;margin-top:3px;\' href="http://www.facebook.com/share.php?.*?</a>', lambda match: '')]
]

BIN
recipes/icons/divahair.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 675 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 702 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

BIN
recipes/icons/mayra.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 620 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 243 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 837 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 709 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

BIN
recipes/icons/ziuaveche.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 554 B

View File

@ -0,0 +1,79 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.iprofesional.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class iProfesional(BasicNewsRecipe):
    """Recipe for www.iprofesional.com (Argentina).

    Fixes vs. original:
    - ``publication_type`` was the typo 'nesportal'; corrected to
      'newsportal' (the value used by the sibling FinancialSense recipe).
    - local variable ``str`` in ``preprocess_html`` shadowed the builtin;
      renamed to ``text`` (no behavior change).
    """
    title                 = 'iProfesional.com'
    __author__            = 'Darko Miletic'
    description           = 'Las ultimas noticias sobre profesionales'
    publisher             = 'Emprendimientos Corporativos S.A.'
    category              = 'news, IT, impuestos, negocios, politics, Argentina'
    oldest_article        = 2
    max_articles_per_feed = 200
    no_stylesheets        = True
    encoding              = 'utf8'
    use_embedded_content  = False
    language              = 'es_AR'
    remove_empty_feeds    = True
    publication_type      = 'newsportal'
    masthead_url          = 'http://www.iprofesional.com/img/logo-iprofesional.png'
    extra_css             = """
        body{font-family: Arial,Helvetica,sans-serif }
        img{margin-bottom: 0.4em; display:block}
        .titulo-interior{font-family: Georgia,"Times New Roman",Times,serif}
        .autor-nota{font-size: small; font-weight: bold; font-style: italic; color: gray}
    """

    conversion_options = {
        'comment'   : description
        , 'tags'      : category
        , 'publisher' : publisher
        , 'language'  : language
    }

    keep_only_tags = [dict(attrs={'class': ['fecha', 'interior-nota']})]
    remove_tags = [
        dict(name=['meta', 'link', 'base', 'embed', 'object', 'iframe'])
        , dict(attrs={'class': ['menu-imprimir', 'guardarNota', 'IN-widget', 'fin', 'permalink']})
    ]
    remove_attributes = ['lang', 'xmlns:og', 'xmlns:fb']

    feeds = [
        (u'Ultimas noticias'    , u'http://feeds.feedburner.com/iprofesional-principales-noticias')
        , (u'Finanzas'           , u'http://feeds.feedburner.com/iprofesional-finanzas'            )
        , (u'Impuestos'          , u'http://feeds.feedburner.com/iprofesional-impuestos'           )
        , (u'Negocios'           , u'http://feeds.feedburner.com/iprofesional-economia'            )
        , (u'Comercio Exterior'  , u'http://feeds.feedburner.com/iprofesional-comercio-exterior'   )
        , (u'Tecnologia'         , u'http://feeds.feedburner.com/iprofesional-tecnologia'          )
        , (u'Management'         , u'http://feeds.feedburner.com/iprofesional-managment'           )
        , (u'Marketing'          , u'http://feeds.feedburner.com/iprofesional-marketing'           )
        , (u'Legales'            , u'http://feeds.feedburner.com/iprofesional-legales'             )
        , (u'Autos'              , u'http://feeds.feedburner.com/iprofesional-autos'               )
        , (u'Vinos'              , u'http://feeds.feedburner.com/iprofesional-vinos-bodegas'       )
    ]

    def preprocess_html(self, soup):
        """Clean article markup: drop inline styles, flatten anchors and
        guarantee every image has an ``alt`` attribute."""
        # Drop all inline style attributes.
        for item in soup.findAll(style=True):
            del item['style']
        # Flatten anchors: text-only links become bare text; links wrapping
        # an image become a <div> so the image itself survives conversion.
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                text = item.string  # renamed from 'str' (shadowed builtin)
                item.replaceWith(text)
            else:
                if limg:
                    item.name = 'div'
                    item.attrs = []
                else:
                    text = self.tag_to_string(item)
                    item.replaceWith(text)
        # Some e-book checkers require an alt attribute on every image.
        # has_key() is the BeautifulSoup 3 Tag API used throughout this file.
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup

View File

@ -0,0 +1,36 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Seongkyoun Yoo <Seongkyoun.yoo at gmail.com>'
'''
Profile to download KoreaHerald
'''
from calibre.web.feeds.news import BasicNewsRecipe
class KoreaHerald(BasicNewsRecipe):
    """Recipe that downloads Korea Herald news articles."""
    title = u'KoreaHerald'
    __author__ = 'Seongkyoun Yoo'
    description = u'Korea Herald News articles'
    language = 'en'

    oldest_article = 10
    recursions = 3
    max_articles_per_feed = 10
    no_stylesheets = True

    # Article body containers on koreaherald.com pages.
    keep_only_tags = [
        dict(id=['contentLeft', '_article']),
    ]
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class': ['left', 'htit2', 'navigation', 'banner_txt', 'banner_img']}),
        dict(name='ul', attrs={'class': ['link_icon', 'flow_icon', 'detailTextAD110113']}),
    ]

    feeds = [
        ('All News', 'http://www.koreaherald.com/rss/020000000000.xml'),
        ('National', 'http://www.koreaherald.com/rss/020100000000.xml'),
        ('Business', 'http://www.koreaherald.com/rss/020200000000.xml'),
        ('Life&Style', 'http://www.koreaherald.com/rss/020300000000.xml'),
        ('Entertainment', 'http://www.koreaherald.com/rss/020400000000.xml'),
        ('Sports', 'http://www.koreaherald.com/rss/020500000000.xml'),
        ('Opinion', 'http://www.koreaherald.com/rss/020600000000.xml'),
        ('English Cafe', 'http://www.koreaherald.com/rss/021000000000.xml'),
    ]

View File

@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
# dug from http://www.mobileread.com/forums/showthread.php?p=1012294
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277443634(BasicNewsRecipe):
    # Recipe for Liberty Times (Taiwan); originally shared on MobileRead
    # (see the forum link at the top of this file).
    title = u'自由電子報'
    __author__ = 'einstuerzende, updated by Eddie Lau'
    __version__ = '1.1'
    description = 'Liberty Times (Taiwan)'
    publisher = 'Liberty Times Group'
    category = 'News, Chinese, Taiwan'
    language = 'zh'
    encoding = 'big5'

    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    conversion_options = {'linearize_tables': True}

    masthead_url = 'http://www.libertytimes.com.tw/2008/images/img_auto/005/logo_new.gif'
    cover_url = 'http://www.libertytimes.com.tw/2008/images/img_auto/005/logo_new.gif'

    # Enlarge and embolden the article headline span.
    extra_css = '''span[class='insubject1'][id='newtitle'] {font-size:200%; font-weight:bold;}'''
    # Article body lives in the 'newsContent' table cell.
    keep_only_tags = [dict(name='td', attrs={'id': ['newsContent']})]

    feeds = [
        (u'焦點新聞', u'http://www.libertytimes.com.tw/rss/fo.xml'),
        (u'政治新聞', u'http://www.libertytimes.com.tw/rss/p.xml'),
        (u'生活新聞', u'http://www.libertytimes.com.tw/rss/life.xml'),
        (u'國際新聞', u'http://www.libertytimes.com.tw/rss/int.xml'),
        (u'自由廣場', u'http://www.libertytimes.com.tw/rss/o.xml'),
        (u'社會新聞', u'http://www.libertytimes.com.tw/rss/so.xml'),
        (u'體育新聞', u'http://www.libertytimes.com.tw/rss/sp.xml'),
        (u'財經焦點', u'http://www.libertytimes.com.tw/rss/e.xml'),
        (u'證券理財', u'http://www.libertytimes.com.tw/rss/stock.xml'),
        (u'影視焦點', u'http://www.libertytimes.com.tw/rss/show.xml'),
        (u'北部新聞', u'http://www.libertytimes.com.tw/rss/north.xml'),
        (u'中部新聞', u'http://www.libertytimes.com.tw/rss/center.xml'),
        (u'南部新聞', u'http://www.libertytimes.com.tw/rss/south.xml'),
        (u'大台北新聞', u'http://www.libertytimes.com.tw/rss/taipei.xml'),
        (u'藝術文化', u'http://www.libertytimes.com.tw/rss/art.xml'),
    ]

51
recipes/mayra.recipe Normal file
View File

@ -0,0 +1,51 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
mayra.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Mayra(BasicNewsRecipe):
    """Recipe for mayra.ro (Romanian urban lifestyle magazine)."""
    title = u'Mayra'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Traieste urban, cool, sexy'
    publisher = 'Mayra'
    category = 'Ziare,Stiri,Reviste'
    language = 'ro'
    encoding = 'utf-8'

    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True
    cover_url = 'http://img.konkurs.ro/img/concursuri-cu-premii/147/14672_front.jpg'

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Article body container.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'article_details'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'id': 'LikePluginPagelet'}),
        dict(name='p', attrs={'id': 'tags'}),
        dict(name='span', attrs={'id': 'tweet-button'}),
    ]
    remove_tags_after = [
        dict(name='div', attrs={'id': 'LikePluginPagelet'}),
    ]

    feeds = [(u'\u0218tiri', u'http://www.mayra.ro/rss')]

    def preprocess_html(self, soup):
        # Normalize <img> tags for Adobe Digital Editions compatibility.
        return self.adeify_images(soup)

View File

@ -1,15 +1,18 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2010-2011, Eddie Lau'
# Users of Kindle 3 (with limited system-level CJK support)
# Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False".
__MakePeriodical__ = True
# Turn it to True if your device supports display of CJK titles
# Turn below to true if your device supports display of CJK titles
__UseChineseTitle__ = False
# Trun below to true if you wish to use life.mingpao.com as the main article source
__UseLife__ = True
'''
Change Log:
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
2011/03/06: add new articles for finance section, also a new section "Columns"
2011/02/28: rearrange the sections
[Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles
@ -32,41 +35,43 @@ import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
class MPHKRecipe(BasicNewsRecipe):
title = 'Ming Pao - Hong Kong'
oldest_article = 1
max_articles_per_feed = 100
__author__ = 'Eddie Lau'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
publisher = 'MingPao'
category = 'Chinese, News, Hong Kong'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'zh'
encoding = 'Big5-HKSCS'
recursions = 0
conversion_options = {'linearize_tables':True}
timefmt = ''
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
keep_only_tags = [dict(name='h1'),
title = 'Ming Pao - Hong Kong'
oldest_article = 1
max_articles_per_feed = 100
__author__ = 'Eddie Lau'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
publisher = 'MingPao'
category = 'Chinese, News, Hong Kong'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'zh'
encoding = 'Big5-HKSCS'
recursions = 0
conversion_options = {'linearize_tables':True}
timefmt = ''
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
keep_only_tags = [dict(name='h1'),
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}),
dict(attrs={'class':['photo']})
dict(attrs={'class':['photo']}),
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
]
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page
dict(name='table')] # for content fetched from life.mingpao.com
remove_attributes = ['width']
preprocess_regexps = [
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='table')] # for content fetched from life.mingpao.com
remove_attributes = ['width']
preprocess_regexps = [
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
lambda match: '<h1>'),
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
@ -80,10 +85,10 @@ class MPHKRecipe(BasicNewsRecipe):
lambda match: "</b>")
]
def image_url_processor(cls, baseurl, url):
# trick: break the url at the first occurance of digit, add an additional
# '_' at the front
# not working, may need to move this to preprocess_html() method
def image_url_processor(cls, baseurl, url):
# trick: break the url at the first occurance of digit, add an additional
# '_' at the front
# not working, may need to move this to preprocess_html() method
# minIdx = 10000
# i0 = url.find('0')
# if i0 >= 0 and i0 < minIdx:
@ -115,314 +120,357 @@ class MPHKRecipe(BasicNewsRecipe):
# i9 = url.find('9')
# if i9 >= 0 and i9 < minIdx:
# minIdx = i9
return url
return url
def get_dtlocal(self):
dt_utc = datetime.datetime.utcnow()
# convert UTC to local hk time - at around HKT 6.00am, all news are available
dt_local = dt_utc - datetime.timedelta(-2.0/24)
return dt_local
def get_dtlocal(self):
dt_utc = datetime.datetime.utcnow()
# convert UTC to local hk time - at around HKT 6.00am, all news are available
dt_local = dt_utc - datetime.timedelta(-2.0/24)
return dt_local
def get_fetchdate(self):
return self.get_dtlocal().strftime("%Y%m%d")
def get_fetchdate(self):
return self.get_dtlocal().strftime("%Y%m%d")
def get_fetchformatteddate(self):
return self.get_dtlocal().strftime("%Y-%m-%d")
def get_fetchformatteddate(self):
return self.get_dtlocal().strftime("%Y-%m-%d")
def get_fetchday(self):
# convert UTC to local hk time - at around HKT 6.00am, all news are available
return self.get_dtlocal().strftime("%d")
def get_fetchday(self):
# dt_utc = datetime.datetime.utcnow()
# convert UTC to local hk time - at around HKT 6.00am, all news are available
# dt_local = dt_utc - datetime.timedelta(-2.0/24)
return self.get_dtlocal().strftime("%d")
def get_cover_url(self):
cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
except:
cover = None
return cover
def get_cover_url(self):
cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
except:
cover = None
return cover
def parse_index(self):
feeds = []
dateStr = self.get_fetchdate()
def parse_index(self):
feeds = []
dateStr = self.get_fetchdate()
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
if __UseLife__:
for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'),
(u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'),
(u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'),
(u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr', 'nal'),
(u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalfa', 'nal'),
(u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalca', 'nal'),
(u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
articles = self.parse_section2(url, keystr)
if articles:
feeds.append((title, articles))
# special- editorial
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
if ed_articles:
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
else:
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- editorial
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
if ed_articles:
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
# special - finance
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
if fin_articles:
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special - finance
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
if fin_articles:
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
# special - entertainment
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
if ent_articles:
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special - entertainment
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
if ent_articles:
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- columns
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
if col_articles:
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
# special- columns
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
if col_articles:
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
return feeds
return feeds
def parse_section(self, url):
dateStr = self.get_fetchdate()
soup = self.index_to_soup(url)
divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
current_articles = []
included_urls = []
divs.reverse()
for i in divs:
a = i.find('a', href = True)
title = self.tag_to_string(a)
url = a.get('href', False)
url = 'http://news.mingpao.com/' + dateStr + '/' +url
if url not in included_urls and url.rfind('Redirect') == -1:
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
included_urls.append(url)
current_articles.reverse()
return current_articles
# parse from news.mingpao.com
def parse_section(self, url):
dateStr = self.get_fetchdate()
soup = self.index_to_soup(url)
divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
current_articles = []
included_urls = []
divs.reverse()
for i in divs:
a = i.find('a', href = True)
title = self.tag_to_string(a)
url = a.get('href', False)
url = 'http://news.mingpao.com/' + dateStr + '/' +url
if url not in included_urls and url.rfind('Redirect') == -1:
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
included_urls.append(url)
current_articles.reverse()
return current_articles
def parse_ed_section(self, url):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
a.reverse()
current_articles = []
included_urls = []
for i in a:
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('nal') == -1):
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
return current_articles
# parse from life.mingpao.com
def parse_section2(self, url, keystr):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
a.reverse()
current_articles = []
included_urls = []
for i in a:
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
return current_articles
def parse_fin_section(self, url):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href= True)
current_articles = []
included_urls = []
for i in a:
#url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
#if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
if url not in included_urls and (not url.rfind('txt') == -1) and (not url.rfind('nal') == -1):
title = self.tag_to_string(i)
current_articles.append({'title': title, 'url': url, 'description':''})
included_urls.append(url)
return current_articles
def parse_ed_section(self, url):
    """Return articles for an editorial section on life.mingpao.com.

    This body previously duplicated parse_section2() line for line with
    the key hard-coded to 'nal' (keep links whose URL contains '.txt'
    and 'nal', newest-first listing reversed into reading order).
    Delegate so the shared filtering logic lives in one place.
    """
    return self.parse_section2(url, 'nal')
def parse_ent_section(self, url):
    # Build the article list for the Ming Pao entertainment section on
    # ol.mingpao.com.  Links are listed newest-first, so they are walked
    # in reverse and the result flipped back at the end.
    # NOTE(review): an identical parse_ent_section is defined again later
    # in this class; in Python the later definition wins.
    self.get_fetchdate()
    soup = self.index_to_soup(url)
    a = soup.findAll('a', href=True)
    a.reverse()
    current_articles = []
    included_urls = []  # URLs already collected, to skip duplicates
    for i in a:
        title = self.tag_to_string(i)
        url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
        # Keep only links whose URL contains both '.txt' and 'star'.
        if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('star') == -1):
            current_articles.append({'title': title, 'url': url, 'description': ''})
            included_urls.append(url)
    current_articles.reverse()
    return current_articles
def parse_fin_section(self, url):
    """Collect finance-section article links from life.mingpao.com.

    Keeps the first occurrence of every link whose URL contains both
    'txt' and 'nal'; page order is preserved (no reversal here).
    """
    self.get_fetchdate()
    page = self.index_to_soup(url)
    articles = []
    seen = []
    for anchor in page.findAll('a', href=True):
        target = 'http://life.mingpao.com/cfm/' + anchor.get('href', False)
        if target in seen:
            continue
        if target.rfind('txt') != -1 and target.rfind('nal') != -1:
            articles.append({'title': self.tag_to_string(anchor),
                             'url': target,
                             'description': ''})
            seen.append(target)
    return articles
def parse_col_section(self, url):
    # Build the article list for a Ming Pao columns section on
    # life.mingpao.com.  Links are listed newest-first, so they are
    # walked in reverse and the result flipped back at the end.
    # NOTE(review): an identical parse_col_section is defined again later
    # in this class; in Python the later definition wins.
    self.get_fetchdate()
    soup = self.index_to_soup(url)
    a = soup.findAll('a', href=True)
    a.reverse()
    current_articles = []
    included_urls = []  # URLs already collected, to skip duplicates
    for i in a:
        title = self.tag_to_string(i)
        url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
        # Keep only links whose URL contains both '.txt' and 'ncl'.
        if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('ncl') == -1):
            current_articles.append({'title': title, 'url': url, 'description': ''})
            included_urls.append(url)
    current_articles.reverse()
    return current_articles
def parse_ent_section(self, url):
    """Collect entertainment-section article links from ol.mingpao.com.

    Keeps links whose URL contains both '.txt' and 'star', dropping
    duplicates; the listing is newest-first, so it is scanned backwards
    and the result reversed into reading order.
    """
    self.get_fetchdate()
    page = self.index_to_soup(url)
    anchors = page.findAll('a', href=True)
    anchors.reverse()
    articles = []
    seen = []
    for anchor in anchors:
        target = 'http://ol.mingpao.com/cfm/' + anchor.get('href', False)
        keep = (target not in seen
                and target.rfind('.txt') != -1
                and target.rfind('star') != -1)
        if keep:
            articles.append({'title': self.tag_to_string(anchor),
                             'url': target,
                             'description': ''})
            seen.append(target)
    articles.reverse()
    return articles
def preprocess_html(self, soup):
    # Strip inline presentation attributes from the downloaded page.
    for item in soup.findAll(style=True):
        del item['style']
    # NOTE(review): this loop matches on 'style' but deletes 'width' —
    # presumably it was meant to be findAll(width=True); confirm intent.
    for item in soup.findAll(style=True):
        del item['width']
    # NOTE(review): 'stype' looks like a typo (no such HTML attribute),
    # and 'absmiddle' is an attribute *value* (align="absmiddle"), not an
    # attribute name — this loop is almost certainly a no-op as written.
    for item in soup.findAll(stype=True):
        del item['absmiddle']
    return soup
def parse_col_section(self, url):
    """Collect column-section article links from life.mingpao.com.

    Keeps links whose URL contains both '.txt' and 'ncl', dropping
    duplicates; the listing is newest-first, so it is scanned backwards
    and the result reversed into reading order.
    """
    self.get_fetchdate()
    page = self.index_to_soup(url)
    anchors = page.findAll('a', href=True)
    anchors.reverse()
    articles = []
    seen = []
    for anchor in anchors:
        target = 'http://life.mingpao.com/cfm/' + anchor.get('href', False)
        if target in seen:
            continue
        if target.rfind('.txt') != -1 and target.rfind('ncl') != -1:
            articles.append({'title': self.tag_to_string(anchor),
                             'url': target,
                             'description': ''})
            seen.append(target)
    articles.reverse()
    return articles
def create_opf(self, feeds, dir=None):
if dir is None:
dir = self.output_dir
if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u9999\u6e2f)'
else:
title = self.short_title()
# if not generating a periodical, force date to apply in title
if __MakePeriodical__ == False:
title = title + ' ' + self.get_fetchformatteddate()
if True:
mi = MetaInformation(title, [self.publisher])
mi.publisher = self.publisher
mi.author_sort = self.publisher
if __MakePeriodical__ == True:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
else:
mi.publication_type = self.publication_type+':'+self.short_title()
#mi.timestamp = nowf()
mi.timestamp = self.get_dtlocal()
mi.comments = self.description
if not isinstance(mi.comments, unicode):
mi.comments = mi.comments.decode('utf-8', 'replace')
#mi.pubdate = nowf()
mi.pubdate = self.get_dtlocal()
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
def preprocess_html(self, soup):
    """Strip inline presentation attributes from the downloaded page.

    Fixes two no-op loops from the original: one matched on 'style'
    while deleting 'width', and one used the non-existent attribute
    name 'stype' and tried to delete 'absmiddle', which is a *value*
    of the align attribute, not an attribute name.  Each loop now
    matches on the attribute it actually removes.
    """
    for item in soup.findAll(style=True):
        del item['style']
    for item in soup.findAll(width=True):
        del item['width']
    # 'absmiddle' appears as align="absmiddle"; drop the align attribute.
    for item in soup.findAll(align=True):
        del item['align']
    return soup
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
def create_opf(self, feeds, dir=None):
if dir is None:
dir = self.output_dir
if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u9999\u6e2f)'
else:
title = self.short_title()
# if not generating a periodical, force date to apply in title
if __MakePeriodical__ == False:
title = title + ' ' + self.get_fetchformatteddate()
if True:
mi = MetaInformation(title, [self.publisher])
mi.publisher = self.publisher
mi.author_sort = self.publisher
if __MakePeriodical__ == True:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
else:
mi.publication_type = self.publication_type+':'+self.short_title()
#mi.timestamp = nowf()
mi.timestamp = self.get_dtlocal()
mi.comments = self.description
if not isinstance(mi.comments, unicode):
mi.comments = mi.comments.decode('utf-8', 'replace')
#mi.pubdate = nowf()
mi.pubdate = self.get_dtlocal()
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
opf.create_manifest_from_files_in(manifest)
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
def feed_index(num, parent):
f = feeds[num]
for j, a in enumerate(f):
if getattr(a, 'downloaded', False):
adir = 'feed_%d/article_%d/'%(num, j)
auth = a.author
if not auth:
auth = None
desc = a.text_summary
if not desc:
desc = None
else:
desc = self.description_limiter(desc)
entries.append('%sindex.html'%adir)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
opf.create_manifest_from_files_in(manifest)
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
def feed_index(num, parent):
f = feeds[num]
for j, a in enumerate(f):
if getattr(a, 'downloaded', False):
adir = 'feed_%d/article_%d/'%(num, j)
auth = a.author
if not auth:
auth = None
desc = a.text_summary
if not desc:
desc = None
else:
desc = self.description_limiter(desc)
entries.append('%sindex.html'%adir)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
play_order=po, author=auth, description=desc)
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
for sp in a.sub_pages:
prefix = os.path.commonprefix([opf_path, sp])
relp = sp[len(prefix):]
entries.append(relp.replace(os.sep, '/'))
last = sp
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
for sp in a.sub_pages:
prefix = os.path.commonprefix([opf_path, sp])
relp = sp[len(prefix):]
entries.append(relp.replace(os.sep, '/'))
last = sp
if os.path.exists(last):
with open(last, 'rb') as fi:
src = fi.read().decode('utf-8')
soup = BeautifulSoup(src)
body = soup.find('body')
if body is not None:
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
templ = self.navbar.generate(True, num, j, len(f),
if os.path.exists(last):
with open(last, 'rb') as fi:
src = fi.read().decode('utf-8')
soup = BeautifulSoup(src)
body = soup.find('body')
if body is not None:
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
templ = self.navbar.generate(True, num, j, len(f),
not self.has_single_feed,
a.orig_url, self.publisher, prefix=prefix,
center=self.center_navbar)
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem)
with open(last, 'wb') as fi:
fi.write(unicode(soup).encode('utf-8'))
if len(feeds) == 0:
raise Exception('All feeds are empty, aborting.')
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem)
with open(last, 'wb') as fi:
fi.write(unicode(soup).encode('utf-8'))
if len(feeds) == 0:
raise Exception('All feeds are empty, aborting.')
if len(feeds) > 1:
for i, f in enumerate(feeds):
entries.append('feed_%d/index.html'%i)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
auth = getattr(f, 'author', None)
if not auth:
auth = None
desc = getattr(f, 'description', None)
if not desc:
desc = None
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
if len(feeds) > 1:
for i, f in enumerate(feeds):
entries.append('feed_%d/index.html'%i)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
auth = getattr(f, 'author', None)
if not auth:
auth = None
desc = getattr(f, 'description', None)
if not desc:
desc = None
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
f.title, play_order=po, description=desc, author=auth))
else:
entries.append('feed_%d/index.html'%0)
feed_index(0, toc)
else:
entries.append('feed_%d/index.html'%0)
feed_index(0, toc)
for i, p in enumerate(entries):
entries[i] = os.path.join(dir, p.replace('/', os.sep))
opf.create_spine(entries)
opf.set_toc(toc)
for i, p in enumerate(entries):
entries[i] = os.path.join(dir, p.replace('/', os.sep))
opf.create_spine(entries)
opf.set_toc(toc)
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
opf.render(opf_file, ncx_file)
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
opf.render(opf_file, ncx_file)

50
recipes/moldovaazi.recipe Normal file
View File

@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
azi.md
'''
from calibre.web.feeds.news import BasicNewsRecipe
class MoldovaAzi(BasicNewsRecipe):
    # Calibre news recipe for Moldova Azi (azi.md), a Romanian-language
    # news site covering Moldova.
    title = u'Moldova Azi'
    language = 'ro'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Moldova pe internet'
    publisher = 'Moldova Azi'
    category = 'Ziare,Stiri,Moldova'
    oldest_article = 5  # days
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_javascript = True
    cover_url = 'http://www.azi.md/images/logo.gif'

    # Metadata propagated into the generated e-book.
    conversion_options = {
        'comments' : description
        ,'tags' : category
        ,'language' : language
        ,'publisher' : publisher
    }

    # Keep only the main article container.
    keep_only_tags = [ dict(name='div', attrs={'id':'in'})
    ]

    remove_tags = [
        dict(name='div', attrs={'class':'in-more-stories'})
    ]

    # Everything after the comment block / section title box is cut.
    remove_tags_after = [
        dict(name='div', attrs={'id':'comment_wrapper'})
        , dict(name='div', attrs={'class':'box-title4'})
    ]

    feeds = [ (u'\u0218tiri', u'http://www.azi.md/ro/feeds/0/rss201') ]

    def preprocess_html(self, soup):
        # Normalise <img> tags for e-book output (calibre helper).
        return self.adeify_images(soup)

View File

@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
newsmoldova.md
'''
from calibre.web.feeds.news import BasicNewsRecipe
class NewsMoldova(BasicNewsRecipe):
    # Calibre news recipe for the Moldovan news agency site
    # newsmoldova.md (Romanian language).
    title = u'Agen\u0163ia de \u015ftiri Moldova'
    language = 'ro'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Agen\u0163ia de \u015ftiri Moldova'
    publisher = 'Moldova'
    category = 'Ziare,Stiri,Moldova'
    oldest_article = 5  # days
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_javascript = True
    cover_url = 'http://www.newsmoldova.md/i/logo_top_md.gif'

    # Metadata propagated into the generated e-book.
    conversion_options = {
        'comments' : description
        ,'tags' : category
        ,'language' : language
        ,'publisher' : publisher
    }

    # Keep only the main article body.
    keep_only_tags = [ dict(name='div', attrs={'class':'main-article-index article'})
    ]

    remove_tags = [
        dict(name='div', attrs={'id':'actions'})
        , dict(name='li', attrs={'class':'invisible'})
    ]

    remove_tags_after = [
        dict(name='div', attrs={'id':'actions'})
    ]

    feeds = [ (u'\u0218tiri', u'http://newsmoldova.md/export/rss2/archive/index.xml') ]

    def preprocess_html(self, soup):
        # Normalise <img> tags for e-book output (calibre helper).
        return self.adeify_images(soup)

View File

@ -3,7 +3,6 @@ __license__ = 'GPL v3'
'''
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.web.feeds import Feed
class ReadersDigest(BasicNewsRecipe):
@ -38,151 +37,20 @@ class ReadersDigest(BasicNewsRecipe):
'''
remove_tags = [
dict(name='h4', attrs={'class':'close'}),
dict(name='div', attrs={'class':'fromLine'}),
dict(name='img', attrs={'class':'colorTag'}),
dict(name='div', attrs={'id':'sponsorArticleHeader'}),
dict(name='div', attrs={'class':'horizontalAd'}),
dict(name='div', attrs={'id':'imageCounterLeft'}),
dict(name='div', attrs={'id':'commentsPrint'})
]
feeds = [
('New in RD', 'http://feeds.rd.com/ReadersDigest'),
('Jokes', 'http://feeds.rd.com/ReadersDigestJokes'),
('Cartoons', 'http://feeds.rd.com/ReadersDigestCartoons'),
('Blogs','http://feeds.rd.com/ReadersDigestBlogs')
('Food', 'http://www.rd.com/food/feed'),
('Health', 'http://www.rd.com/health/feed'),
('Home', 'http://www.rd.com/home/feed'),
('Family', 'http://www.rd.com/family/feed'),
('Money', 'http://www.rd.com/money/feed'),
('Travel', 'http://www.rd.com/travel/feed'),
]
cover_url = 'http://www.rd.com/images/logo-main-rd.gif'
#-------------------------------------------------------------------------------------------------
def print_version(self, url):
# Get the identity number of the current article and append it to the root print URL
if url.find('/article') > 0:
ident = url[url.find('/article')+8:url.find('.html?')-4]
url = 'http://www.rd.com/content/printContent.do?contentId=' + ident
elif url.find('/post') > 0:
# in this case, have to get the page itself to derive the Print page.
soup = self.index_to_soup(url)
newsoup = soup.find('ul',attrs={'class':'printBlock'})
url = 'http://www.rd.com' + newsoup('a')[0]['href']
url = url[0:url.find('&Keep')]
return url
#-------------------------------------------------------------------------------------------------
def parse_index(self):
pages = [
('Your America','http://www.rd.com/your-america-inspiring-people-and-stories', 'channelLeftContainer',{'class':'moreLeft'}),
# useless recipes ('Living Healthy','http://www.rd.com/living-healthy', 'channelLeftContainer',{'class':'moreLeft'}),
('Advice and Know-How','http://www.rd.com/advice-and-know-how', 'channelLeftContainer',{'class':'moreLeft'})
keep_only_tags = dict(id='main-content')
remove_tags = [
{'class':['post-categories']},
]
feeds = []
for page in pages:
section, url, divider, attrList = page
newArticles = self.page_parse(url, divider, attrList)
feeds.append((section,newArticles))
# after the pages of the site have been processed, parse several RSS feeds for additional sections
newfeeds = Feed()
newfeeds = self.parse_rss()
# The utility code in parse_rss returns a Feed object. Convert each feed/article combination into a form suitable
# for this module (parse_index).
for feed in newfeeds:
newArticles = []
for article in feed.articles:
newArt = {
'title' : article.title,
'url' : article.url,
'date' : article.date,
'description' : article.text_summary
}
newArticles.append(newArt)
# New and Blogs should be the first two feeds.
if feed.title == 'New in RD':
feeds.insert(0,(feed.title,newArticles))
elif feed.title == 'Blogs':
feeds.insert(1,(feed.title,newArticles))
else:
feeds.append((feed.title,newArticles))
return feeds
#-------------------------------------------------------------------------------------------------
def page_parse(self, mainurl, divider, attrList):
articles = []
mainsoup = self.index_to_soup(mainurl)
for item in mainsoup.findAll(attrs=attrList):
newArticle = {
'title' : item('img')[0]['alt'],
'url' : 'http://www.rd.com'+item('a')[0]['href'],
'date' : '',
'description' : ''
}
articles.append(newArticle)
return articles
#-------------------------------------------------------------------------------------------------
def parse_rss (self):
# Do the "official" parse_feeds first
feeds = BasicNewsRecipe.parse_feeds(self)
# Loop thru the articles in all feeds to find articles with "recipe" in it
recipeArticles = []
for curfeed in feeds:
delList = []
for a,curarticle in enumerate(curfeed.articles):
if curarticle.title.upper().find('RECIPE') >= 0:
recipeArticles.append(curarticle)
delList.append(curarticle)
if len(delList)>0:
for d in delList:
index = curfeed.articles.index(d)
curfeed.articles[index:index+1] = []
# If there are any recipes found, create a new Feed object and append.
if len(recipeArticles) > 0:
pfeed = Feed()
pfeed.title = 'Recipes'
pfeed.descrition = 'Recipe Feed (Virtual)'
pfeed.image_url = None
pfeed.oldest_article = 30
pfeed.id_counter = len(recipeArticles)
# Create a new Feed, add the recipe articles, and then append
# to "official" list of feeds
pfeed.articles = recipeArticles[:]
feeds.append(pfeed)
return feeds

View File

@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, '
'''
replicavedetelor.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ReplicaVedetelor(BasicNewsRecipe):
    # Calibre news recipe for Replica Vedetelor (replicavedetelor.ro),
    # a Romanian celebrity-news site.
    title = u'Replica Vedetelor'
    __author__ = u'Silviu Cotoara'
    description = u'Ofer\u0103 vedetelor dreptul la replic\u0103'
    publisher = 'Replica Vedetelor'
    oldest_article = 5  # days
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Ziare,Reviste,Vedete'
    encoding = 'utf-8'
    cover_url = 'http://www.webart-software.eu/_pics/lucrari_referinta/medium/84/1-Replica-Vedetelor.jpg'

    # Metadata propagated into the generated e-book.
    conversion_options = {
        'comments' : description
        ,'tags' : category
        ,'language' : language
        ,'publisher' : publisher
    }

    # Keep only the main content container.
    keep_only_tags = [
        dict(name='div', attrs={'id':'zona-continut'})
    ]

    # Drop the image strip and the "send to a friend" form.
    remove_tags = [
        dict(name='ul', attrs={'id':['lista-imagini']})
        , dict(name='form', attrs={'id':['f-trimite-unui-prieten']})
    ]

    remove_tags_after = [
        dict(name='form', attrs={'id':['f-trimite-unui-prieten']})
    ]

    feeds = [
        (u'Feeds', u'http://www.replicavedetelor.ro/feed')
    ]

    def preprocess_html(self, soup):
        # Normalise <img> tags for e-book output (calibre helper).
        return self.adeify_images(soup)

View File

@ -2,7 +2,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class RzeczpospolitaRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = 'kwetal'
__author__ = u'kwetal and Tomasz Dlugosz'
language = 'pl'
version = 1
@ -38,6 +38,8 @@ class RzeczpospolitaRecipe(BasicNewsRecipe):
remove_tags.append(dict(name = 'div', attrs = {'class' : 'clr'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'share_bottom'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'copyright_law'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'more'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'editorPicks'}))
extra_css = '''
body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
@ -48,6 +50,13 @@ class RzeczpospolitaRecipe(BasicNewsRecipe):
.fot{font-size: x-small; color: #666666;}
'''
def skip_ad_pages(self, soup):
if ('advertisement' in soup.find('title').string.lower()):
href = soup.find('a').get('href')
return self.index_to_soup(href, raw=True)
else:
return None
def print_version(self, url):
start, sep, rest = url.rpartition('/')
forget, sep, index = rest.rpartition(',')

View File

@ -33,7 +33,7 @@ class StrategyBusinessRecipe(BasicNewsRecipe):
elif c.name.endswith('_password'):
br[c.name] = self.password
raw = br.submit().read()
if '>Logout' not in raw:
if 'You have been logged in' not in raw:
raise ValueError('Failed to login, check your username and password')
return br

View File

@ -1,17 +1,12 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
import re
from calibre.web.feeds.news import BasicNewsRecipe
class TelepolisNews(BasicNewsRecipe):
title = u'Telepolis (News+Artikel)'
__author__ = 'Gerhard Aigner'
__author__ = 'syntaxis'
publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
description = 'News from telepolis'
description = 'News from Telepolis'
category = 'news'
oldest_article = 7
max_articles_per_feed = 100
@ -20,14 +15,19 @@ class TelepolisNews(BasicNewsRecipe):
encoding = "utf-8"
language = 'de'
use_embedded_content =False
remove_empty_feeds = True
preprocess_regexps = [(re.compile(r'<a[^>]*>', re.DOTALL|re.IGNORECASE), lambda match: ''),
(re.compile(r'</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),]
keep_only_tags = [dict(name = 'td',attrs={'class':'bloghead'}),dict(name = 'td',attrs={'class':'blogfliess'})]
remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'}), dict(name='td',attrs={'class':'forum'})]
keep_only_tags = [dict(name = 'div',attrs={'class':'head'}),dict(name = 'div',attrs={'class':'leftbox'}),dict(name='td',attrs={'class':'strict'})]
remove_tags = [ dict(name='td',attrs={'class':'blogbottom'}),
dict(name='div',attrs={'class':'forum'}), dict(name='div',attrs={'class':'social'}),dict(name='div',attrs={'class':'blog-letter p-news'}),
dict(name='div',attrs={'class':'blog-sub'}),dict(name='div',attrs={'class':'version-div'}),dict(name='div',attrs={'id':'breadcrumb'})
,dict(attrs={'class':'tp-url'}),dict(attrs={'class':'blog-name entry_'}) ]
remove_tags_after = [dict(name='span', attrs={'class':['breadcrumb']})]
feeds = [(u'News', u'http://www.heise.de/tp/news-atom.xml')]
@ -39,15 +39,8 @@ class TelepolisNews(BasicNewsRecipe):
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
def get_article_url(self, article):
'''if the linked article is of kind artikel don't take it'''
if (article.link.count('artikel') > 1) :
return None
return article.link
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
soup.head.insert(0,mtag)
return soup

View File

@ -10,6 +10,8 @@ import re
from calibre.web.feeds.news import BasicNewsRecipe
class Time(BasicNewsRecipe):
recipe_disabled = ('This recipe has been disabled as TIME no longer'
' publish complete articles on the web.')
title = u'Time'
__author__ = 'Kovid Goyal and Sujata Raman'
description = 'Weekly magazine'

View File

@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
from calibre.web.feeds.news import BasicNewsRecipe
class UnitedDaily(BasicNewsRecipe):
    # Calibre news recipe for United Daily News (udn.com), Taiwan.
    # One RSS feed per section of the paper.
    title = u'聯合新聞網'
    oldest_article = 1  # days
    max_articles_per_feed = 100

    feeds = [(u'焦點', u'http://udn.com/udnrss/focus.xml'),
             (u'政治', u'http://udn.com/udnrss/politics.xml'),
             (u'社會', u'http://udn.com/udnrss/social.xml'),
             (u'生活', u'http://udn.com/udnrss/life.xml'),
             (u'綜合', u'http://udn.com/udnrss/education.xml'),
             (u'意見評論', u'http://udn.com/udnrss/opinion.xml'),
             (u'大台北', u'http://udn.com/udnrss/local_taipei.xml'),
             (u'桃竹苗', u'http://udn.com/udnrss/local_tyhcml.xml'),
             (u'中彰投', u'http://udn.com/udnrss/local_tcchnt.xml'),
             (u'雲嘉南', u'http://udn.com/udnrss/local_ylcytn.xml'),
             (u'高屏離島', u'http://udn.com/udnrss/local_ksptisland.xml'),
             (u'基宜花東', u'http://udn.com/udnrss/local_klilhltt.xml'),
             (u'台灣百寶鄉', u'http://udn.com/udnrss/local_oddlyenough.xml'),
             (u'兩岸要聞', u'http://udn.com/udnrss/mainland.xml'),
             (u'國際焦點', u'http://udn.com/udnrss/international.xml'),
             (u'台商經貿', u'http://udn.com/udnrss/financechina.xml'),
             (u'國際財經', u'http://udn.com/udnrss/financeworld.xml'),
             (u'財經焦點', u'http://udn.com/udnrss/financesfocus.xml'),
             (u'股市要聞', u'http://udn.com/udnrss/stock.xml'),
             (u'股市快訊', u'http://udn.com/udnrss/stklatest.xml'),
             (u'稅務法務', u'http://udn.com/udnrss/tax.xml'),
             (u'房市情報', u'http://udn.com/udnrss/houses.xml'),
             (u'棒球', u'http://udn.com/udnrss/baseball.xml'),
             (u'籃球', u'http://udn.com/udnrss/basketball.xml'),
             (u'體壇動態', u'http://udn.com/udnrss/sportsfocus.xml'),
             (u'熱門星聞', u'http://udn.com/udnrss/starsfocus.xml'),
             (u'廣電港陸', u'http://udn.com/udnrss/tv.xml'),
             (u'海外星球', u'http://udn.com/udnrss/starswestern.xml'),
             (u'日韓星情', u'http://udn.com/udnrss/starsjk.xml'),
             (u'電影世界', u'http://udn.com/udnrss/movie.xml'),
             (u'流行音樂', u'http://udn.com/udnrss/music.xml'),
             (u'觀點專題', u'http://udn.com/udnrss/starssubject.xml'),
             (u'食樂指南', u'http://udn.com/udnrss/food.xml'),
             (u'折扣好康', u'http://udn.com/udnrss/shopping.xml'),
             (u'醫藥新聞', u'http://udn.com/udnrss/health.xml'),
             (u'家婦繽紛', u'http://udn.com/udnrss/benfen.xml'),
             (u'談星論命', u'http://udn.com/udnrss/astrology.xml'),
             (u'文化副刊', u'http://udn.com/udnrss/reading.xml'),
             ]

    # Enlarge and embolden the article headline.
    extra_css = '''div[id='story_title'] {font-size:200%; font-weight:bold;}'''

    __author__ = 'Eddie Lau'
    __version__ = '1.0'
    language = 'zh'
    publisher = 'United Daily News Group'
    description = 'United Daily (Taiwan)'
    category = 'News, Chinese, Taiwan'
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'big5'  # site is served in Big5, not UTF-8
    conversion_options = {'linearize_tables':True}
    masthead_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
    cover_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'

    # Keep the headline, byline and story body; drop the multimedia box.
    keep_only_tags = [dict(name='div', attrs={'id':['story_title','story_author', 'story']})]
    remove_tags = [dict(name='div', attrs={'id':['mvouter']})]

View File

@ -0,0 +1,20 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    # Calibre news recipe for "Welt der Physik" (weltderphysik.de).
    title = u'Welt der Physik'
    __author__ = 'schuster'
    # BUGFIX: the attribute was misspelled 'remove_tags_befor' (silently
    # ignored by BasicNewsRecipe) and wrapped in a list, while the API
    # expects a single tag-spec dict.  The cut-off marker now takes effect.
    remove_tags_before = dict(name='div', attrs={'class':'inhalt_bild_text_printonly'})
    remove_tags_after = [dict(name='span', attrs={'class':'clearinhalt_bild'})]
    # Navigation, search and service chrome to strip from each page.
    remove_tags = [dict(attrs={'class':['invisible', 'searchfld', 'searchbtn', 'topnavi', 'topsearch']}),
                   dict(id=['naservice', 'phservicemenu', '',]),
                   dict(name=['naservice'])]
    oldest_article = 7  # days
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    remove_javascript = True

    feeds = [(u'Nachrichten und Neuigkeiten', u'http://www.weltderphysik.de/rss/alles.xml')]

53
recipes/ziuaveche.recipe Normal file
View File

@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
ziuaveche.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ZiuaVeche(BasicNewsRecipe):
    # Calibre news recipe for Ziua Veche (ziuaveche.ro), a Romanian
    # online daily.
    title = u'Ziua Veche'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'Cotidian online'
    publisher = 'Ziua Veche'
    oldest_article = 5  # days
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Ziare,Cotidiane,Stiri'
    encoding = 'utf-8'
    cover_url = 'http://www.ziuaveche.ro/wp-content/themes/tema/images/zv-logo-alb-old.png'

    # Metadata propagated into the generated e-book.
    conversion_options = {
        'comments' : description
        ,'tags' : category
        ,'language' : language
        ,'publisher' : publisher
    }

    # Keep only the single-post container.
    keep_only_tags = [
        dict(name='div', attrs={'id':'singlePost'})
    ]

    # Drop the Facebook Like widget; everything after it is cut too.
    remove_tags = [
        dict(name='div', attrs={'id':'LikePluginPagelet'})
    ]

    remove_tags_after = [
        dict(name='div', attrs={'id':'LikePluginPagelet'})
    ]

    feeds = [
        (u'Feeds', u'http://www.ziuaveche.ro/feed/rss')
    ]

    def preprocess_html(self, soup):
        # Normalise <img> tags for e-book output (calibre helper).
        return self.adeify_images(soup)

View File

@ -11,7 +11,7 @@ __all__ = [
'build', 'build_pdf2xml', 'server',
'gui',
'develop', 'install',
'resources',
'kakasi', 'resources',
'check',
'sdist',
'manual', 'tag_release',
@ -49,8 +49,9 @@ gui = GUI()
from setup.check import Check
check = Check()
from setup.resources import Resources
from setup.resources import Resources, Kakasi
resources = Resources()
kakasi = Kakasi()
from setup.publish import Manual, TagRelease, Stage1, Stage2, \
Stage3, Stage4, Publish

View File

@ -30,11 +30,12 @@ int report_libc_error(const char *msg) {
}
int pyobject_to_int(PyObject *res) {
int ret; PyObject *tmp;
tmp = PyNumber_Int(res);
if (tmp == NULL) ret = (PyObject_IsTrue(res)) ? 1 : 0;
else ret = (int)PyInt_AS_LONG(tmp);
int ret = 0; PyObject *tmp;
if (res != NULL) {
tmp = PyNumber_Int(res);
if (tmp == NULL) ret = (PyObject_IsTrue(res)) ? 1 : 0;
else ret = (int)PyInt_AS_LONG(tmp);
}
return ret;
}

View File

@ -32,6 +32,7 @@ class Win32(VMInstaller):
FREEZE_TEMPLATE = 'python -OO setup.py {freeze_command} --no-ice'
INSTALLER_EXT = 'msi'
SHUTDOWN_CMD = ['shutdown.exe', '-s', '-f', '-t', '0']
BUILD_BUILD = ['python setup.py kakasi',] + VMInstaller.BUILD_BUILD
def download_installer(self):
installer = self.installer()

View File

@ -14,7 +14,7 @@ from setup.build_environment import msvc, MT, RC
from setup.installer.windows.wix import WixMixIn
OPENSSL_DIR = r'Q:\openssl'
QT_DIR = 'Q:\\Qt\\4.7.2'
QT_DIR = 'Q:\\Qt\\4.7.3'
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
LIBUSB_DIR = 'C:\\libusb'
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'

View File

@ -23,6 +23,9 @@ wWinMain(HINSTANCE Inst, HINSTANCE PrevInst,
ret = execute_python_entrypoint(BASENAME, MODULE, FUNCTION,
stdout_redirect, stderr_redirect);
if (stdout != NULL) fclose(stdout);
if (stderr != NULL) fclose(stderr);
DeleteFile(stdout_redirect);
DeleteFile(stderr_redirect);

View File

@ -11,9 +11,6 @@
SummaryCodepage='1252' />
<Media Id="1" Cabinet="{app}.cab" CompressionLevel="{compression}" EmbedCab="yes" />
<!-- The following line is needed because of the patch to QtCore4.dll. You can remove this line
after you update Qt beyond 4.7.2. 'emus' means re-install even if version is the same not just if it is older. -->
<Property Id='REINSTALLMODE' Value='emus'/>
<Upgrade Id="{upgrade_code}">
<UpgradeVersion Maximum="{version}"

View File

@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
import os, cPickle, re, anydbm, shutil, marshal, zipfile, glob
from zlib import compress
from setup import Command, basenames, __appname__
from setup import Command, basenames, __appname__, iswindows
def get_opts_from_parser(parser):
def do_opt(opt):
@ -23,13 +23,119 @@ def get_opts_from_parser(parser):
for o in g.option_list:
for x in do_opt(o): yield x
class Resources(Command):
class Kakasi(Command):
description = 'Compile various needed calibre resources'
description = 'Compile resources for unihandecode'
KAKASI_PATH = os.path.join(Command.SRC, __appname__,
'ebooks', 'unihandecode', 'pykakasi')
def run(self, opts):
self.records = {}
src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','kanwadict2.db')
base = os.path.dirname(dest)
if not os.path.exists(base):
os.makedirs(base)
if self.newer(dest, src) or iswindows:
self.info('\tGenerating Kanwadict')
for line in open(src, "r"):
self.parsekdict(line)
self.kanwaout(dest)
src = self.j(self.KAKASI_PATH, 'itaijidict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','itaijidict2.pickle')
if self.newer(dest, src) or iswindows:
self.info('\tGenerating Itaijidict')
self.mkitaiji(src, dest)
src = self.j(self.KAKASI_PATH, 'kanadict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','kanadict2.pickle')
if self.newer(dest, src) or iswindows:
self.info('\tGenerating kanadict')
self.mkkanadict(src, dest)
return
def mkitaiji(self, src, dst):
dic = {}
for line in open(src, "r"):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
continue
if re.match(r"^$",line):
continue
pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
dic[pair[0]] = pair[1]
cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
def mkkanadict(self, src, dst):
dic = {}
for line in open(src, "r"):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
continue
if re.match(r"^$",line):
continue
(alpha, kana) = line.split(' ')
dic[kana] = alpha
cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
def parsekdict(self, line):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
return
(yomi, kanji) = line.split(' ')
if ord(yomi[-1:]) <= ord('z'):
tail = yomi[-1:]
yomi = yomi[:-1]
else:
tail = ''
self.updaterec(kanji, yomi, tail)
def updaterec(self, kanji, yomi, tail):
key = "%04x"%ord(kanji[0])
if key in self.records:
if kanji in self.records[key]:
rec = self.records[key][kanji]
rec.append((yomi,tail))
self.records[key].update( {kanji: rec} )
else:
self.records[key][kanji]=[(yomi, tail)]
else:
self.records[key] = {}
self.records[key][kanji]=[(yomi, tail)]
def kanwaout(self, out):
try:
# Needed as otherwise anydbm tries to create a gdbm db when the db
# created on Unix is found
os.remove(out)
except:
pass
dic = anydbm.open(out, 'n')
for (k, v) in self.records.iteritems():
dic[k] = compress(marshal.dumps(v))
dic.close()
def clean(self):
kakasi = self.j(self.RESOURCES, 'localization', 'pykakasi')
if os.path.exists(kakasi):
shutil.rmtree(kakasi)
class Resources(Command):
description = 'Compile various needed calibre resources'
sub_commands = ['kakasi']
def run(self, opts):
scripts = {}
for x in ('console', 'gui'):
@ -117,108 +223,13 @@ class Resources(Command):
import json
json.dump(function_dict, open(dest, 'wb'), indent=4)
self.run_kakasi(opts)
def run_kakasi(self, opts):
self.records = {}
src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','kanwadict2.db')
base = os.path.dirname(dest)
if not os.path.exists(base):
os.makedirs(base)
if self.newer(dest, src):
self.info('\tGenerating Kanwadict')
for line in open(src, "r"):
self.parsekdict(line)
self.kanwaout(dest)
src = self.j(self.KAKASI_PATH, 'itaijidict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','itaijidict2.pickle')
if self.newer(dest, src):
self.info('\tGenerating Itaijidict')
self.mkitaiji(src, dest)
src = self.j(self.KAKASI_PATH, 'kanadict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','kanadict2.pickle')
if self.newer(dest, src):
self.info('\tGenerating kanadict')
self.mkkanadict(src, dest)
return
def mkitaiji(self, src, dst):
dic = {}
for line in open(src, "r"):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
continue
if re.match(r"^$",line):
continue
pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
dic[pair[0]] = pair[1]
cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
def mkkanadict(self, src, dst):
dic = {}
for line in open(src, "r"):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
continue
if re.match(r"^$",line):
continue
(alpha, kana) = line.split(' ')
dic[kana] = alpha
cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
def parsekdict(self, line):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
return
(yomi, kanji) = line.split(' ')
if ord(yomi[-1:]) <= ord('z'):
tail = yomi[-1:]
yomi = yomi[:-1]
else:
tail = ''
self.updaterec(kanji, yomi, tail)
def updaterec(self, kanji, yomi, tail):
key = "%04x"%ord(kanji[0])
if key in self.records:
if kanji in self.records[key]:
rec = self.records[key][kanji]
rec.append((yomi,tail))
self.records[key].update( {kanji: rec} )
else:
self.records[key][kanji]=[(yomi, tail)]
else:
self.records[key] = {}
self.records[key][kanji]=[(yomi, tail)]
def kanwaout(self, out):
dic = anydbm.open(out, 'c')
for (k, v) in self.records.iteritems():
dic[k] = compress(marshal.dumps(v))
dic.close()
def clean(self):
for x in ('scripts', 'recipes', 'ebook-convert-complete'):
x = self.j(self.RESOURCES, x+'.pickle')
if os.path.exists(x):
os.remove(x)
kakasi = self.j(self.RESOURCES, 'localization', 'pykakasi')
if os.path.exists(kakasi):
shutil.rmtree(kakasi)
from setup.commands import kakasi
kakasi.clean()

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 7, 59)
numeric_version = (0, 8, 1)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -449,7 +449,7 @@ class CatalogPlugin(Plugin): # {{{
['author_sort','authors','comments','cover','formats',
'id','isbn','ondevice','pubdate','publisher','rating',
'series_index','series','size','tags','timestamp',
'title','uuid'])
'title_sort','title','uuid'])
all_custom_fields = set(db.custom_field_keys())
all_fields = all_std_fields.union(all_custom_fields)
@ -607,7 +607,7 @@ class StoreBase(Plugin): # {{{
supported_platforms = ['windows', 'osx', 'linux']
author = 'John Schember'
type = _('Store')
minimum_calibre_version = (0, 7, 59)
minimum_calibre_version = (0, 8, 0)
actual_plugin = None

View File

@ -628,8 +628,9 @@ from calibre.ebooks.metadata.sources.amazon import Amazon
from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
from calibre.ebooks.metadata.sources.isbndb import ISBNDB
from calibre.ebooks.metadata.sources.overdrive import OverDrive
from calibre.ebooks.metadata.sources.douban import Douban
plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive]
plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban]
# }}}
@ -1096,6 +1097,11 @@ class StoreAmazonKindleStore(StoreBase):
description = _('Kindle books from Amazon')
actual_plugin = 'calibre.gui2.store.amazon_plugin:AmazonKindleStore'
class StoreAmazonDEKindleStore(StoreBase):
name = 'Amazon DE Kindle'
description = _('Kindle eBooks')
actual_plugin = 'calibre.gui2.store.amazon_de_plugin:AmazonDEKindleStore'
class StoreAmazonUKKindleStore(StoreBase):
name = 'Amazon UK Kindle'
description = _('Kindle books from Amazon.uk')
@ -1111,6 +1117,11 @@ class StoreBNStore(StoreBase):
description = _('Books, Textbooks, eBooks, Toys, Games and More.')
actual_plugin = 'calibre.gui2.store.bn_plugin:BNStore'
class StoreBeamEBooksDEStore(StoreBase):
name = 'Beam EBooks DE'
description = _('der eBook Shop')
actual_plugin = 'calibre.gui2.store.beam_ebooks_de_plugin:BeamEBooksDEStore'
class StoreBeWriteStore(StoreBase):
name = 'BeWrite Books'
description = _('Publishers of fine books.')
@ -1126,7 +1137,12 @@ class StoreEbookscomStore(StoreBase):
description = _('The digital bookstore.')
actual_plugin = 'calibre.gui2.store.ebooks_com_plugin:EbookscomStore'
class StoreEHarlequinStoretore(StoreBase):
class StoreEPubBuyDEStore(StoreBase):
name = 'EPUBBuy DE'
description = _('EPUBReaders eBook Shop')
actual_plugin = 'calibre.gui2.store.epubbuy_de_plugin:EPubBuyDEStore'
class StoreEHarlequinStore(StoreBase):
name = 'eHarlequin'
description = _('entertain, enrich, inspire.')
actual_plugin = 'calibre.gui2.store.eharlequin_plugin:EHarlequinStore'
@ -1136,6 +1152,11 @@ class StoreFeedbooksStore(StoreBase):
description = _('Read anywhere.')
actual_plugin = 'calibre.gui2.store.feedbooks_plugin:FeedbooksStore'
class StoreFoylesUKStore(StoreBase):
name = 'Foyles UK'
description = _('Foyles of London, online')
actual_plugin = 'calibre.gui2.store.foyles_uk_plugin:FoylesUKStore'
class StoreGutenbergStore(StoreBase):
name = 'Project Gutenberg'
description = _('The first producer of free ebooks.')
@ -1171,22 +1192,23 @@ class StoreWaterstonesUKStore(StoreBase):
description = _('Feel every word')
actual_plugin = 'calibre.gui2.store.waterstones_uk_plugin:WaterstonesUKStore'
class StoreFoylesUKStore(StoreBase):
name = 'Foyles UK'
description = _('Foyles of London, online')
actual_plugin = 'calibre.gui2.store.foyles_uk_plugin:FoylesUKStore'
class StoreWeightlessBooksStore(StoreBase):
name = 'Weightless Books'
description = '(e)Books That Don\'t Weigh You Down'
actual_plugin = 'calibre.gui2.store.weightless_books_plugin:WeightlessBooksStore'
class AmazonDEKindleStore(StoreBase):
name = 'Amazon DE Kindle'
description = _('Kindle eBooks')
actual_plugin = 'calibre.gui2.store.amazon_de_plugin:AmazonDEKindleStore'
class StoreWizardsTowerBooksStore(StoreBase):
name = 'Wizards Tower Books'
description = 'Wizard\'s Tower Press'
actual_plugin = 'calibre.gui2.store.wizards_tower_books_plugin:WizardsTowerBooksStore'
plugins += [StoreAmazonKindleStore, AmazonDEKindleStore, StoreAmazonUKKindleStore,
plugins += [StoreAmazonKindleStore, StoreAmazonDEKindleStore, StoreAmazonUKKindleStore,
StoreBaenWebScriptionStore, StoreBNStore,
StoreBeWriteStore, StoreDieselEbooksStore, StoreEbookscomStore,
StoreEHarlequinStoretore, StoreFeedbooksStore,
StoreBeamEBooksDEStore, StoreBeWriteStore,
StoreDieselEbooksStore, StoreEbookscomStore, StoreEPubBuyDEStore,
StoreEHarlequinStore, StoreFeedbooksStore,
StoreFoylesUKStore, StoreGutenbergStore, StoreKoboStore, StoreManyBooksStore,
StoreMobileReadStore, StoreOpenLibraryStore, StoreSmashwordsStore,
StoreWaterstonesUKStore]
StoreWaterstonesUKStore, StoreWeightlessBooksStore, StoreWizardsTowerBooksStore]
# }}}

View File

@ -253,7 +253,7 @@ class OutputProfile(Plugin):
periodical_date_in_title = True
#: Characters used in jackets and catalogs
missing_char = u'x'
missing_char = u'x'
ratings_char = u'*'
empty_ratings_char = u' '
read_char = u'+'
@ -293,38 +293,38 @@ class iPadOutput(OutputProfile):
}
]
missing_char = u'\u2715\u200a' # stylized 'x' plus hair space
ratings_char = u'\u2605' # filled star
empty_ratings_char = u'\u2606' # hollow star
read_char = u'\u2713' # check mark
missing_char = u'\u2715\u200a' # stylized 'x' plus hair space
ratings_char = u'\u2605' # filled star
empty_ratings_char = u'\u2606' # hollow star
read_char = u'\u2713' # check mark
touchscreen = True
# touchscreen_news_css {{{
touchscreen_news_css = u'''
/* hr used in articles */
.article_articles_list {
/* hr used in articles */
.article_articles_list {
width:18%;
}
}
.article_link {
color: #593f29;
color: #593f29;
font-style: italic;
}
.article_next {
-webkit-border-top-right-radius:4px;
-webkit-border-bottom-right-radius:4px;
-webkit-border-top-right-radius:4px;
-webkit-border-bottom-right-radius:4px;
font-style: italic;
width:32%;
}
.article_prev {
-webkit-border-top-left-radius:4px;
-webkit-border-bottom-left-radius:4px;
-webkit-border-top-left-radius:4px;
-webkit-border-bottom-left-radius:4px;
font-style: italic;
width:32%;
}
.article_sections_list {
.article_sections_list {
width:18%;
}
}
.articles_link {
font-weight: bold;
}
@ -334,8 +334,8 @@ class iPadOutput(OutputProfile):
.caption_divider {
border:#ccc 1px solid;
}
border:#ccc 1px solid;
}
.touchscreen_navbar {
background:#c3bab2;
@ -357,50 +357,50 @@ class iPadOutput(OutputProfile):
text-align:center;
}
.touchscreen_navbar td a:link {
color: #593f29;
text-decoration: none;
}
.touchscreen_navbar td a:link {
color: #593f29;
text-decoration: none;
}
/* Index formatting */
.publish_date {
text-align:center;
}
.divider {
border-bottom:1em solid white;
border-top:1px solid gray;
}
/* Index formatting */
.publish_date {
text-align:center;
}
.divider {
border-bottom:1em solid white;
border-top:1px solid gray;
}
hr.caption_divider {
border-color:black;
border-style:solid;
border-width:1px;
}
hr.caption_divider {
border-color:black;
border-style:solid;
border-width:1px;
}
/* Feed summary formatting */
.article_summary {
display:inline-block;
}
display:inline-block;
}
.feed {
font-family:sans-serif;
font-weight:bold;
font-size:larger;
}
}
.feed_link {
font-style: italic;
}
.feed_next {
-webkit-border-top-right-radius:4px;
-webkit-border-bottom-right-radius:4px;
-webkit-border-top-right-radius:4px;
-webkit-border-bottom-right-radius:4px;
font-style: italic;
width:40%;
}
.feed_prev {
-webkit-border-top-left-radius:4px;
-webkit-border-bottom-left-radius:4px;
-webkit-border-top-left-radius:4px;
-webkit-border-bottom-left-radius:4px;
font-style: italic;
width:40%;
}
@ -410,24 +410,24 @@ class iPadOutput(OutputProfile):
font-size: 160%;
}
.feed_up {
.feed_up {
font-weight: bold;
width:20%;
}
}
.summary_headline {
font-weight:bold;
text-align:left;
}
}
.summary_byline {
text-align:left;
font-family:monospace;
}
}
.summary_text {
text-align:left;
}
}
'''
# }}}
@ -617,8 +617,8 @@ class KindleOutput(OutputProfile):
supports_mobi_indexing = True
periodical_date_in_title = False
missing_char = u'x\u2009'
empty_ratings_char = u'\u2606'
missing_char = u'x\u2009'
empty_ratings_char = u'\u2606'
ratings_char = u'\u2605'
read_char = u'\u2713'
@ -642,8 +642,8 @@ class KindleDXOutput(OutputProfile):
#comic_screen_size = (741, 1022)
supports_mobi_indexing = True
periodical_date_in_title = False
missing_char = u'x\u2009'
empty_ratings_char = u'\u2606'
missing_char = u'x\u2009'
empty_ratings_char = u'\u2606'
ratings_char = u'\u2605'
read_char = u'\u2713'
mobi_ems_per_blockquote = 2.0

View File

@ -92,8 +92,7 @@ def restore_plugin_state_to_default(plugin_or_name):
config['enabled_plugins'] = ep
default_disabled_plugins = set([
'Douban Books', 'Douban.com covers', 'Nicebooks', 'Nicebooks covers',
'Kent District Library'
'Overdrive', 'Douban Books',
])
def is_disabled(plugin):

View File

@ -109,7 +109,7 @@ class ANDROID(USBMS):
'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
'7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB']
'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD']

View File

@ -203,9 +203,11 @@ class ITUNES(DriverBase):
# 0x1294 iPhone 3GS
# 0x1297 iPhone 4
# 0x129a iPad
# 0x12a2 iPad2
# 0x129f iPad2 (WiFi)
# 0x12a2 iPad2 (GSM)
# 0x12a3 iPad2 (CDMA)
VENDOR_ID = [0x05ac]
PRODUCT_ID = [0x1292,0x1293,0x1294,0x1297,0x1299,0x129a,0x12a2]
PRODUCT_ID = [0x1292,0x1293,0x1294,0x1297,0x1299,0x129a,0x129f,0x12a2,0x12a3]
BCD = [0x01]
# Plugboard ID
@ -506,7 +508,7 @@ class ITUNES(DriverBase):
if self.iTunes:
# Check for connected book-capable device
self.sources = self._get_sources()
if 'iPod' in self.sources:
if 'iPod' in self.sources and not self.ejected:
#if DEBUG:
#sys.stdout.write('.')
#sys.stdout.flush()
@ -2036,16 +2038,17 @@ class ITUNES(DriverBase):
if 'iPod' in self.sources:
connected_device = self.sources['iPod']
device = self.iTunes.sources[connected_device]
dev_books = None
for pl in device.playlists():
if pl.special_kind() == appscript.k.Books:
if DEBUG:
self.log.info(" Book playlist: '%s'" % (pl.name()))
books = pl.file_tracks()
dev_books = pl.file_tracks()
break
else:
self.log.error(" book_playlist not found")
for book in books:
for book in dev_books:
# This may need additional entries for international iTunes users
if book.kind() in self.Audiobooks:
if DEBUG:

View File

@ -64,7 +64,7 @@ class HANLINV3(USBMS):
return names
def linux_swap_drives(self, drives):
if len(drives) < 2: return drives
if len(drives) < 2 or not drives[1] or not drives[2]: return drives
drives = list(drives)
t = drives[0]
drives[0] = drives[1]
@ -95,7 +95,6 @@ class HANLINV5(HANLINV3):
gui_name = 'Hanlin V5'
description = _('Communicate with Hanlin V5 eBook readers.')
VENDOR_ID = [0x0492]
PRODUCT_ID = [0x8813]
BCD = [0x319]

View File

@ -164,7 +164,7 @@ class APNXBuilder(object):
if c == '/':
closing = True
continue
elif c in ('d', 'p'):
elif c == 'p':
if closing:
in_p = False
else:

View File

@ -38,7 +38,7 @@ class KOBO(USBMS):
VENDOR_ID = [0x2237]
PRODUCT_ID = [0x4161]
BCD = [0x0110]
BCD = [0x0110, 0x0323]
VENDOR_NAME = ['KOBO_INC', 'KOBO']
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['.KOBOEREADER', 'EREADER']

View File

@ -68,9 +68,9 @@ class USER_DEFINED(USBMS):
'is prepended to any send_to_device template') + '</p>',
]
EXTRA_CUSTOMIZATION_DEFAULT = [
'0x0000',
'0x0000',
'0x0000',
'0xffff',
'0xffff',
'0xffff',
None,
'',
'',

View File

@ -68,7 +68,8 @@ def check_command_line_options(parser, args, log):
raise SystemExit(1)
output = args[2]
if output.startswith('.') and output != '.':
if output.startswith('.') and (output != '.' and not
output.startswith('..')):
output = os.path.splitext(os.path.basename(input))[0]+output
output = os.path.abspath(output)

View File

@ -103,10 +103,11 @@ class EPUBInput(InputFormatPlugin):
t.set('href', guide_cover)
t.set('title', 'Title Page')
from calibre.ebooks import render_html_svg_workaround
renderer = render_html_svg_workaround(guide_cover, log)
if renderer is not None:
open('calibre_raster_cover.jpg', 'wb').write(
renderer)
if os.path.exists(guide_cover):
renderer = render_html_svg_workaround(guide_cover, log)
if renderer is not None:
open('calibre_raster_cover.jpg', 'wb').write(
renderer)
def find_opf(self):
def attr(n, attr):

View File

@ -7,7 +7,6 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
import posixpath
from calibre import guess_type, walk
from calibre.customize.conversion import InputFormatPlugin
@ -74,22 +73,23 @@ class HTMLZInput(InputFormatPlugin):
meta_info_to_oeb_metadata(mi, oeb.metadata, log)
# Get the cover path from the OPF.
cover_href = None
cover_path = None
opf = None
for x in walk('.'):
if os.path.splitext(x)[1].lower() in ('.opf'):
opf = x
break
if opf:
opf = OPF(opf)
cover_href = posixpath.relpath(opf.cover, os.path.dirname(stream.name))
opf = OPF(opf, basedir=os.getcwd())
cover_path = opf.raster_cover
# Set the cover.
if cover_href:
if cover_path:
cdata = None
with open(cover_href, 'rb') as cf:
with open(os.path.join(os.getcwd(), cover_path), 'rb') as cf:
cdata = cf.read()
id, href = oeb.manifest.generate('cover', cover_href)
oeb.manifest.add(id, href, guess_type(cover_href)[0], data=cdata)
cover_name = os.path.basename(cover_path)
id, href = oeb.manifest.generate('cover', cover_name)
oeb.manifest.add(id, href, guess_type(cover_name)[0], data=cdata)
oeb.guide.add('cover', 'Cover', href)
return oeb

View File

@ -83,6 +83,7 @@ class ArchiveExtract(FileTypePlugin):
return of.name
def get_comic_book_info(d, mi):
# See http://code.google.com/p/comicbookinfo/wiki/Example
series = d.get('series', '')
if series.strip():
mi.series = series
@ -111,6 +112,7 @@ def get_comic_book_info(d, mi):
def get_cbz_metadata(stream):
# See http://code.google.com/p/comicbookinfo/wiki/Example
from calibre.utils.zipfile import ZipFile
from calibre.ebooks.metadata import MetaInformation
import json

View File

@ -112,10 +112,15 @@ class Metadata(object):
Be careful with numeric fields since this will return True for zero as
well as None.
Also returns True if the field does not exist.
'''
null_val = NULL_VALUES.get(field, None)
val = getattr(self, field, None)
return not val or val == null_val
try:
null_val = NULL_VALUES.get(field, None)
val = getattr(self, field, None)
return not val or val == null_val
except:
return True
def __getattribute__(self, field):
_data = object.__getattribute__(self, '_data')

View File

@ -8,12 +8,11 @@ Read meta information from extZ (TXTZ, HTMLZ...) files.
'''
import os
import posixpath
from cStringIO import StringIO
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.zipfile import ZipFile, safe_replace
@ -31,9 +30,9 @@ def get_metadata(stream, extract_cover=True):
opf = OPF(opf_stream)
mi = opf.to_book_metadata()
if extract_cover:
cover_href = posixpath.relpath(opf.cover, os.path.dirname(stream.name))
cover_href = opf.raster_cover
if cover_href:
mi.cover_data = ('jpg', zf.read(cover_href))
mi.cover_data = (os.path.splitext(cover_href)[1], zf.read(cover_href))
except:
return mi
return mi
@ -59,18 +58,15 @@ def set_metadata(stream, mi):
except:
pass
if new_cdata:
cover = opf.cover
if not cover:
cover = 'cover.jpg'
cpath = posixpath.join(posixpath.dirname(opf_path), cover)
cpath = opf.raster_cover
if not cpath:
cpath = 'cover.jpg'
new_cover = _write_new_cover(new_cdata, cpath)
replacements[cpath] = open(new_cover.name, 'rb')
mi.cover = cover
mi.cover = cpath
# Update the metadata.
old_mi = opf.to_book_metadata()
old_mi.smart_update(mi)
opf.smart_update(metadata_to_opf(old_mi))
opf.smart_update(mi, replace_metadata=True)
newopf = StringIO(opf.render())
safe_replace(stream, opf_path, newopf, extra_replacements=replacements, add_missing=True)

View File

@ -16,7 +16,7 @@ from lxml.html import soupparser, tostring
from calibre import as_unicode
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source
from calibre.ebooks.metadata.sources.base import Source, Option
from calibre.utils.cleantext import clean_ascii_chars
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata.book.base import Metadata
@ -37,6 +37,92 @@ class Worker(Thread): # Get details {{{
self.relevance, self.plugin = relevance, plugin
self.browser = browser.clone_browser()
self.cover_url = self.amazon_id = self.isbn = None
self.domain = self.plugin.domain
months = {
'de': {
1 : ['jän'],
3 : ['märz'],
5 : ['mai'],
6 : ['juni'],
7 : ['juli'],
10: ['okt'],
12: ['dez']
},
'it': {
1: ['enn'],
2: ['febbr'],
5: ['magg'],
6: ['giugno'],
7: ['luglio'],
8: ['ag'],
9: ['sett'],
10: ['ott'],
12: ['dic'],
},
'fr': {
1: ['janv'],
2: ['févr'],
3: ['mars'],
4: ['avril'],
5: ['mai'],
6: ['juin'],
7: ['juil'],
8: ['août'],
9: ['sept'],
12: ['déc'],
},
}
self.english_months = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
self.months = months.get(self.domain, {})
self.pd_xpath = '''
//h2[text()="Product Details" or \
text()="Produktinformation" or \
text()="Dettagli prodotto" or \
text()="Product details" or \
text()="Détails sur le produit"]/../div[@class="content"]
'''
self.publisher_xpath = '''
descendant::*[starts-with(text(), "Publisher:") or \
starts-with(text(), "Verlag:") or \
starts-with(text(), "Editore:") or \
starts-with(text(), "Editeur")]
'''
self.language_xpath = '''
descendant::*[
starts-with(text(), "Language:") \
or text() = "Language" \
or text() = "Sprache:" \
or text() = "Lingua:" \
or starts-with(text(), "Langue") \
]
'''
self.ratings_pat = re.compile(
r'([0-9.]+) (out of|von|su|étoiles sur) (\d+)( (stars|Sternen|stelle)){0,1}')
lm = {
'en': ('English', 'Englisch'),
'fr': ('French', 'Français'),
'it': ('Italian', 'Italiano'),
'de': ('German', 'Deutsch'),
}
self.lang_map = {}
for code, names in lm.iteritems():
for name in names:
self.lang_map[name] = code
def delocalize_datestr(self, raw):
if not self.months:
return raw
ans = raw.lower()
for i, vals in self.months.iteritems():
for x in vals:
ans = ans.replace(x, self.english_months[i])
return ans
def run(self):
try:
@ -132,7 +218,7 @@ class Worker(Thread): # Get details {{{
self.log.exception('Error parsing cover for url: %r'%self.url)
mi.has_cover = bool(self.cover_url)
pd = root.xpath('//h2[text()="Product Details"]/../div[@class="content"]')
pd = root.xpath(self.pd_xpath)
if pd:
pd = pd[0]
@ -194,30 +280,42 @@ class Worker(Thread): # Get details {{{
def parse_authors(self, root):
x = '//h1[@class="parseasinTitle"]/following-sibling::span/*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]'
aname = root.xpath(x)
if not aname:
aname = root.xpath('''
//h1[@class="parseasinTitle"]/following-sibling::*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]
''')
for x in aname:
x.tail = ''
authors = [tostring(x, encoding=unicode, method='text').strip() for x
in aname]
authors = [a for a in authors if a]
return authors
def parse_rating(self, root):
ratings = root.xpath('//div[@class="jumpBar"]/descendant::span[@class="asinReviewsSummary"]')
pat = re.compile(r'([0-9.]+) out of (\d+) stars')
if not ratings:
ratings = root.xpath('//div[@class="buying"]/descendant::span[@class="asinReviewsSummary"]')
if not ratings:
ratings = root.xpath('//span[@class="crAvgStars"]/descendant::span[@class="asinReviewsSummary"]')
if ratings:
for elem in ratings[0].xpath('descendant::*[@title]'):
t = elem.get('title').strip()
m = pat.match(t)
m = self.ratings_pat.match(t)
if m is not None:
return float(m.group(1))/float(m.group(2)) * 5
return float(m.group(1))/float(m.group(3)) * 5
def parse_comments(self, root):
desc = root.xpath('//div[@id="productDescription"]/*[@class="content"]')
if desc:
desc = desc[0]
for c in desc.xpath('descendant::*[@class="seeAll" or'
' @class="emptyClear" or @href]'):
' @class="emptyClear"]'):
c.getparent().remove(c)
for a in desc.xpath('descendant::a[@href]'):
del a.attrib['href']
a.tag = 'span'
desc = tostring(desc, method='html', encoding=unicode).strip()
# Encoding bug in Amazon data U+fffd (replacement char)
# in some examples it is present in place of '
desc = desc.replace('\ufffd', "'")
@ -246,41 +344,44 @@ class Worker(Thread): # Get details {{{
return ('/'.join(parts[:-1]))+'/'+bn
def parse_isbn(self, pd):
for x in reversed(pd.xpath(
'descendant::*[starts-with(text(), "ISBN")]')):
items = pd.xpath(
'descendant::*[starts-with(text(), "ISBN")]')
if not items:
items = pd.xpath(
'descendant::b[contains(text(), "ISBN:")]')
for x in reversed(items):
if x.tail:
ans = check_isbn(x.tail.strip())
if ans:
return ans
def parse_publisher(self, pd):
for x in reversed(pd.xpath(
'descendant::*[starts-with(text(), "Publisher:")]')):
for x in reversed(pd.xpath(self.publisher_xpath)):
if x.tail:
ans = x.tail.partition(';')[0]
return ans.partition('(')[0].strip()
def parse_pubdate(self, pd):
for x in reversed(pd.xpath(
'descendant::*[starts-with(text(), "Publisher:")]')):
for x in reversed(pd.xpath(self.publisher_xpath)):
if x.tail:
ans = x.tail
date = ans.partition('(')[-1].replace(')', '').strip()
date = self.delocalize_datestr(date)
return parse_date(date, assume_utc=True)
def parse_language(self, pd):
for x in reversed(pd.xpath(
'descendant::*[starts-with(text(), "Language:")]')):
for x in reversed(pd.xpath(self.language_xpath)):
if x.tail:
ans = x.tail.strip()
if ans == 'English':
return 'en'
ans = self.lang_map.get(ans, None)
if ans:
return ans
# }}}
class Amazon(Source):
name = 'Amazon.com'
description = _('Downloads metadata from Amazon')
description = _('Downloads metadata and covers from Amazon')
capabilities = frozenset(['identify', 'cover'])
touched_fields = frozenset(['title', 'authors', 'identifier:amazon',
@ -294,8 +395,15 @@ class Amazon(Source):
'fr' : _('France'),
'de' : _('Germany'),
'uk' : _('UK'),
'it' : _('Italy'),
}
options = (
Option('domain', 'choices', 'com', _('Amazon website to use:'),
_('Metadata from Amazon will be fetched using this '
'country\'s Amazon website.'), choices=AMAZON_DOMAINS),
)
def get_book_url(self, identifiers): # {{{
asin = identifiers.get('amazon', None)
if asin is None:
@ -304,8 +412,16 @@ class Amazon(Source):
return ('amazon', asin, 'http://amzn.com/%s'%asin)
# }}}
@property
def domain(self):
domain = self.prefs['domain']
if domain not in self.AMAZON_DOMAINS:
domain = 'com'
return domain
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
domain = self.prefs.get('domain', 'com')
domain = self.domain
# See the amazon detailed search page to get all options
q = { 'search-alias' : 'aps',
@ -338,13 +454,15 @@ class Amazon(Source):
q['field-author'] = ' '.join(author_tokens)
if not ('field-keywords' in q or 'field-isbn' in q or
('field-title' in q and 'field-author' in q)):
('field-title' in q)):
# Insufficient metadata to make an identify query
return None
latin1q = dict([(x.encode('latin1', 'ignore'), y.encode('latin1',
'ignore')) for x, y in
q.iteritems()])
if domain == 'uk':
domain = 'co.uk'
url = 'http://www.amazon.%s/s/?'%domain + urlencode(latin1q)
return url
@ -516,11 +634,19 @@ if __name__ == '__main__': # tests {{{
# src/calibre/ebooks/metadata/sources/amazon.py
from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
title_test, authors_test)
test_identify_plugin(Amazon.name,
[
com_tests = [ # {{{
( # An e-book ISBN not on Amazon, one of the authors is
# unknown to Amazon, so no popup wrapper
( # Description has links
{'identifiers':{'isbn': '9780671578275'}},
[title_test('A Civil Campaign: A Comedy of Biology and Manners',
exact=True), authors_test(['Lois McMaster Bujold'])
]
),
( # An e-book ISBN not on Amazon, the title/author search matches
# the Kindle edition, which has different markup for ratings and
# isbn
{'identifiers':{'isbn': '9780307459671'},
'title':'Invisible Gorilla', 'authors':['Christopher Chabris']},
[title_test('The Invisible Gorilla: And Other Ways Our Intuitions Deceive Us',
@ -556,6 +682,38 @@ if __name__ == '__main__': # tests {{{
),
])
] # }}}
de_tests = [ # {{{
(
{'identifiers':{'isbn': '3548283519'}},
[title_test('Wer Wind sät',
exact=True), authors_test(['Nele Neuhaus'])
]
),
] # }}}
it_tests = [ # {{{
(
{'identifiers':{'isbn': '8838922195'}},
[title_test('La briscola in cinque',
exact=True), authors_test(['Marco Malvaldi'])
]
),
] # }}}
fr_tests = [ # {{{
(
{'identifiers':{'isbn': '2221116798'}},
[title_test('L\'étrange voyage de Monsieur Daldry',
exact=True), authors_test(['Marc Levy'])
]
),
] # }}}
test_identify_plugin(Amazon.name, com_tests)
# }}}

View File

@ -145,10 +145,13 @@ class Option(object):
:param default: The default value for this option
:param label: A short (few words) description of this option
:param desc: A longer description of this option
:param choices: A list of possible values, used only if type='choices'
:param choices: A dict of possible values, used only if type='choices'.
dict is of the form {key:human readable label, ...}
'''
self.name, self.type, self.default, self.label, self.desc = (name,
type_, default, label, desc)
if choices and not isinstance(choices, dict):
choices = dict([(x, x) for x in choices])
self.choices = choices
class Source(Plugin):
@ -212,6 +215,9 @@ class Source(Plugin):
def is_customizable(self):
return True
def customization_help(self):
return 'This plugin can only be customized using the GUI'
def config_widget(self):
from calibre.gui2.metadata.config import ConfigWidget
return ConfigWidget(self)
@ -288,10 +294,10 @@ class Source(Plugin):
parts = parts[1:] + parts[:1]
for tok in parts:
tok = remove_pat.sub('', tok).strip()
if len(tok) > 2 and tok.lower() not in ('von', ):
if len(tok) > 2 and tok.lower() not in ('von', 'van',
_('Unknown').lower()):
yield tok
def get_title_tokens(self, title, strip_joiners=True, strip_subtitle=False):
'''
Take a title and return a list of tokens useful for an AND search query.

View File

@ -0,0 +1,347 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>; 2011, Li Fanxi <lifanxi@freemindworld.com>'
__docformat__ = 'restructuredtext en'
import time
from urllib import urlencode
from functools import partial
from Queue import Queue, Empty
from lxml import etree
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.date import parse_date, utcnow
from calibre.utils.cleantext import clean_ascii_chars
from calibre import as_unicode
# XML namespace prefixes used in the Atom feeds returned by the Douban
# Books API (OpenSearch paging info, Atom entries, Douban-specific
# attributes and Google Data ratings).
NAMESPACES = {
    'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
    'atom' : 'http://www.w3.org/2005/Atom',
    'db': 'http://www.douban.com/xmlns/',
    'gd': 'http://schemas.google.com/g/2005'
}
# Pre-bind the namespace map so the XPath expressions below stay short.
XPath = partial(etree.XPath, namespaces=NAMESPACES)

# Compiled XPath selectors for the pieces of a feed/entry we care about.
total_results  = XPath('//openSearch:totalResults')
start_index    = XPath('//openSearch:startIndex')
items_per_page = XPath('//openSearch:itemsPerPage')
entry          = XPath('//atom:entry')
entry_id       = XPath('descendant::atom:id')
title          = XPath('descendant::atom:title')
description    = XPath('descendant::atom:summary')
publisher      = XPath("descendant::db:attribute[@name='publisher']")
isbn           = XPath("descendant::db:attribute[@name='isbn13']")
date           = XPath("descendant::db:attribute[@name='pubdate']")
creator        = XPath("descendant::db:attribute[@name='author']")
booktag        = XPath("descendant::db:tag/attribute::name")
rating         = XPath("descendant::gd:rating/attribute::average")
cover_url      = XPath("descendant::atom:link[@rel='image']/attribute::href")
def get_details(browser, url, timeout): # {{{
    '''
    Fetch the raw bytes at ``url`` with ``browser``.

    If the first attempt fails with an HTTP 403 (Douban throttling the
    client), wait two seconds and retry once; any other error propagates
    to the caller unchanged.
    '''
    try:
        return browser.open_novisit(url, timeout=timeout).read()
    except Exception as e:
        code = getattr(e, 'getcode', lambda : -1)()
        if code != 403:
            raise
    # Douban is throttling us, wait a little and try again
    time.sleep(2)
    return browser.open_novisit(url, timeout=timeout).read()
# }}}
def to_metadata(browser, log, entry_, timeout): # {{{
    '''
    Convert one Atom ``<entry>`` element from a Douban feed into a
    calibre :class:`Metadata` object.

    The entry's own detail page is fetched (via :func:`get_details`) to
    populate the richer fields: comments, publisher, ISBNs, tags,
    pubdate, rating and cover URL.

    :param browser: mechanize-style browser used to fetch the detail page
    :param log: logger used for error reporting
    :param entry_: the lxml element for the Atom entry
    :param timeout: network timeout in seconds
    :return: a Metadata object, possibly only partially filled if the
             detail fetch fails, or None when the entry has no id URL or
             no title.
    '''
    def get_text(extra, x):
        # Stripped text of the first node matched by XPath x, else None.
        try:
            ans = x(extra)
            if ans:
                ans = ans[0].text
                if ans and ans.strip():
                    return ans.strip()
        except:
            log.exception('Programming error:')
        return None

    id_url = entry_id(entry_)[0].text
    douban_id = id_url.split('/')[-1]
    title_ = ': '.join([x.text for x in title(entry_)]).strip()
    authors = [x.text.strip() for x in creator(entry_) if x.text]
    if not authors:
        authors = [_('Unknown')]
    # Test the extracted title string, not the module-level XPath helper
    # ``title`` (which is always truthy, so entries with no title were
    # never discarded before).
    if not id_url or not title_:
        # Silently discard this entry
        return None

    mi = Metadata(title_, authors)
    mi.identifiers = {'douban':douban_id}
    try:
        raw = get_details(browser, id_url, timeout)
        feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
            strip_encoding_pats=True)[0])
        extra = entry(feed)[0]
    except:
        # Return what we have so far rather than nothing at all
        log.exception('Failed to get additional details for', mi.title)
        return mi

    mi.comments = get_text(extra, description)
    mi.publisher = get_text(extra, publisher)

    # ISBN: keep every valid ISBN, expose the longest one (prefers
    # ISBN-13 over ISBN-10) as the primary identifier.
    isbns = []
    for x in [t.text for t in isbn(extra)]:
        if check_isbn(x):
            isbns.append(x)
    if isbns:
        mi.isbn = sorted(isbns, key=len)[-1]
    mi.all_isbns = isbns

    # Tags: Douban tag values may be '/'-separated lists; split them and
    # de-duplicate while preserving order.
    try:
        btags = [x for x in booktag(extra) if x]
        tags = []
        for t in btags:
            atags = [y.strip() for y in t.split('/')]
            for tag in atags:
                if tag not in tags:
                    tags.append(tag)
    except:
        log.exception('Failed to parse tags:')
        tags = []
    if tags:
        # commas separate tags in calibre, so they cannot appear inside one
        mi.tags = [x.replace(',', ';') for x in tags]

    # pubdate
    pubdate = get_text(extra, date)
    if pubdate:
        try:
            # mid-month default avoids timezone day-shift artifacts
            default = utcnow().replace(day=15)
            mi.pubdate = parse_date(pubdate, assume_utc=True, default=default)
        except:
            log.error('Failed to parse pubdate %r'%pubdate)

    # Ratings: Douban rates out of 10, calibre out of 5
    if rating(extra):
        try:
            mi.rating = float(rating(extra)[0]) / 2.0
        except:
            log.exception('Failed to parse rating')
            mi.rating = 0

    # Cover: swap the small-image path for the large one; a
    # "book-default" URL means the book has no real cover.
    mi.has_douban_cover = None
    u = cover_url(extra)
    if u:
        u = u[0].replace('/spic/', '/lpic/')
        if u.find('book-default') == -1:
            mi.has_douban_cover = u
    return mi
# }}}
class Douban(Source):
    '''
    Metadata source plugin that queries the Douban Books API
    (api.douban.com) for book metadata and covers.
    '''

    name = 'Douban Books'
    author = 'Li Fanxi'
    version = (2, 0, 0)

    description = _('Downloads metadata and covers from Douban.com')

    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'tags',
        'pubdate', 'comments', 'publisher', 'identifier:isbn', 'rating',
        'identifier:douban']) # language currently disabled
    supports_gzip_transfer_encoding = True
    cached_cover_url_is_reliable = True

    DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d'
    DOUBAN_BOOK_URL = 'http://book.douban.com/subject/%s/'

    def get_book_url(self, identifiers): # {{{
        # Returns (identifier-type, value, URL) for the douban id, if any
        db = identifiers.get('douban', None)
        if db is not None:
            return ('douban', db, self.DOUBAN_BOOK_URL%db)
    # }}}

    def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
        '''
        Build the API URL for an identify query, preferring (in order)
        ISBN lookup, douban-id lookup, then a title/author search.
        Returns None when there is not enough metadata to search with.
        '''
        SEARCH_URL = 'http://api.douban.com/book/subjects?'
        ISBN_URL = 'http://api.douban.com/book/subject/isbn/'
        SUBJECT_URL = 'http://api.douban.com/book/subject/'

        q = ''
        t = None
        isbn = check_isbn(identifiers.get('isbn', None))
        subject = identifiers.get('douban', None)
        if isbn is not None:
            q = isbn
            t = 'isbn'
        elif subject is not None:
            q = subject
            t = 'subject'
        elif title or authors:
            def build_term(prefix, parts):
                return ' '.join(x for x in parts)
            title_tokens = list(self.get_title_tokens(title))
            if title_tokens:
                q += build_term('title', title_tokens)
            author_tokens = self.get_author_tokens(authors,
                    only_first_author=True)
            if author_tokens:
                q += ((' ' if q != '' else '') +
                    build_term('author', author_tokens))
            t = 'search'
        q = q.strip()
        if isinstance(q, unicode):
            q = q.encode('utf-8')
        if not q:
            return None
        url = None
        if t == "isbn":
            url = ISBN_URL + q
        elif t == 'subject':
            url = SUBJECT_URL + q
        else:
            url = SEARCH_URL + urlencode({
                    'q': q,
                    })
        if self.DOUBAN_API_KEY and self.DOUBAN_API_KEY != '':
            # Use '&' when the URL already carries a query string (the
            # search URL does); always using '?' produced a malformed URL
            # with two '?' characters for title/author searches.
            url = url + ('&' if '?' in url else '?') + "apikey=" + self.DOUBAN_API_KEY
        return url
    # }}}

    def download_cover(self, log, result_queue, abort, # {{{
            title=None, authors=None, identifiers={}, timeout=30):
        '''
        Download the cover image, running a full identify pass first if
        no cover URL has been cached for these identifiers.
        '''
        cached_url = self.get_cached_cover_url(identifiers)
        if cached_url is None:
            log.info('No cached cover found, running identify')
            rq = Queue()
            self.identify(log, rq, abort, title=title, authors=authors,
                    identifiers=identifiers)
            if abort.is_set():
                return
            # Drain all identify results, then look for a cover URL in
            # relevance order.
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            results.sort(key=self.identify_results_keygen(
                title=title, authors=authors, identifiers=identifiers))
            for mi in results:
                cached_url = self.get_cached_cover_url(mi.identifiers)
                if cached_url is not None:
                    break
        if cached_url is None:
            log.info('No cover found')
            return

        if abort.is_set():
            return
        br = self.browser
        log('Downloading cover from:', cached_url)
        try:
            cdata = br.open_novisit(cached_url, timeout=timeout).read()
            if cdata:
                result_queue.put((self, cdata))
        except:
            log.exception('Failed to download cover from:', cached_url)
    # }}}

    def get_cached_cover_url(self, identifiers): # {{{
        # Map ISBN -> douban id -> cached cover URL, if available
        url = None
        db = identifiers.get('douban', None)
        if db is None:
            isbn = identifiers.get('isbn', None)
            if isbn is not None:
                db = self.cached_isbn_to_identifier(isbn)
        if db is not None:
            url = self.cached_identifier_to_cover_url(db)

        return url
    # }}}

    def get_all_details(self, br, log, entries, abort, # {{{
            result_queue, timeout):
        '''
        Convert each feed entry to Metadata, populate the ISBN/cover
        caches, and push results onto result_queue. Runs serially:
        douban throttles concurrent requests with 403 errors.
        '''
        for relevance, i in enumerate(entries):
            try:
                ans = to_metadata(br, log, i, timeout)
                if isinstance(ans, Metadata):
                    ans.source_relevance = relevance
                    db = ans.identifiers['douban']
                    for isbn in getattr(ans, 'all_isbns', []):
                        self.cache_isbn_to_identifier(isbn, db)
                    if ans.has_douban_cover:
                        self.cache_identifier_to_cover_url(db,
                                ans.has_douban_cover)
                    self.clean_downloaded_metadata(ans)
                    result_queue.put(ans)
            except:
                log.exception(
                    'Failed to get metadata for identify entry:',
                    etree.tostring(i))
            if abort.is_set():
                break
    # }}}

    def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
            identifiers={}, timeout=30):
        '''
        Run an identify query against the Douban API, putting Metadata
        objects on result_queue. Returns None on success or a unicode
        error message on failure.
        '''
        query = self.create_query(log, title=title, authors=authors,
                identifiers=identifiers)
        if not query:
            log.error('Insufficient metadata to construct query')
            return
        br = self.browser
        try:
            raw = br.open_novisit(query, timeout=timeout).read()
        except Exception as e:
            log.exception('Failed to make identify query: %r'%query)
            return as_unicode(e)
        try:
            parser = etree.XMLParser(recover=True, no_network=True)
            feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
                strip_encoding_pats=True)[0], parser=parser)
            entries = entry(feed)
        except Exception as e:
            log.exception('Failed to parse identify results')
            return as_unicode(e)

        # Identifier-based lookup found nothing: retry once as a plain
        # title/author search before giving up.
        if not entries and identifiers and title and authors and \
                not abort.is_set():
            return self.identify(log, result_queue, abort, title=title,
                    authors=authors, timeout=timeout)

        # There is no point running these queries in threads as douban
        # throttles requests returning 403 Forbidden errors
        self.get_all_details(br, log, entries, abort, result_queue, timeout)

        return None
    # }}}
if __name__ == '__main__': # tests {{{
    # To run these tests use:
    #   calibre-debug -e src/calibre/ebooks/metadata/sources/douban.py
    from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
            title_test, authors_test)
    test_identify_plugin(Douban.name,
        [
            ( # Lookup by ISBN (exact title/author expected)
                {'identifiers':{'isbn': '9787536692930'}, 'title':'三体',
                    'authors':['刘慈欣']},
                [title_test('三体', exact=True),
                    authors_test(['刘慈欣'])]
            ),

            ( # Title/author search (inexact title match accepted)
                {'title': 'Linux内核修炼之道', 'authors':['任桥伟']},
                [title_test('Linux内核修炼之道', exact=False)]
            ),
        ])
# }}}

View File

@ -157,7 +157,7 @@ def to_metadata(browser, log, entry_, timeout): # {{{
class GoogleBooks(Source):
name = 'Google'
description = _('Downloads metadata from Google Books')
description = _('Downloads metadata and covers from Google Books')
capabilities = frozenset(['identify', 'cover'])
touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',

View File

@ -13,6 +13,7 @@ from Queue import Queue, Empty
from threading import Thread
from io import BytesIO
from operator import attrgetter
from urlparse import urlparse
from calibre.customize.ui import metadata_plugins, all_metadata_plugins
from calibre.ebooks.metadata.sources.base import create_log, msprefs
@ -371,6 +372,18 @@ def identify(log, abort, # {{{
longest, lp = -1, ''
for plugin, presults in results.iteritems():
presults.sort(key=plugin.identify_results_keygen(**sort_kwargs))
# Throw away lower priority results from the same source that have exactly the same
# title and authors as a higher priority result
filter_results = set()
filtered_results = []
for r in presults:
key = (r.title, tuple(r.authors))
if key not in filter_results:
filtered_results.append(r)
filter_results.add(key)
results[plugin] = presults = filtered_results
plog = logs[plugin].getvalue().strip()
log('\n'+'*'*30, plugin.name, '*'*30)
log('Request extra headers:', plugin.browser.addheaders)
@ -402,7 +415,7 @@ def identify(log, abort, # {{{
result.identify_plugin = plugin
if msprefs['txt_comments']:
if plugin.has_html_comments and result.comments:
result.comments = html2text(r.comments)
result.comments = html2text(result.comments)
log('The identify phase took %.2f seconds'%(time.time() - start_time))
log('The longest time (%f) was taken by:'%longest, lp)
@ -458,6 +471,14 @@ def urls_from_identifiers(identifiers): # {{{
if oclc:
ans.append(('OCLC', 'oclc', oclc,
'http://www.worldcat.org/oclc/'+oclc))
url = identifiers.get('uri', None)
if url is None:
url = identifiers.get('url', None)
if url and url.startswith('http'):
url = url[:8].replace('|', ':') + url[8:].replace('|', ',')
parts = urlparse(url)
name = parts.netloc
ans.append((name, 'url', url, url))
return ans
# }}}
@ -470,7 +491,7 @@ if __name__ == '__main__': # tests {{{
(
{'title':'Magykal Papers',
'authors':['Sage']},
[title_test('The Magykal Papers', exact=True)],
[title_test('Septimus Heap: The Magykal Papers', exact=True)],
),
@ -497,12 +518,6 @@ if __name__ == '__main__': # tests {{{
exact=True), authors_test(['Dan Brown'])]
),
( # No ISBN
{'title':'Justine', 'authors':['Durrel']},
[title_test('Justine', exact=True),
authors_test(['Lawrence Durrel'])]
),
( # A newer book
{'identifiers':{'isbn': '9780316044981'}},
[title_test('The Heroes', exact=True),

View File

@ -30,7 +30,7 @@ base_url = 'http://search.overdrive.com/'
class OverDrive(Source):
name = 'Overdrive'
description = _('Downloads metadata from Overdrive\'s Content Reserve')
description = _('Downloads metadata and covers from Overdrive\'s Content Reserve')
capabilities = frozenset(['identify', 'cover'])
touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
@ -41,7 +41,7 @@ class OverDrive(Source):
cached_cover_url_is_reliable = True
options = (
Option('get_full_metadata', 'bool', False,
Option('get_full_metadata', 'bool', True,
_('Download all metadata (slow)'),
_('Enable this option to gather all metadata available from Overdrive.')),
)

View File

@ -191,7 +191,11 @@ class OEBReader(object):
if not scheme and href not in known:
new.add(href)
elif item.media_type in OEB_STYLES:
for url in cssutils.getUrls(item.data):
try:
urls = list(cssutils.getUrls(item.data))
except:
urls = []
for url in urls:
href, _ = urldefrag(url)
href = item.abshref(urlnormalize(href))
scheme = urlparse(href).scheme

View File

@ -32,10 +32,11 @@ class PDFInput(InputFormatPlugin):
def convert_new(self, stream, accelerators):
from calibre.ebooks.pdf.reflow import PDFDocument
from calibre.utils.cleantext import clean_ascii_chars
if pdfreflow_err:
raise RuntimeError('Failed to load pdfreflow: ' + pdfreflow_err)
pdfreflow.reflow(stream.read(), 1, -1)
xml = open('index.xml', 'rb').read()
xml = clean_ascii_chars(open('index.xml', 'rb').read())
PDFDocument(xml, self.opts, self.log)
return os.path.join(os.getcwd(), 'metadata.opf')

View File

@ -86,7 +86,7 @@ class RTFInput(InputFormatPlugin):
run_lev = 4
self.log('Running RTFParser in debug mode')
except:
pass
self.log.warn('Impossible to run RTFParser in debug mode')
parser = ParseRtf(
in_file = stream,
out_file = ofile,

View File

@ -197,8 +197,8 @@ class ProcessTokens:
# character info => ci
'b' : ('ci', 'bold______', self.bool_st_func),
'blue' : ('ci', 'blue______', self.color_func),
'caps' : ('ci', 'caps______', self.bool_st_func),
'cf' : ('ci', 'font-color', self.default_func),
'caps' : ('ci', 'caps______', self.bool_st_func),
'cf' : ('ci', 'font-color', self.colorz_func),
'chftn' : ('ci', 'footnot-mk', self.bool_st_func),
'dn' : ('ci', 'font-down_', self.divide_by_2),
'embo' : ('ci', 'emboss____', self.bool_st_func),
@ -624,6 +624,11 @@ class ProcessTokens:
num = 'true'
return 'cw<%s<%s<nu<%s\n' % (pre, token, num)
def colorz_func(self, pre, token, num):
if num is None:
num = '0'
return 'cw<%s<%s<nu<%s\n' % (pre, token, num)
def __list_type_func(self, pre, token, num):
type = 'arabic'
if num is None:

View File

@ -12,7 +12,7 @@ A Humane Web Text Generator
#__date__ = '2009/12/04'
__copyright__ = """
Copyright (c) 2011, Leigh Parry
Copyright (c) 2011, Leigh Parry <leighparry@blueyonder.co.uk>
Copyright (c) 2011, John Schember <john@nachtimwald.com>
Copyright (c) 2009, Jason Samsa, http://jsamsa.com/
Copyright (c) 2004, Roberto A. F. De Almeida, http://dealmeida.net/
@ -219,14 +219,13 @@ class Textile(object):
]
glyph_defaults = [
(re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'), r'\1\2&#215;\3'), # dimension sign
(re.compile(r'(\d+)\'', re.I), r'\1&#8242;'), # prime
(re.compile(r'(\d+)\"', re.I), r'\1&#8243;'), # prime-double
(re.compile(r'(\d+)\'(\s)', re.I), r'\1&#8242;\2'), # prime
(re.compile(r'(\d+)\"(\s)', re.I), r'\1&#8243;\2'), # prime-double
(re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'), r'<acronym title="\2">\1</acronym>'), # 3+ uppercase acronym
(re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'), r'<span class="caps">\1</span>'), # 3+ uppercase
(re.compile(r'\b(\s{0,1})?\.{3}'), r'\1&#8230;'), # ellipsis
(re.compile(r'^[\*_-]{3,}$', re.M), r'<hr />'), # <hr> scene-break
(re.compile(r'\b--\b'), r'&#8212;'), # em dash
(re.compile(r'(\s)--(\s)'), r'\1&#8212;\2'), # em dash
(re.compile(r'(^|[^-])--([^-]|$)'), r'\1&#8212;\2'), # em dash
(re.compile(r'\s-(?:\s|$)'), r' &#8211; '), # en dash
(re.compile(r'\b( ?)[([]TM[])]', re.I), r'\1&#8482;'), # trademark
(re.compile(r'\b( ?)[([]R[])]', re.I), r'\1&#174;'), # registered
@ -706,6 +705,21 @@ class Textile(object):
result.append(line)
return ''.join(result)
def macros_only(self, text):
# fix: hackish
text = re.sub(r'"\Z', '\" ', text)
result = []
for line in re.compile(r'(<.*?>)', re.U).split(text):
if not re.search(r'<.*>', line):
rules = []
if re.search(r'{.+?}', line):
rules = self.macro_defaults
for s, r in rules:
line = s.sub(r, line)
result.append(line)
return ''.join(result)
def vAlign(self, input):
d = {'^':'top', '-':'middle', '~':'bottom'}
return d.get(input, '')
@ -814,6 +828,7 @@ class Textile(object):
'fooobar ... and hello world ...'
"""
text = self.macros_only(text)
punct = '!"#$%&\'*+,-./:;=?@\\^_`|~'
pattern = r'''
@ -1044,4 +1059,3 @@ def textile_restricted(text, lite=True, noimage=True, html_type='xhtml'):
return Textile(restricted=True, lite=lite,
noimage=noimage).textile(text, rel='nofollow',
html_type=html_type)

View File

@ -66,19 +66,26 @@ class TXTOutput(OutputFormatPlugin):
help=_('Do not remove image references within the document. This is only ' \
'useful when paired with a txt-output-formatting option that '
'is not none because links are always removed with plain text output.')),
OptionRecommendation(name='keep_color',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Do not remove font color from output. This is only useful when ' \
'txt-output-formatting is set to textile. Textile is the only ' \
'formatting that supports setting font color. If this option is ' \
'not specified font color will not be set and default to the ' \
'color displayed by the reader (generally this is black).')),
])
def convert(self, oeb_book, output_path, input_plugin, opts, log):
if opts.txt_output_formatting.lower() == 'markdown':
from calibre.ebooks.txt.markdownml import MarkdownMLizer
writer = MarkdownMLizer(log)
self.writer = MarkdownMLizer(log)
elif opts.txt_output_formatting.lower() == 'textile':
from calibre.ebooks.txt.textileml import TextileMLizer
writer = TextileMLizer(log)
self.writer = TextileMLizer(log)
else:
writer = TXTMLizer(log)
self.writer = TXTMLizer(log)
txt = writer.extract_content(oeb_book, opts)
txt = self.writer.extract_content(oeb_book, opts)
txt = clean_ascii_chars(txt)
log.debug('\tReplacing newlines with selected type...')
@ -111,17 +118,28 @@ class TXTZOutput(TXTOutput):
from calibre.ebooks.oeb.base import OEB_IMAGES
with TemporaryDirectory('_txtz_output') as tdir:
# TXT
with TemporaryFile('index.txt') as tf:
txt_name = 'index.txt'
if opts.txt_output_formatting.lower() == 'textile':
txt_name = 'index.text'
with TemporaryFile(txt_name) as tf:
TXTOutput.convert(self, oeb_book, tf, input_plugin, opts, log)
shutil.copy(tf, os.path.join(tdir, 'index.txt'))
shutil.copy(tf, os.path.join(tdir, txt_name))
# Images
for item in oeb_book.manifest:
if item.media_type in OEB_IMAGES:
path = os.path.join(tdir, os.path.dirname(item.href))
if hasattr(self.writer, 'images'):
path = os.path.join(tdir, 'images')
if item.href in self.writer.images:
href = self.writer.images[item.href]
else:
continue
else:
path = os.path.join(tdir, os.path.dirname(item.href))
href = os.path.basename(item.href)
if not os.path.exists(path):
os.makedirs(path)
with open(os.path.join(tdir, item.href), 'wb') as imgf:
with open(os.path.join(path, href), 'wb') as imgf:
imgf.write(item.data)
# Metadata

View File

@ -242,6 +242,8 @@ def detect_formatting_type(txt):
textile_count += len(re.findall(r'(?mu)(?<=\!)\S+(?=\!)', txt))
# Links
textile_count += len(re.findall(r'"[^"]*":\S+', txt))
# paragraph blocks
textile_count += len(re.findall(r'(?mu)^p(<|<>|=|>)?\. ', txt))
# Decide if either markdown or textile is used in the text
# based on the number of unique formatting elements found.

View File

@ -1,62 +1,489 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__copyright__ = '2011, Leigh Parry <leighparry@blueyonder.co.uk>'
__docformat__ = 'restructuredtext en'
'''
Transform OEB content into Textile formatted plain text
'''
import re
from lxml import etree
from functools import partial
from calibre.ebooks.oeb.base import XHTML
from calibre.utils.html2textile import html2textile
from calibre.ebooks.htmlz.oeb2html import OEB2HTML
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace, rewrite_links
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks import unit_convert
from calibre.ebooks.txt.unsmarten import unsmarten
class TextileMLizer(object):
def __init__(self, log):
self.log = log
class TextileMLizer(OEB2HTML):
def extract_content(self, oeb_book, opts):
self.log.info('Converting XHTML to Textile formatted TXT...')
self.oeb_book = oeb_book
self.opts = opts
self.in_pre = False
self.in_table = False
self.links = {}
self.list = []
self.our_links = []
self.in_a_link = False
self.our_ids = []
self.images = {}
self.id_no_text = u''
self.style_embed = []
self.remove_space_after_newline = False
self.base_hrefs = [item.href for item in oeb_book.spine]
self.map_resources(oeb_book)
return self.mlize_spine()
self.style_bold = False
self.style_italic = False
self.style_under = False
self.style_strike = False
self.style_smallcap = False
def mlize_spine(self):
txt = self.mlize_spine(oeb_book)
txt = unsmarten(txt)
# Do some tidying up
txt = self.tidy_up(txt)
return txt
def mlize_spine(self, oeb_book):
output = [u'']
for item in self.oeb_book.spine:
for item in oeb_book.spine:
self.log.debug('Converting %s to Textile formatted TXT...' % item.href)
self.rewrite_ids(item.data, item)
rewrite_links(item.data, partial(self.rewrite_link, page=item))
stylizer = Stylizer(item.data, item.href, oeb_book, self.opts, self.opts.output_profile)
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
output.append('\n\n')
return ''.join(output)
html = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
def tidy_up(self, text):
# May need tweaking and finetuning
def check_escaping(text, tests):
for t in tests:
# I'm not checking for duplicated spans '%' as any that follow each other were being incorrectly merged
txt = '%s' % t
if txt != '%':
text = re.sub(r'([^'+t+'|^\n])'+t+'\]\['+t+'([^'+t+'])', r'\1\2', text)
text = re.sub(r'([^'+t+'|^\n])'+t+t+'([^'+t+'])', r'\1\2', text)
text = re.sub(r'(\s|[*_\'"])\[('+t+'[a-zA-Z0-9 \'",.*_]+'+t+')\](\s|[*_\'"?!,.])', r'\1\2\3', text)
return text
if not self.opts.keep_links:
html = re.sub(r'<\s*/*\s*a[^>]*>', '', html)
if not self.opts.keep_image_references:
html = re.sub(r'<\s*img[^>]*>', '', html)
# Now tidyup links and ids - remove ones that don't have a correponding opposite
if self.opts.keep_links:
for i in self.our_links:
if i[0] == '#':
if i not in self.our_ids:
text = re.sub(r'"(.+)":'+i+'(\s)', r'\1\2', text)
for i in self.our_ids:
if i not in self.our_links:
text = re.sub(r'%?\('+i+'\)\xa0?%?', r'', text)
# Remove obvious non-needed escaping, add sub/sup-script ones
text = check_escaping(text, ['\*', '_', '\*'])
# escape the super/sub-scripts if needed
text = re.sub(r'(\w)([~^]\w+[~^])', r'\1[\2]', text)
# escape the super/sub-scripts if needed
text = re.sub(r'([~^]\w+[~^])(\w)', r'[\1]\2', text)
text = html2textile(html)
#remove empty spans
text = re.sub(r'%\xa0+', r'%', text)
#remove empty spans - MAY MERGE SOME ?
text = re.sub(r'%%', r'', text)
#remove spans from tagged output
text = re.sub(r'%([_+*-]+)%', r'\1', text)
#remove spaces before a newline
text = re.sub(r' +\n', r'\n', text)
#remove newlines at top of file
text = re.sub(r'^\n+', r'', text)
#correct blockcode paras
text = re.sub(r'\npre\.\n?\nbc\.', r'\nbc.', text)
#correct blockquote paras
text = re.sub(r'\nbq\.\n?\np.*\. ', r'\nbq. ', text)
# Ensure the section ends with at least two new line characters.
# This is to prevent the last paragraph from a section being
# combined into the fist paragraph of the next.
end_chars = text[-4:]
# Convert all newlines to \n
end_chars = end_chars.replace('\r\n', '\n')
end_chars = end_chars.replace('\r', '\n')
end_chars = end_chars[-2:]
if not end_chars[1] == '\n':
text += '\n\n'
if end_chars[1] == '\n' and not end_chars[0] == '\n':
text += '\n'
#reduce blank lines
text = re.sub(r'\n{3}', r'\n\np. \n\n', text)
text = re.sub(u'%\n(p[<>=]{1,2}\.|p\.)', r'%\n\n\1', text)
#Check span following blank para
text = re.sub(r'\n+ +%', r' %', text)
text = re.sub(u'p[<>=]{1,2}\.\n\n?', r'', text)
# blank paragraph
text = re.sub(r'\n(p.*\.)\n', r'\n\1 \n\n', text)
# blank paragraph
text = re.sub(u'\n\xa0', r'\np. ', text)
# blank paragraph
text = re.sub(u'\np[<>=]{1,2}?\. \xa0', r'\np. ', text)
text = re.sub(r'(^|\n)(p.*\. ?\n)(p.*\.)', r'\1\3', text)
text = re.sub(r'\n(p\. \n)(p.*\.|h.*\.)', r'\n\2', text)
#sort out spaces in tables
text = re.sub(r' {2,}\|', r' |', text)
output += text
# Now put back spaces removed earlier as they're needed here
text = re.sub(r'\np\.\n', r'\np. \n', text)
#reduce blank lines
text = re.sub(r' \n\n\n', r' \n\n', text)
output = u''.join(output)
return text
return output
def remove_newlines(self, text):
text = text.replace('\r\n', ' ')
text = text.replace('\n', ' ')
text = text.replace('\r', ' ')
# Condense redundant spaces created by replacing newlines with spaces.
text = re.sub(r'[ ]{2,}', ' ', text)
text = re.sub(r'\t+', '', text)
if self.remove_space_after_newline == True:
text = re.sub(r'^ +', '', text)
self.remove_space_after_newline = False
return text
def check_styles(self, style):
txt = '{'
if self.opts.keep_color:
if 'color' in style.cssdict() and style['color'] != 'black':
txt += 'color:'+style['color']+';'
if 'background' in style.cssdict():
txt += 'background:'+style['background']+';'
txt += '}'
if txt == '{}': txt = ''
return txt
def check_halign(self, style):
tests = {'left':'<','justify':'<>','center':'=','right':'>'}
for i in tests:
if style['text-align'] == i:
return tests[i]
return ''
def check_valign(self, style):
tests = {'top':'^','bottom':'~'} #, 'middle':'-'}
for i in tests:
if style['vertical-align'] == i:
return tests[i]
return ''
def check_padding(self, style, stylizer):
txt = ''
left_padding_pts = 0
left_margin_pts = 0
if 'padding-left' in style.cssdict() and style['padding-left'] != 'auto':
left_padding_pts = unit_convert(style['padding-left'], style.width, style.fontSize, stylizer.profile.dpi)
if 'margin-left' in style.cssdict() and style['margin-left'] != 'auto':
left_margin_pts = unit_convert(style['margin-left'], style.width, style.fontSize, stylizer.profile.dpi)
left = left_margin_pts + left_padding_pts
emleft = int(round(left / stylizer.profile.fbase))
if emleft >= 1:
txt += '(' * emleft
right_padding_pts = 0
right_margin_pts = 0
if 'padding-right' in style.cssdict() and style['padding-right'] != 'auto':
right_padding_pts = unit_convert(style['padding-right'], style.width, style.fontSize, stylizer.profile.dpi)
if 'margin-right' in style.cssdict() and style['margin-right'] != 'auto':
right_margin_pts = unit_convert(style['margin-right'], style.width, style.fontSize, stylizer.profile.dpi)
right = right_margin_pts + right_padding_pts
emright = int(round(right / stylizer.profile.fbase))
if emright >= 1:
txt += ')' * emright
return txt
def check_id_tag(self, attribs):
txt = ''
if attribs.has_key('id'):
txt = '(#'+attribs['id']+ ')'
self.our_ids.append('#'+attribs['id'])
self.id_no_text = u'\xa0'
return txt
def build_block(self, tag, style, attribs, stylizer):
txt = '\n' + tag
if self.opts.keep_links:
txt += self.check_id_tag(attribs)
txt += self.check_padding(style, stylizer)
txt += self.check_halign(style)
txt += self.check_styles(style)
return txt
def prepare_string_for_textile(self, txt):
if re.search(r'(\s([*&_+\-~@%|]|\?{2})\S)|(\S([*&_+\-~@%|]|\?{2})\s)', txt):
return ' ==%s== ' % txt
return txt
def dump_text(self, elem, stylizer):
'''
@elem: The element in the etree that we are working on.
@stylizer: The style information attached to the element.
'''
# We can only processes tags. If there isn't a tag return any text.
if not isinstance(elem.tag, basestring) \
or namespace(elem.tag) != XHTML_NS:
p = elem.getparent()
if p is not None and isinstance(p.tag, basestring) and namespace(p.tag) == XHTML_NS \
and elem.tail:
return [elem.tail]
return ['']
# Setup our variables.
text = ['']
style = stylizer.style(elem)
tags = []
tag = barename(elem.tag)
attribs = elem.attrib
# Ignore anything that is set to not be displayed.
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
or style['visibility'] == 'hidden':
return ['']
# Soft scene breaks.
if 'margin-top' in style.cssdict() and style['margin-top'] != 'auto':
ems = int(round(float(style.marginTop) / style.fontSize) - 1)
if ems >= 1:
text.append(u'\n\n\xa0' * ems)
if tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'div'):
if tag == 'div':
tag = 'p'
text.append(self.build_block(tag, style, attribs, stylizer))
text.append('. ')
tags.append('\n')
if style['font-style'] == 'italic' or tag in ('i', 'em'):
if tag not in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'cite'):
if self.style_italic == False:
if self.in_a_link:
text.append('_')
tags.append('_')
else:
text.append('[_')
tags.append('_]')
self.style_embed.append('_')
self.style_italic = True
if style['font-weight'] in ('bold', 'bolder') or tag in ('b', 'strong'):
if tag not in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'th'):
if self.style_bold == False:
if self.in_a_link:
text.append('*')
tags.append('*')
else:
text.append('[*')
tags.append('*]')
self.style_embed.append('*')
self.style_bold = True
if style['text-decoration'] == 'underline' or tag in ('u', 'ins'):
if tag != 'a':
if self.style_under == False:
text.append('[+')
tags.append('+]')
self.style_embed.append('+')
self.style_under = True
if style['text-decoration'] == 'line-through' or tag in ('strike', 'del', 's'):
if self.style_strike == False:
text.append('[-')
tags.append('-]')
self.style_embed.append('-')
self.style_strike = True
if tag == 'br':
for i in reversed(self.style_embed):
text.append(i)
text.append('\n')
for i in self.style_embed:
text.append(i)
tags.append('')
self.remove_space_after_newline = True
if tag == 'blockquote':
text.append('\nbq. ')
tags.append('\n')
elif tag in ('abbr', 'acronym'):
text.append('')
txt = attribs['title']
tags.append('(' + txt + ')')
elif tag == 'sup':
text.append('^')
tags.append('^')
elif tag == 'sub':
text.append('~')
tags.append('~')
elif tag == 'code':
if self.in_pre:
text.append('\nbc. ')
tags.append('')
else:
text.append('@')
tags.append('@')
elif tag == 'cite':
text.append('??')
tags.append('??')
elif tag == 'hr':
text.append('\n***')
tags.append('\n')
elif tag == 'pre':
self.in_pre = True
text.append('\npre. ')
tags.append('pre\n')
elif tag == 'a':
if self.opts.keep_links:
if attribs.has_key('href'):
text.append('"')
tags.append('a')
tags.append('":' + attribs['href'])
self.our_links.append(attribs['href'])
if attribs.has_key('title'):
tags.append('(' + attribs['title'] + ')')
self.in_a_link = True
else:
text.append('%')
tags.append('%')
elif tag == 'img':
if self.opts.keep_image_references:
txt = '!' + self.check_halign(style)
txt += self.check_valign(style)
txt += attribs['src']
text.append(txt)
if attribs.has_key('alt'):
txt = attribs['alt']
if txt != '':
text.append('(' + txt + ')')
tags.append('!')
elif tag in ('ol', 'ul'):
self.list.append({'name': tag, 'num': 0})
text.append('')
tags.append(tag)
elif tag == 'li':
if self.list: li = self.list[-1]
else: li = {'name': 'ul', 'num': 0}
text.append('\n')
if li['name'] == 'ul':
text.append('*' * len(self.list) + ' ')
elif li['name'] == 'ol':
text.append('#' * len(self.list) + ' ')
tags.append('')
elif tag == 'dl':
text.append('\n')
tags.append('')
elif tag == 'dt':
text.append('')
tags.append('\n')
elif tag == 'dd':
text.append(' ')
tags.append('')
elif tag == 'dd':
text.append('')
tags.append('\n')
elif tag == 'table':
txt = self.build_block(tag, style, attribs, stylizer)
txt += '. \n'
if txt != '\ntable. \n':
text.append(txt)
else:
text.append('\n')
tags.append('')
elif tag == 'tr':
txt = self.build_block('', style, attribs, stylizer)
txt += '. '
if txt != '\n. ':
txt = re.sub ('\n', '', txt)
text.append(txt)
tags.append('|\n')
elif tag == 'td':
text.append('|')
txt = ''
txt += self.check_halign(style)
txt += self.check_valign(style)
if attribs.has_key ('colspan'):
txt += '\\' + attribs['colspan']
if attribs.has_key ('rowspan'):
txt += '/' + attribs['rowspan']
txt += self.check_styles(style)
if txt != '':
text.append(txt + '. ')
tags.append('')
elif tag == 'th':
text.append('|_. ')
tags.append('')
elif tag == 'span':
if style['font-variant'] == 'small-caps':
if self.style_smallcap == False:
text.append('&')
tags.append('&')
self.style_smallcap = True
else:
if self.in_a_link == False:
txt = '%'
if self.opts.keep_links:
txt += self.check_id_tag(attribs)
txt += self.check_styles(style)
if txt != '%':
text.append(txt)
tags.append('%')
if self.opts.keep_links and attribs.has_key('id'):
if tag not in ('body', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'span', 'table'):
text.append(self.check_id_tag(attribs))
# Process the styles for any that we want to keep
if tag not in ('body', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'hr', 'a', 'img', \
'span', 'table', 'tr', 'td'):
if not self.in_a_link:
text.append(self.check_styles(style))
# Process tags that contain text.
if hasattr(elem, 'text') and elem.text:
txt = elem.text
if not self.in_pre:
txt = self.prepare_string_for_textile(self.remove_newlines(txt))
text.append(txt)
self.id_no_text = u''
# Recurse down into tags within the tag we are in.
for item in elem:
text += self.dump_text(item, stylizer)
# Close all open tags.
tags.reverse()
for t in tags:
if tag in ('pre', 'ul', 'ol', 'li', 'table'):
if tag == 'pre':
self.in_pre = False
elif tag in ('ul', 'ol'):
if self.list: self.list.pop()
if not self.list: text.append('\n')
else:
if t == 'a':
self.in_a_link = False
t = ''
text.append(self.id_no_text)
self.id_no_text = u''
if t in ('*]', '*'):
self.style_bold = False
elif t in ('_]', '_'):
self.style_italic = False
elif t == '+]':
self.style_under = False
elif t == '-]':
self.style_strike = False
elif t == '&':
self.style_smallcap = False
if t in ('*]', '_]', '+]', '-]', '*', '_'):
txt = self.style_embed.pop()
text.append('%s' % t)
# Soft scene breaks.
if 'margin-bottom' in style.cssdict() and style['margin-bottom'] != 'auto':
ems = int(round((float(style.marginBottom) / style.fontSize) - 1))
if ems >= 1:
text.append(u'\n\n\xa0' * ems)
# Add the text that is outside of the tag.
if hasattr(elem, 'tail') and elem.tail:
tail = elem.tail
if not self.in_pre:
tail = self.prepare_string_for_textile(self.remove_newlines(tail))
text.append(tail)
return text

View File

@ -0,0 +1,108 @@
# -*- coding: utf-8 -*-
"""unsmarten : html2textile helper function"""
__version__ = '0.1'
__author__ = 'Leigh Parry'
import re
def unsmarten(txt):
    """Replace 'smart' typographic characters and their HTML entities with
    plain-ASCII or textile glyph equivalents.

    Each rule matches the numeric entity, the named entity and the literal
    unicode character for one glyph (e.g. ``&#8211;``, ``&ndash;`` and the
    en-dash itself).  Several literal characters had been lost to encoding
    garbling, leaving empty alternations that matched the empty string and
    mangled the whole text; they are restored here from the entity codes on
    the same line.

    :param txt: unicode string to transform
    :return: the transformed unicode string
    """
    txt = re.sub(u'&#8211;|&ndash;|\u2013', r'-', txt)       # en-dash
    txt = re.sub(u'&#8212;|&mdash;|\u2014', r'--', txt)      # em-dash
    txt = re.sub(u'&#8230;|&hellip;|\u2026', r'...', txt)    # ellipsis

    txt = re.sub(u'&#8220;|&#8221;|&#8243;|&ldquo;|&rdquo;|&Prime;|\u201c|\u201d|\u2033', r'"', txt) # double quote
    # NOTE(review): the trailing right-single-quote literal was lost to
    # mojibake; reconstructed so an apostrophe preceded by a quote or
    # whitespace becomes the {'/} token -- TODO confirm against upstream.
    txt = re.sub(u'(["\'\u2018\u201c]|\s)\u2019', r"\1{'/}", txt)  # apostrophe
    txt = re.sub(u'&#8216;|&#8217;|&#8242;|&lsquo;|&rsquo;|&prime;|\u2018|\u2019|\u2032', r"'", txt) # single quote

    txt = re.sub(u'&#162;|&cent;|\xa2', r'{c\}', txt)       # cent
    txt = re.sub(u'&#163;|&pound;|\xa3', r'{L-}', txt)      # pound
    txt = re.sub(u'&#165;|&yen;|\xa5', r'{Y=}', txt)        # yen
    txt = re.sub(u'&#169;|&copy;|\xa9', r'{(c)}', txt)      # copyright
    txt = re.sub(u'&#174;|&reg;|\xae', r'{(r)}', txt)       # registered
    txt = re.sub(u'&#188;|&frac14;|\xbc', r'{1/4}', txt)    # quarter
    txt = re.sub(u'&#189;|&frac12;|\xbd', r'{1/2}', txt)    # half
    txt = re.sub(u'&#190;|&frac34;|\xbe', r'{3/4}', txt)    # three-quarter
    # Fixed: the replacement contained a stray ')' ({A`)}), inconsistent with
    # every other grave-accent token below.
    txt = re.sub(u'&#192;|&Agrave;|\xc0', r'{A`}', txt)     # A-grave
    txt = re.sub(u'&#193;|&Aacute;|\xc1', r"{A'}", txt)     # A-acute
    txt = re.sub(u'&#194;|&Acirc;|\xc2', r'{A^}', txt)      # A-circumflex
    txt = re.sub(u'&#195;|&Atilde;|\xc3', r'{A~}', txt)     # A-tilde
    txt = re.sub(u'&#196;|&Auml;|\xc4', r'{A"}', txt)       # A-umlaut
    txt = re.sub(u'&#197;|&Aring;|\xc5', r'{Ao}', txt)      # A-ring
    txt = re.sub(u'&#198;|&AElig;|\xc6', r'{AE}', txt)      # AE
    txt = re.sub(u'&#199;|&Ccedil;|\xc7', r'{C,}', txt)     # C-cedilla
    txt = re.sub(u'&#200;|&Egrave;|\xc8', r'{E`}', txt)     # E-grave
    txt = re.sub(u'&#201;|&Eacute;|\xc9', r"{E'}", txt)     # E-acute
    txt = re.sub(u'&#202;|&Ecirc;|\xca', r'{E^}', txt)      # E-circumflex
    txt = re.sub(u'&#203;|&Euml;|\xcb', r'{E"}', txt)       # E-umlaut
    txt = re.sub(u'&#204;|&Igrave;|\xcc', r'{I`}', txt)     # I-grave
    txt = re.sub(u'&#205;|&Iacute;|\xcd', r"{I'}", txt)     # I-acute
    txt = re.sub(u'&#206;|&Icirc;|\xce', r'{I^}', txt)      # I-circumflex
    txt = re.sub(u'&#207;|&Iuml;|\xcf', r'{I"}', txt)       # I-umlaut
    txt = re.sub(u'&#208;|&ETH;|\xd0', r'{D-}', txt)        # ETH
    txt = re.sub(u'&#209;|&Ntilde;|\xd1', r'{N~}', txt)     # N-tilde
    txt = re.sub(u'&#210;|&Ograve;|\xd2', r'{O`}', txt)     # O-grave
    txt = re.sub(u'&#211;|&Oacute;|\xd3', r"{O'}", txt)     # O-acute
    txt = re.sub(u'&#212;|&Ocirc;|\xd4', r'{O^}', txt)      # O-circumflex
    txt = re.sub(u'&#213;|&Otilde;|\xd5', r'{O~}', txt)     # O-tilde
    txt = re.sub(u'&#214;|&Ouml;|\xd6', r'{O"}', txt)       # O-umlaut
    txt = re.sub(u'&#215;|&times;|\xd7', r'{x}', txt)       # dimension
    txt = re.sub(u'&#216;|&Oslash;|\xd8', r'{O/}', txt)     # O-slash
    txt = re.sub(u'&#217;|&Ugrave;|\xd9', r"{U`}", txt)     # U-grave
    txt = re.sub(u'&#218;|&Uacute;|\xda', r"{U'}", txt)     # U-acute
    txt = re.sub(u'&#219;|&Ucirc;|\xdb', r'{U^}', txt)      # U-circumflex
    txt = re.sub(u'&#220;|&Uuml;|\xdc', r'{U"}', txt)       # U-umlaut
    txt = re.sub(u'&#221;|&Yacute;|\xdd', r"{Y'}", txt)     # Y-acute
    txt = re.sub(u'&#223;|&szlig;|\xdf', r'{sz}', txt)      # sharp-s
    txt = re.sub(u'&#224;|&agrave;|\xe0', r'{a`}', txt)     # a-grave
    txt = re.sub(u'&#225;|&aacute;|\xe1', r"{a'}", txt)     # a-acute
    txt = re.sub(u'&#226;|&acirc;|\xe2', r'{a^}', txt)      # a-circumflex
    txt = re.sub(u'&#227;|&atilde;|\xe3', r'{a~}', txt)     # a-tilde
    txt = re.sub(u'&#228;|&auml;|\xe4', r'{a"}', txt)       # a-umlaut
    txt = re.sub(u'&#229;|&aring;|\xe5', r'{ao}', txt)      # a-ring
    txt = re.sub(u'&#230;|&aelig;|\xe6', r'{ae}', txt)      # ae
    txt = re.sub(u'&#231;|&ccedil;|\xe7', r'{c,}', txt)     # c-cedilla
    txt = re.sub(u'&#232;|&egrave;|\xe8', r'{e`}', txt)     # e-grave
    txt = re.sub(u'&#233;|&eacute;|\xe9', r"{e'}", txt)     # e-acute
    txt = re.sub(u'&#234;|&ecirc;|\xea', r'{e^}', txt)      # e-circumflex
    txt = re.sub(u'&#235;|&euml;|\xeb', r'{e"}', txt)       # e-umlaut
    txt = re.sub(u'&#236;|&igrave;|\xec', r'{i`}', txt)     # i-grave
    txt = re.sub(u'&#237;|&iacute;|\xed', r"{i'}", txt)     # i-acute
    txt = re.sub(u'&#238;|&icirc;|\xee', r'{i^}', txt)      # i-circumflex
    txt = re.sub(u'&#239;|&iuml;|\xef', r'{i"}', txt)       # i-umlaut
    txt = re.sub(u'&#240;|&eth;|\xf0', r'{d-}', txt)        # eth
    txt = re.sub(u'&#241;|&ntilde;|\xf1', r'{n~}', txt)     # n-tilde
    txt = re.sub(u'&#242;|&ograve;|\xf2', r'{o`}', txt)     # o-grave
    txt = re.sub(u'&#243;|&oacute;|\xf3', r"{o'}", txt)     # o-acute
    txt = re.sub(u'&#244;|&ocirc;|\xf4', r'{o^}', txt)      # o-circumflex
    txt = re.sub(u'&#245;|&otilde;|\xf5', r'{o~}', txt)     # o-tilde
    txt = re.sub(u'&#246;|&ouml;|\xf6', r'{o"}', txt)       # o-umlaut
    txt = re.sub(u'&#248;|&oslash;|\xf8', r'{o/}', txt)     # o-stroke
    txt = re.sub(u'&#249;|&ugrave;|\xf9', r'{u`}', txt)     # u-grave
    txt = re.sub(u'&#250;|&uacute;|\xfa', r"{u'}", txt)     # u-acute
    txt = re.sub(u'&#251;|&ucirc;|\xfb', r'{u^}', txt)      # u-circumflex
    txt = re.sub(u'&#252;|&uuml;|\xfc', r'{u"}', txt)       # u-umlaut
    txt = re.sub(u'&#253;|&yacute;|\xfd', r"{y'}", txt)     # y-acute
    txt = re.sub(u'&#255;|&yuml;|\xff', r'{y"}', txt)       # y-umlaut
    txt = re.sub(u'&#338;|&OElig;|\u0152', r'{OE}', txt)    # OE
    txt = re.sub(u'&#339;|&oelig;|\u0153', r'{oe}', txt)    # oe
    # NOTE(review): &#348; is S-circumflex while &Scaron; is Scaron; the
    # original mixed them on one line -- kept as-is, comment corrected.
    txt = re.sub(u'&#348;|&Scaron;|\u015c', r'{S^}', txt)   # S-circumflex / Scaron
    txt = re.sub(u'&#349;|&scaron;|\u015d', r'{s^}', txt)   # s-circumflex / scaron
    txt = re.sub(u'&#8226;|&bull;|\u2022', r'{*}', txt)     # bullet
    txt = re.sub(u'&#8355;|\u20a3', r'{Fr}', txt)           # Franc
    txt = re.sub(u'&#8356;|\u20a4', r'{L=}', txt)           # Lira
    txt = re.sub(u'&#8360;|\u20a8', r'{Rs}', txt)           # Rupee
    txt = re.sub(u'&#8364;|&euro;|\u20ac', r'{C=}', txt)    # euro
    txt = re.sub(u'&#8482;|&trade;|\u2122', r'{tm}', txt)   # trademark
    txt = re.sub(u'&#9824;|&spades;|\u2660', r'{spade}', txt)   # spade
    txt = re.sub(u'&#9827;|&clubs;|\u2663', r'{club}', txt)     # club
    txt = re.sub(u'&#9829;|&hearts;|\u2665', r'{heart}', txt)   # heart
    txt = re.sub(u'&#9830;|&diams;|\u2666', r'{diamond}', txt)  # diamond

    # Move into main code?
    # txt = re.sub(u'\xa0', r'p. ', txt)                   # blank paragraph
    # txt = re.sub(u'\n\n\n\n', r'\n\np. \n\n', txt)       # blank paragraph
    # txt = re.sub(u'\n \n', r'\n<br />\n', txt)           # blank paragraph - br tag

    return txt

View File

@ -620,7 +620,21 @@ class Application(QApplication):
self.original_font = QFont(QApplication.font())
fi = gprefs['font']
if fi is not None:
QApplication.setFont(QFont(*fi))
font = QFont(*(fi[:4]))
s = gprefs.get('font_stretch', None)
if s is not None:
font.setStretch(s)
QApplication.setFont(font)
st = self.style()
if st is not None:
st = unicode(st.objectName()).lower()
if (islinux or isfreebsd) and st in ('windows', 'motif', 'cde'):
from PyQt4.Qt import QStyleFactory
styles = set(map(unicode, QStyleFactory.keys()))
if 'Cleanlooks' in styles:
self.setStyle('Cleanlooks')
else:
self.setStyle('Plastique')
def _send_file_open_events(self):
with self._file_open_lock:

View File

@ -20,6 +20,9 @@ class GenerateCatalogAction(InterfaceAction):
action_spec = (_('Create a catalog of the books in your calibre library'), 'catalog.png', 'Catalog builder', None)
dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device'])
def genesis(self):
self.qaction.triggered.connect(self.generate_catalog)
def generate_catalog(self):
rows = self.gui.library_view.selectionModel().selectedRows()
if not rows or len(rows) < 2:

View File

@ -246,7 +246,8 @@ class ChooseLibraryAction(InterfaceAction):
def delete_requested(self, name, location):
loc = location.replace('/', os.sep)
if not question_dialog(self.gui, _('Are you sure?'), '<p>'+
_('<b style="color: red">All files</b> from <br><br><b>%s</b><br><br> will be '
_('<b style="color: red">All files</b> (not just ebooks) '
'from <br><br><b>%s</b><br><br> will be '
'<b>permanently deleted</b>. Are you sure?') % loc,
show_copy_button=False):
return

View File

@ -478,6 +478,10 @@ class EditMetadataAction(InterfaceAction):
try:
set_title = not mi.is_null('title')
set_authors = not mi.is_null('authors')
idents = db.get_identifiers(i, index_is_id=True)
if mi.identifiers:
idents.update(mi.identifiers)
mi.identifiers = idents
db.set_metadata(i, mi, commit=False, set_title=set_title,
set_authors=set_authors, notify=False)
self.applied_ids.append(i)

View File

@ -10,6 +10,7 @@ from functools import partial
from PyQt4.Qt import QMenu
from calibre.gui2 import error_dialog
from calibre.gui2.actions import InterfaceAction
from calibre.gui2.dialogs.confirm_delete import confirm
@ -19,24 +20,86 @@ class StoreAction(InterfaceAction):
action_spec = (_('Get books'), 'store.png', None, None)
def genesis(self):
self.qaction.triggered.connect(self.search)
self.qaction.triggered.connect(self.do_search)
self.store_menu = QMenu()
self.load_menu()
def load_menu(self):
self.store_menu.clear()
self.store_menu.addAction(_('Search'), self.search)
self.store_menu.addAction(_('Search for ebooks'), self.search)
self.store_menu.addAction(_('Search for this author'), self.search_author)
self.store_menu.addAction(_('Search for this title'), self.search_title)
self.store_menu.addAction(_('Search for this book'), self.search_author_title)
self.store_menu.addSeparator()
for n, p in self.gui.istores.items():
self.store_menu.addAction(n, partial(self.open_store, p))
self.store_list_menu = self.store_menu.addMenu(_('Stores'))
for n, p in sorted(self.gui.istores.items(), key=lambda x: x[0].lower()):
self.store_list_menu.addAction(n, partial(self.open_store, p))
self.qaction.setMenu(self.store_menu)
def search(self):
def do_search(self):
    # Slot for qaction.triggered.  QAction.triggered emits a 'checked'
    # bool; routing the toolbar click through this no-argument wrapper
    # (instead of connecting search() directly) keeps that bool from
    # being passed as search()'s query argument.
    return self.search()
def search(self, query=''):
    # Show the one-time store disclaimer, then open the multi-store
    # search dialog, optionally pre-filled with *query*.
    self.show_disclaimer()
    # Imported here so the (heavy) search UI is only loaded on demand.
    from calibre.gui2.store.search.search import SearchDialog
    sd = SearchDialog(self.gui.istores, self.gui, query)
    sd.exec_()
def _get_selected_row(self):
    """Return the row index of the currently selected book in the current
    view, or None if nothing is selected.

    (Simplified the redundant ``not rows or len(rows) == 0`` test: an
    empty list is already falsy.)
    """
    rows = self.gui.current_view().selectionModel().selectedRows()
    if not rows:
        return None
    return rows[0].row()
def _get_author(self, row):
    # Return a display string for the author(s) of the book at *row* in
    # the current view, or '' if none can be determined.
    author = ''
    if self.gui.current_view() is self.gui.library_view:
        author = self.gui.library_view.model().authors(row)
        if author:
            # The library model returns '|' inside the author string;
            # presumably the DB's escape for ',' -- replaced with a space
            # to form a usable search query.  TODO confirm.
            author = author.replace('|', ' ')
    else:
        # Device view: assemble the string from the book's metadata object.
        mi = self.gui.current_view().model().get_book_display_info(row)
        author = ' & '.join(mi.authors)
    return author
def search_author(self):
    """Open the Get Books search dialog pre-filled with an ``author:``
    query for the currently selected book.

    Shows an error dialog and returns if no book is selected.
    """
    row = self._get_selected_row()
    if row is None:  # idiom fix: 'is None', not '== None' (PEP 8)
        error_dialog(self.gui, _('Cannot search'), _('No book selected'), show=True)
        return
    query = 'author:"%s"' % self._get_author(row)
    self.search(query)
def _get_title(self, row):
    # Return the title of the book at *row* in the current view, or ''
    # if it cannot be determined.
    title = ''
    if self.gui.current_view() is self.gui.library_view:
        # Library view: the title comes straight from the database model.
        title = self.gui.library_view.model().title(row)
    else:
        # Device view: pull it from the book's display metadata.
        mi = self.gui.current_view().model().get_book_display_info(row)
        title = mi.title
    return title
def search_title(self):
    """Open the Get Books search dialog pre-filled with a ``title:``
    query for the currently selected book.

    Shows an error dialog and returns if no book is selected.
    """
    row = self._get_selected_row()
    if row is None:  # idiom fix: 'is None', not '== None' (PEP 8)
        error_dialog(self.gui, _('Cannot search'), _('No book selected'), show=True)
        return
    query = 'title:"%s"' % self._get_title(row)
    self.search(query)
def search_author_title(self):
    """Open the Get Books search dialog pre-filled with both ``author:``
    and ``title:`` queries for the currently selected book.

    Shows an error dialog and returns if no book is selected.
    """
    row = self._get_selected_row()
    if row is None:  # idiom fix: 'is None', not '== None' (PEP 8)
        error_dialog(self.gui, _('Cannot search'), _('No book selected'), show=True)
        return
    query = 'author:"%s" title:"%s"' % (self._get_author(row), self._get_title(row))
    self.search(query)
def open_store(self, store_plugin):
    # Show the store disclaimer (if applicable), then hand off to the
    # individual store plugin to open its own UI/website.
    self.show_disclaimer()
    store_plugin.open(self.gui)

View File

@ -19,7 +19,7 @@ class PluginWidget(Widget, Ui_Form):
Widget.__init__(self, parent,
['newline', 'max_line_length', 'force_max_line_length',
'inline_toc', 'txt_output_formatting', 'keep_links', 'keep_image_references',
'txt_output_encoding'])
'keep_color', 'txt_output_encoding'])
self.db, self.book_id = db, book_id
for x in get_option('newline').option.choices:
self.opt_newline.addItem(x)

View File

@ -122,6 +122,13 @@
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="opt_keep_color">
<property name="text">
<string>Keep text color, when possible</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>

View File

@ -439,6 +439,7 @@ def populate_metadata_page(layout, db, book_id, bulk=False, two_column=False, pa
w = widget_factory(dt, col)
ans.append(w)
for c in range(0, len(w.widgets), 2):
w.widgets[c].setWordWrap(True)
w.widgets[c].setBuddy(w.widgets[c+1])
layout.addWidget(w.widgets[c], row, column)
layout.addWidget(w.widgets[c+1], row, column+1)

View File

@ -3,12 +3,13 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__license__ = 'GPL v3'
from PyQt4.Qt import Qt, QDialog, QTableWidgetItem, QAbstractItemView
from PyQt4.Qt import (Qt, QDialog, QTableWidgetItem, QAbstractItemView, QIcon,
QDialogButtonBox, QFrame, QLabel, QTimer, QMenu, QApplication)
from calibre.ebooks.metadata import author_to_author_sort
from calibre.gui2 import error_dialog
from calibre.gui2.dialogs.edit_authors_dialog_ui import Ui_EditAuthorsDialog
from calibre.utils.icu import sort_key, strcmp
from calibre.utils.icu import sort_key
class tableItem(QTableWidgetItem):
def __ge__(self, other):
@ -19,7 +20,7 @@ class tableItem(QTableWidgetItem):
class EditAuthorsDialog(QDialog, Ui_EditAuthorsDialog):
def __init__(self, parent, db, id_to_select):
def __init__(self, parent, db, id_to_select, select_sort):
QDialog.__init__(self, parent)
Ui_EditAuthorsDialog.__init__(self)
self.setupUi(self)
@ -30,14 +31,23 @@ class EditAuthorsDialog(QDialog, Ui_EditAuthorsDialog):
self.buttonBox.accepted.connect(self.accepted)
# Set up the column headings
self.table.setSelectionMode(QAbstractItemView.SingleSelection)
self.table.setColumnCount(2)
self.table.setHorizontalHeaderLabels([_('Author'), _('Author sort')])
self.down_arrow_icon = QIcon(I('arrow-down.png'))
self.up_arrow_icon = QIcon(I('arrow-up.png'))
self.blank_icon = QIcon(I('blank.png'))
self.auth_col = QTableWidgetItem(_('Author'))
self.table.setHorizontalHeaderItem(0, self.auth_col)
self.auth_col.setIcon(self.blank_icon)
self.aus_col = QTableWidgetItem(_('Author sort'))
self.table.setHorizontalHeaderItem(1, self.aus_col)
self.aus_col.setIcon(self.up_arrow_icon)
# Add the data
self.authors = {}
auts = db.get_authors_with_ids()
self.table.setRowCount(len(auts))
setattr(self.table, '__lt__', lambda x, y: True if strcmp(x, y) < 0 else False)
select_item = None
for row, (id, author, sort) in enumerate(auts):
author = author.replace('|', ',')
@ -48,7 +58,10 @@ class EditAuthorsDialog(QDialog, Ui_EditAuthorsDialog):
self.table.setItem(row, 0, aut)
self.table.setItem(row, 1, sort)
if id == id_to_select:
select_item = sort
if select_sort:
select_item = sort
else:
select_item = aut
self.table.resizeColumnsToContents()
# set up the cellChanged signal only after the table is filled
@ -69,23 +82,153 @@ class EditAuthorsDialog(QDialog, Ui_EditAuthorsDialog):
self.recalc_author_sort.clicked.connect(self.do_recalc_author_sort)
self.auth_sort_to_author.clicked.connect(self.do_auth_sort_to_author)
# Position on the desired item
if select_item is not None:
self.table.setCurrentItem(select_item)
self.table.editItem(select_item)
self.start_find_pos = select_item.row() * 2 + select_item.column()
else:
self.table.setCurrentCell(0, 0)
self.start_find_pos = -1
# set up the search box
self.find_box.initialize('manage_authors_search')
self.find_box.lineEdit().returnPressed.connect(self.do_find)
self.find_box.editTextChanged.connect(self.find_text_changed)
self.find_button.clicked.connect(self.do_find)
l = QLabel(self.table)
self.not_found_label = l
l.setFrameStyle(QFrame.StyledPanel)
l.setAutoFillBackground(True)
l.setText(_('No matches found'))
l.setAlignment(Qt.AlignVCenter)
l.resize(l.sizeHint())
l.move(10,20)
l.setVisible(False)
self.not_found_label.move(40, 40)
self.not_found_label_timer = QTimer()
self.not_found_label_timer.setSingleShot(True)
self.not_found_label_timer.timeout.connect(
self.not_found_label_timer_event, type=Qt.QueuedConnection)
self.table.setContextMenuPolicy(Qt.CustomContextMenu)
self.table.customContextMenuRequested .connect(self.show_context_menu)
def show_context_menu(self, point):
self.context_item = self.table.itemAt(point)
case_menu = QMenu(_('Change Case'))
action_upper_case = case_menu.addAction(_('Upper Case'))
action_lower_case = case_menu.addAction(_('Lower Case'))
action_swap_case = case_menu.addAction(_('Swap Case'))
action_title_case = case_menu.addAction(_('Title Case'))
action_capitalize = case_menu.addAction(_('Capitalize'))
action_upper_case.triggered.connect(self.upper_case)
action_lower_case.triggered.connect(self.lower_case)
action_swap_case.triggered.connect(self.swap_case)
action_title_case.triggered.connect(self.title_case)
action_capitalize.triggered.connect(self.capitalize)
m = self.au_context_menu = QMenu()
ca = m.addAction(_('Copy'))
ca.triggered.connect(self.copy_to_clipboard)
ca = m.addAction(_('Paste'))
ca.triggered.connect(self.paste_from_clipboard)
m.addSeparator()
if self.context_item.column() == 0:
ca = m.addAction(_('Copy to author sort'))
ca.triggered.connect(self.copy_au_to_aus)
else:
ca = m.addAction(_('Copy to author'))
ca.triggered.connect(self.copy_aus_to_au)
m.addSeparator()
m.addMenu(case_menu)
m.exec_(self.table.mapToGlobal(point))
def copy_to_clipboard(self):
cb = QApplication.clipboard()
cb.setText(unicode(self.context_item.text()))
def paste_from_clipboard(self):
cb = QApplication.clipboard()
self.context_item.setText(cb.text())
def upper_case(self):
self.context_item.setText(icu_upper(unicode(self.context_item.text())))
def lower_case(self):
self.context_item.setText(icu_lower(unicode(self.context_item.text())))
def swap_case(self):
self.context_item.setText(unicode(self.context_item.text()).swapcase())
def title_case(self):
from calibre.utils.titlecase import titlecase
self.context_item.setText(titlecase(unicode(self.context_item.text())))
def capitalize(self):
from calibre.utils.icu import capitalize
self.context_item.setText(capitalize(unicode(self.context_item.text())))
def copy_aus_to_au(self):
row = self.context_item.row()
dest = self.table.item(row, 0)
dest.setText(self.context_item.text())
def copy_au_to_aus(self):
row = self.context_item.row()
dest = self.table.item(row, 1)
dest.setText(self.context_item.text())
def not_found_label_timer_event(self):
self.not_found_label.setVisible(False)
def find_text_changed(self):
self.start_find_pos = -1
def do_find(self):
    # Incremental search over both table columns (author, author sort),
    # resuming just after the previous hit.  start_find_pos linearises
    # the table as row*2 + column; -1 means "start from the beginning".
    self.not_found_label.setVisible(False)
    # For some reason the button box keeps stealing the RETURN shortcut.
    # Steal it back
    self.buttonBox.button(QDialogButtonBox.Ok).setDefault(False)
    self.buttonBox.button(QDialogButtonBox.Ok).setAutoDefault(False)
    self.buttonBox.button(QDialogButtonBox.Cancel).setDefault(False)
    self.buttonBox.button(QDialogButtonBox.Cancel).setAutoDefault(False)
    st = icu_lower(unicode(self.find_box.currentText()))
    # Visit every cell at most once, wrapping around the end of the table.
    for i in range(0, self.table.rowCount()*2):
        self.start_find_pos = (self.start_find_pos + 1) % (self.table.rowCount()*2)
        r = (self.start_find_pos/2)%self.table.rowCount()
        c = self.start_find_pos % 2
        item = self.table.item(r, c)
        text = icu_lower(unicode(item.text()))
        if st in text:  # case-insensitive substring match
            self.table.setCurrentItem(item)
            self.table.setFocus(True)
            return
    # Nothing found. Pop up the little dialog for 1.5 seconds
    self.not_found_label.setVisible(True)
    self.not_found_label_timer.start(1500)
def do_sort_by_author(self):
self.author_order = 1 if self.author_order == 0 else 0
self.table.sortByColumn(0, self.author_order)
self.sort_by_author.setChecked(True)
self.sort_by_author_sort.setChecked(False)
self.auth_col.setIcon(self.down_arrow_icon if self.author_order
else self.up_arrow_icon)
self.aus_col.setIcon(self.blank_icon)
def do_sort_by_author_sort(self):
self.author_sort_order = 1 if self.author_sort_order == 0 else 0
self.table.sortByColumn(1, self.author_sort_order)
self.sort_by_author.setChecked(False)
self.sort_by_author_sort.setChecked(True)
self.aus_col.setIcon(self.down_arrow_icon if self.author_sort_order
else self.up_arrow_icon)
self.auth_col.setIcon(self.blank_icon)
def accepted(self):
self.result = []

View File

@ -20,6 +20,50 @@
<string>Manage authors</string>
</property>
<layout class="QVBoxLayout" name="verticalLayout">
<item>
<layout class="QHBoxLayout" name="">
<item>
<widget class="QLabel">
<property name="text">
<string>&amp;Search for:</string>
</property>
<property name="buddy">
<cstring>find_box</cstring>
</property>
</widget>
</item>
<item>
<widget class="HistoryLineEdit" name="find_box">
<property name="minimumSize">
<size>
<width>200</width>
<height>0</height>
</size>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="find_button">
<property name="text">
<string>F&amp;ind</string>
</property>
</widget>
</item>
<item>
<spacer>
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
</layout>
</item>
<item>
<widget class="QTableWidget" name="table">
<property name="sizePolicy">
@ -143,4 +187,11 @@ after changing Preferences-&gt;Advanced-&gt;Tweaks-&gt;Author sort name algorith
</hints>
</connection>
</connections>
<customwidgets>
<customwidget>
<class>HistoryLineEdit</class>
<extends>QComboBox</extends>
<header>calibre/gui2/widgets.h</header>
</customwidget>
</customwidgets>
</ui>

View File

@ -19,17 +19,23 @@ class MessageBox(QDialog, Ui_Dialog): # {{{
INFO = 2
QUESTION = 3
def __init__(self, type_, title, msg, det_msg='', show_copy_button=True,
parent=None):
def __init__(self, type_, title, msg,
det_msg='',
q_icon=None,
show_copy_button=True,
parent=None):
QDialog.__init__(self, parent)
icon = {
self.ERROR : 'error',
self.WARNING: 'warning',
self.INFO: 'information',
self.QUESTION: 'question',
}[type_]
icon = 'dialog_%s.png'%icon
self.icon = QIcon(I(icon))
if q_icon is None:
icon = {
self.ERROR : 'error',
self.WARNING: 'warning',
self.INFO: 'information',
self.QUESTION: 'question',
}[type_]
icon = 'dialog_%s.png'%icon
self.icon = QIcon(I(icon))
else:
self.icon = q_icon
self.setupUi(self)
self.setWindowTitle(title)
@ -44,7 +50,6 @@ class MessageBox(QDialog, Ui_Dialog): # {{{
self.bb.ActionRole)
self.ctc_button.clicked.connect(self.copy_to_clipboard)
self.show_det_msg = _('Show &details')
self.hide_det_msg = _('Hide &details')
self.det_msg_toggle = self.bb.addButton(self.show_det_msg, self.bb.ActionRole)

View File

@ -506,6 +506,9 @@ class BooksModel(QAbstractTableModel): # {{{
def id(self, row):
return self.db.id(getattr(row, 'row', lambda:row)())
def authors(self, row_number):
return self.db.authors(row_number)
def title(self, row_number):
return self.db.title(row_number)

View File

@ -439,10 +439,16 @@ class BooksView(QTableView): # {{{
if tweaks['sort_columns_at_startup'] is not None:
sh = []
for c,d in tweaks['sort_columns_at_startup']:
if not isinstance(d, bool):
d = True if d == 0 else False
sh.append((c, d))
try:
for c,d in tweaks['sort_columns_at_startup']:
if not isinstance(d, bool):
d = True if d == 0 else False
sh.append((c, d))
except:
# Ignore invalid tweak values as users seem to often get them
# wrong
import traceback
traceback.print_exc()
old_state['sort_history'] = sh
self.apply_state(old_state)

View File

@ -299,13 +299,13 @@ def run_gui(opts, args, actions, listener, app, gui_debug=None):
if getattr(runner.main, 'debug_on_restart', False):
run_in_debug_mode()
else:
import subprocess
print 'Restarting with:', e, sys.argv
if hasattr(sys, 'frameworks_dir'):
app = os.path.dirname(os.path.dirname(sys.frameworks_dir))
import subprocess
subprocess.Popen('sleep 3s; open '+app, shell=True)
else:
os.execvp(e, sys.argv)
subprocess.Popen([e] + sys.argv[1:])
else:
if iswindows:
try:

View File

@ -9,8 +9,8 @@ __docformat__ = 'restructuredtext en'
import textwrap, re, os
from PyQt4.Qt import (Qt, QDateEdit, QDate, pyqtSignal,
QIcon, QToolButton, QWidget, QLabel, QGridLayout,
from PyQt4.Qt import (Qt, QDateEdit, QDate, pyqtSignal, QMessageBox,
QIcon, QToolButton, QWidget, QLabel, QGridLayout, QApplication,
QDoubleSpinBox, QListWidgetItem, QSize, QPixmap,
QPushButton, QSpinBox, QLineEdit, QSizePolicy)
@ -19,10 +19,10 @@ from calibre.gui2.complete import MultiCompleteLineEdit, MultiCompleteComboBox
from calibre.utils.icu import sort_key
from calibre.utils.config import tweaks, prefs
from calibre.ebooks.metadata import (title_sort, authors_to_string,
string_to_authors, check_isbn)
string_to_authors, check_isbn, authors_to_sort_string)
from calibre.ebooks.metadata.meta import get_metadata
from calibre.gui2 import (file_icon_provider, UNDEFINED_QDATE, UNDEFINED_DATE,
choose_files, error_dialog, choose_images, question_dialog)
choose_files, error_dialog, choose_images)
from calibre.utils.date import local_tz, qt_to_dt
from calibre import strftime
from calibre.ebooks import BOOK_EXTENSIONS
@ -31,6 +31,16 @@ from calibre.utils.date import utcfromtimestamp
from calibre.gui2.comments_editor import Editor
from calibre.library.comments import comments_to_html
from calibre.gui2.dialogs.tag_editor import TagEditor
from calibre.utils.icu import strcmp
def save_dialog(parent, title, msg, det_msg=''):
    """Show a modal Yes/No/Cancel "save your changes?" message box.

    :param parent: parent widget for the dialog
    :param title:  window title
    :param msg:    main message text
    :param det_msg: optional detailed message.  Bug fix: this parameter
        was previously accepted but silently ignored; it is now shown in
        the dialog's expandable details area.
    :return: the clicked standard button (QMessageBox.Yes/No/Cancel)
    """
    d = QMessageBox(parent)
    d.setWindowTitle(title)
    d.setText(msg)
    if det_msg:
        d.setDetailedText(det_msg)
    d.setStandardButtons(QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel)
    return d.exec_()
'''
The interface common to all widgets used to set basic metadata
@ -156,7 +166,7 @@ class AuthorsEdit(MultiCompleteComboBox):
TOOLTIP = ''
LABEL = _('&Author(s):')
def __init__(self, parent):
def __init__(self, parent, manage_authors):
self.dialog = parent
self.books_to_refresh = set([])
MultiCompleteComboBox.__init__(self, parent)
@ -164,6 +174,28 @@ class AuthorsEdit(MultiCompleteComboBox):
self.setWhatsThis(self.TOOLTIP)
self.setEditable(True)
self.setSizeAdjustPolicy(self.AdjustToMinimumContentsLengthWithIcon)
manage_authors.triggered.connect(self.manage_authors)
def manage_authors(self):
if self.original_val != self.current_val:
d = save_dialog(self, _('Authors changed'),
_('You have changed the authors for this book. You must save '
'these changes before you can use Manage authors. Do you '
'want to save these changes?'))
if d == QMessageBox.Cancel:
return
if d == QMessageBox.Yes:
self.commit(self.db, self.id_)
self.db.commit()
self.original_val = self.current_val
else:
self.current_val = self.original_val
first_author = self.current_val[0] if len(self.current_val) else None
first_author_id = self.db.get_author_id(first_author) if first_author else None
self.dialog.parent().do_author_sort_edit(self, first_author_id,
select_sort=False)
self.initialize(self.db, self.id_)
self.dialog.author_sort.initialize(self.db, self.id_)
def get_default(self):
return _('Unknown')
@ -175,8 +207,8 @@ class AuthorsEdit(MultiCompleteComboBox):
self.clear()
for i in all_authors:
id, name = i
name = [name.strip().replace('|', ',') for n in name.split(',')]
self.addItem(authors_to_string(name))
name = name.strip().replace('|', ',')
self.addItem(name)
self.set_separator('&')
self.set_space_before_sep(True)
@ -188,6 +220,8 @@ class AuthorsEdit(MultiCompleteComboBox):
au = _('Unknown')
self.current_val = [a.strip().replace('|', ',') for a in au.split(',')]
self.original_val = self.current_val
self.id_ = id_
self.db = db
def commit(self, db, id_):
authors = self.current_val
@ -238,7 +272,7 @@ class AuthorSortEdit(EnLineEdit):
'No action is required if this is what you want.'))
self.tooltips = (ok_tooltip, bad_tooltip)
self.authors_edit.editTextChanged.connect(self.update_state)
self.authors_edit.editTextChanged.connect(self.update_state_and_val)
self.textChanged.connect(self.update_state)
autogen_button.clicked.connect(self.auto_generate)
@ -260,12 +294,19 @@ class AuthorSortEdit(EnLineEdit):
return property(fget=fget, fset=fset)
def update_state_and_val(self):
    # Handle case change if the authors box changed
    aus = authors_to_sort_string(self.authors_edit.current_val)
    # strcmp is the ICU collation comparison; when the regenerated sort
    # string matches the current one under that comparison (presumably a
    # case-only difference -- TODO confirm strcmp semantics), silently
    # adopt the new casing so the sort field tracks a case-only rename.
    if strcmp(aus, self.current_val) == 0:
        self.current_val = aus
    self.update_state()
def update_state(self, *args):
au = unicode(self.authors_edit.text())
au = re.sub(r'\s+et al\.$', '', au)
au = self.db.author_sort_from_authors(string_to_authors(au))
normal = au == self.current_val
normal = strcmp(au, self.current_val) == 0
if normal:
col = 'rgb(0, 255, 0, 20%)'
else:
@ -900,10 +941,13 @@ class TagsEdit(MultiCompleteLineEdit): # {{{
def edit(self, db, id_):
if self.changed:
if question_dialog(self, _('Tags changed'),
d = save_dialog(self, _('Tags changed'),
_('You have changed the tags. In order to use the tags'
' editor, you must either discard or apply these '
'changes. Apply changes?'), show_copy_button=False):
'changes. Apply changes?'))
if d == QMessageBox.Cancel:
return
if d == QMessageBox.Yes:
self.commit(db, id_)
db.commit()
self.original_val = self.current_val
@ -993,6 +1037,13 @@ class IdentifiersEdit(QLineEdit): # {{{
self.setToolTip(tt+extra)
self.setStyleSheet('QLineEdit { background-color: %s }'%col)
def paste_isbn(self):
    # Store the clipboard contents in the identifiers map under the
    # 'isbn' key (i.e. it will render as "isbn:<text>" in the line edit).
    # NOTE(review): the pasted text is not validated (e.g. with
    # check_isbn) -- arbitrary clipboard text becomes the ISBN; confirm
    # this is intended.
    text = unicode(QApplication.clipboard().text()).strip()
    if text:
        vals = self.current_val
        vals['isbn'] = text
        self.current_val = vals
# }}}
class PublisherEdit(MultiCompleteComboBox): # {{{
@ -1075,7 +1126,7 @@ class DateEdit(QDateEdit): # {{{
@dynamic_property
def current_val(self):
def fget(self):
return qt_to_dt(self.date())
return qt_to_dt(self.date(), as_utc=False)
def fset(self, val):
if val is None:
val = UNDEFINED_DATE

View File

@ -10,7 +10,7 @@ __docformat__ = 'restructuredtext en'
import textwrap
from PyQt4.Qt import (QWidget, QGridLayout, QGroupBox, QListView, Qt, QSpinBox,
QDoubleSpinBox, QCheckBox, QLineEdit, QComboBox, QLabel)
QDoubleSpinBox, QCheckBox, QLineEdit, QComboBox, QLabel, QVariant)
from calibre.gui2.preferences.metadata_sources import FieldsModel as FM
@ -95,9 +95,9 @@ class ConfigWidget(QWidget):
widget.setChecked(bool(val))
elif opt.type == 'choices':
widget = QComboBox(self)
for x in opt.choices:
widget.addItem(x)
idx = opt.choices.index(val)
for key, label in opt.choices.iteritems():
widget.addItem(label, QVariant(key))
idx = widget.findData(QVariant(val))
widget.setCurrentIndex(idx)
widget.opt = opt
widget.setToolTip(textwrap.fill(opt.desc))
@ -124,7 +124,8 @@ class ConfigWidget(QWidget):
elif isinstance(w, QCheckBox):
val = w.isChecked()
elif isinstance(w, QComboBox):
val = unicode(w.currentText())
idx = w.currentIndex()
val = unicode(w.itemData(idx).toString())
self.plugin.prefs[w.opt.name] = val

View File

@ -31,6 +31,7 @@ class MetadataSingleDialogBase(ResizableDialog):
view_format = pyqtSignal(object, object)
cc_two_column = tweaks['metadata_single_use_2_cols_for_custom_fields']
one_line_comments_toolbar = False
use_toolbutton_for_config_metadata = True
def __init__(self, db, parent=None):
self.db = db
@ -69,7 +70,11 @@ class MetadataSingleDialogBase(ResizableDialog):
self.setLayout(self.l)
self.l.setMargin(0)
self.l.addWidget(self.scroll_area)
self.l.addWidget(self.button_box)
ll = self.button_box_layout = QHBoxLayout()
self.l.addLayout(ll)
ll.addSpacing(10)
ll.addWidget(self.button_box)
ll.addSpacing(10)
self.setWindowIcon(QIcon(I('edit_input.png')))
self.setWindowTitle(_('Edit Metadata'))
@ -103,16 +108,18 @@ class MetadataSingleDialogBase(ResizableDialog):
self.basic_metadata_widgets.extend([self.title, self.title_sort])
self.deduce_author_sort_button = b = QToolButton(self)
b.setToolTip(_(
'Automatically create the author sort entry based on the current'
' author entry.\n'
'Using this button to create author sort will change author sort from'
' red to green.'))
b.setToolTip('<p>' +
_('Automatically create the author sort entry based on the current '
'author entry. Using this button to create author sort will '
'change author sort from red to green. There is a menu of '
'functions available under this button. Click and hold '
'on the button to see it.') + '</p>')
b.m = m = QMenu()
ac = m.addAction(QIcon(I('forward.png')), _('Set author sort from author'))
ac2 = m.addAction(QIcon(I('back.png')), _('Set author from author sort'))
ac3 = m.addAction(QIcon(I('user_profile.png')), _('Manage authors'))
b.setMenu(m)
self.authors = AuthorsEdit(self)
self.authors = AuthorsEdit(self, ac3)
self.author_sort = AuthorSortEdit(self, self.authors, b, self.db, ac,
ac2)
self.basic_metadata_widgets.extend([self.authors, self.author_sort])
@ -123,6 +130,13 @@ class MetadataSingleDialogBase(ResizableDialog):
'Swap the author and title'))
self.swap_title_author_button.clicked.connect(self.swap_title_author)
self.manage_authors_button = QToolButton(self)
self.manage_authors_button.setIcon(QIcon(I('user_profile.png')))
self.manage_authors_button.setToolTip('<p>' + _(
'Manage authors. Use to rename authors and correct '
'individual author\'s sort values') + '</p>')
self.manage_authors_button.clicked.connect(self.authors.manage_authors)
self.series = SeriesEdit(self)
self.remove_unused_series_button = QToolButton(self)
self.remove_unused_series_button.setToolTip(
@ -159,6 +173,12 @@ class MetadataSingleDialogBase(ResizableDialog):
self.clear_identifiers_button = QToolButton(self)
self.clear_identifiers_button.setIcon(QIcon(I('trash.png')))
self.clear_identifiers_button.clicked.connect(self.identifiers.clear)
self.paste_isbn_button = QToolButton(self)
self.paste_isbn_button.setToolTip('<p>' +
_('Paste the contents of the clipboard into the '
'identifiers box prefixed with isbn:') + '</p>')
self.paste_isbn_button.setIcon(QIcon(I('edit-paste.png')))
self.paste_isbn_button.clicked.connect(self.identifiers.paste_isbn)
self.publisher = PublisherEdit(self)
self.basic_metadata_widgets.append(self.publisher)
@ -174,7 +194,12 @@ class MetadataSingleDialogBase(ResizableDialog):
font.setBold(True)
self.fetch_metadata_button.setFont(font)
self.config_metadata_button = QToolButton(self)
if self.use_toolbutton_for_config_metadata:
self.config_metadata_button = QToolButton(self)
self.config_metadata_button.setIcon(QIcon(I('config.png')))
else:
self.config_metadata_button = QPushButton(self)
self.config_metadata_button.setText(_('Configure download metadata'))
self.config_metadata_button.setIcon(QIcon(I('config.png')))
self.config_metadata_button.clicked.connect(self.configure_metadata)
self.config_metadata_button.setToolTip(
@ -290,13 +315,17 @@ class MetadataSingleDialogBase(ResizableDialog):
show=True)
return
def update_from_mi(self, mi):
def update_from_mi(self, mi, update_sorts=True):
if not mi.is_null('title'):
self.title.current_val = mi.title
if update_sorts:
self.title_sort.auto_generate()
if not mi.is_null('authors'):
self.authors.current_val = mi.authors
if not mi.is_null('author_sort'):
self.author_sort.current_val = mi.author_sort
elif update_sorts:
self.author_sort.auto_generate()
if not mi.is_null('rating'):
try:
self.rating.current_val = mi.rating
@ -307,7 +336,9 @@ class MetadataSingleDialogBase(ResizableDialog):
if not mi.is_null('tags'):
self.tags.current_val = mi.tags
if not mi.is_null('identifiers'):
self.identifiers.current_val = mi.identifiers
current = self.identifiers.current_val
current.update(mi.identifiers)
self.identifiers.current_val = current
if not mi.is_null('pubdate'):
self.pubdate.current_val = mi.pubdate
if not mi.is_null('series') and mi.series.strip():
@ -493,7 +524,8 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
sto(one, two)
sto(two, three)
tl.addWidget(self.swap_title_author_button, 0, 0, 2, 1)
tl.addWidget(self.swap_title_author_button, 0, 0, 1, 1)
tl.addWidget(self.manage_authors_button, 1, 0, 1, 1)
create_row(0, self.title, self.deduce_title_sort_button, self.title_sort)
sto(self.title_sort, self.authors)
@ -502,6 +534,7 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
create_row(2, self.series, self.remove_unused_series_button,
self.series_index, icon='trash.png')
sto(self.series_index, self.swap_title_author_button)
sto(self.swap_title_author_button, self.manage_authors_button)
tl.addWidget(self.formats_manager, 0, 6, 3, 1)
@ -512,7 +545,7 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
self.tabs[0].gb = gb = QGroupBox(_('Change cover'), self)
gb.l = l = QGridLayout()
gb.setLayout(l)
sto(self.swap_title_author_button, self.cover.buttons[0])
sto(self.manage_authors_button, self.cover.buttons[0])
for i, b in enumerate(self.cover.buttons[:3]):
l.addWidget(b, 0, i, 1, 1)
sto(b, self.cover.buttons[i+1])
@ -526,10 +559,16 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
w.setLayout(w.l)
l.setMargin(0)
self.splitter.addWidget(w)
def create_row2(row, widget, button=None):
def create_row2(row, widget, button=None, front_button=None):
row += 1
ql = BuddyLabel(widget)
l.addWidget(ql, row, 0, 1, 1)
if front_button:
ltl = QHBoxLayout()
ltl.addWidget(front_button)
ltl.addWidget(ql)
l.addLayout(ltl, row, 0, 1, 1)
else:
l.addWidget(ql, row, 0, 1, 1)
l.addWidget(widget, row, 1, 1, 2 if button is None else 1)
if button is not None:
l.addWidget(button, row, 2, 1, 1)
@ -544,8 +583,10 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
create_row2(1, self.rating)
sto(self.rating, self.tags)
create_row2(2, self.tags, self.tags_editor_button)
sto(self.tags_editor_button, self.identifiers)
create_row2(3, self.identifiers, self.clear_identifiers_button)
sto(self.tags_editor_button, self.paste_isbn_button)
sto(self.paste_isbn_button, self.identifiers)
create_row2(3, self.identifiers, self.clear_identifiers_button,
front_button=self.paste_isbn_button)
sto(self.clear_identifiers_button, self.timestamp)
create_row2(4, self.timestamp, self.timestamp.clear_button)
sto(self.timestamp.clear_button, self.pubdate)
@ -583,6 +624,7 @@ class MetadataSingleDialogAlt1(MetadataSingleDialogBase): # {{{
cc_two_column = False
one_line_comments_toolbar = True
use_toolbutton_for_config_metadata = False
on_drag_enter = pyqtSignal()
@ -618,13 +660,11 @@ class MetadataSingleDialogAlt1(MetadataSingleDialogBase): # {{{
self.tabs[0].l.addWidget(gb, 0, 0, 1, 1)
gb.setLayout(tl)
self.button_box.addButton(self.fetch_metadata_button,
QDialogButtonBox.ActionRole)
self.config_metadata_button.setToolButtonStyle(Qt.ToolButtonTextOnly)
self.config_metadata_button.setText(_('Configure metadata downloading'))
self.button_box.addButton(self.config_metadata_button,
QDialogButtonBox.ActionRole)
sto(self.button_box, self.title)
self.button_box_layout.insertWidget(1, self.fetch_metadata_button)
self.button_box_layout.insertWidget(2, self.config_metadata_button)
sto(self.button_box, self.fetch_metadata_button)
sto(self.fetch_metadata_button, self.config_metadata_button)
sto(self.config_metadata_button, self.title)
def create_row(row, widget, tab_to, button=None, icon=None, span=1):
ql = BuddyLabel(widget)
@ -642,6 +682,8 @@ class MetadataSingleDialogAlt1(MetadataSingleDialogBase): # {{{
sto(widget, tab_to)
tl.addWidget(self.swap_title_author_button, 0, 0, 2, 1)
tl.addWidget(self.manage_authors_button, 2, 0, 1, 1)
tl.addWidget(self.paste_isbn_button, 11, 0, 1, 1)
create_row(0, self.title, self.title_sort,
button=self.deduce_title_sort_button, span=2,
@ -663,6 +705,9 @@ class MetadataSingleDialogAlt1(MetadataSingleDialogBase): # {{{
button=self.timestamp.clear_button, icon='trash.png')
create_row(11, self.identifiers, self.comments,
button=self.clear_identifiers_button, icon='trash.png')
sto(self.clear_identifiers_button, self.swap_title_author_button)
sto(self.swap_title_author_button, self.manage_authors_button)
sto(self.manage_authors_button, self.paste_isbn_button)
tl.addItem(QSpacerItem(1, 1, QSizePolicy.Fixed, QSizePolicy.Expanding),
12, 1, 1 ,1)
@ -702,7 +747,6 @@ class MetadataSingleDialogAlt1(MetadataSingleDialogBase): # {{{
gb = QGroupBox(_('Change cover'), tab1)
l = QGridLayout()
gb.setLayout(l)
sto(self.swap_title_author_button, self.cover.buttons[0])
for i, b in enumerate(self.cover.buttons[:3]):
l.addWidget(b, 0, i, 1, 1)
sto(b, self.cover.buttons[i+1])
@ -732,6 +776,7 @@ class MetadataSingleDialogAlt2(MetadataSingleDialogBase): # {{{
cc_two_column = False
one_line_comments_toolbar = True
use_toolbutton_for_config_metadata = False
def do_layout(self):
self.central_widget.clear()
@ -750,13 +795,11 @@ class MetadataSingleDialogAlt2(MetadataSingleDialogBase): # {{{
l.addWidget(gb, 0, 0, 1, 1)
gb.setLayout(tl)
self.button_box.addButton(self.fetch_metadata_button,
QDialogButtonBox.ActionRole)
self.config_metadata_button.setToolButtonStyle(Qt.ToolButtonTextOnly)
self.config_metadata_button.setText(_('Configure metadata downloading'))
self.button_box.addButton(self.config_metadata_button,
QDialogButtonBox.ActionRole)
sto(self.button_box, self.title)
self.button_box_layout.insertWidget(1, self.fetch_metadata_button)
self.button_box_layout.insertWidget(2, self.config_metadata_button)
sto(self.button_box, self.fetch_metadata_button)
sto(self.fetch_metadata_button, self.config_metadata_button)
sto(self.config_metadata_button, self.title)
def create_row(row, widget, tab_to, button=None, icon=None, span=1):
ql = BuddyLabel(widget)
@ -774,6 +817,8 @@ class MetadataSingleDialogAlt2(MetadataSingleDialogBase): # {{{
sto(widget, tab_to)
tl.addWidget(self.swap_title_author_button, 0, 0, 2, 1)
tl.addWidget(self.manage_authors_button, 2, 0, 2, 1)
tl.addWidget(self.paste_isbn_button, 11, 0, 1, 1)
create_row(0, self.title, self.title_sort,
button=self.deduce_title_sort_button, span=2,
@ -795,6 +840,9 @@ class MetadataSingleDialogAlt2(MetadataSingleDialogBase): # {{{
button=self.timestamp.clear_button, icon='trash.png')
create_row(11, self.identifiers, self.comments,
button=self.clear_identifiers_button, icon='trash.png')
sto(self.clear_identifiers_button, self.swap_title_author_button)
sto(self.swap_title_author_button, self.manage_authors_button)
sto(self.manage_authors_button, self.paste_isbn_button)
tl.addItem(QSpacerItem(1, 1, QSizePolicy.Fixed, QSizePolicy.Expanding),
12, 1, 1 ,1)
@ -814,7 +862,7 @@ class MetadataSingleDialogAlt2(MetadataSingleDialogBase): # {{{
l.addWidget(gb, 0, 1, 1, 1)
sp = QSizePolicy()
sp.setVerticalStretch(10)
sp.setHorizontalPolicy(QSizePolicy.Fixed)
sp.setHorizontalPolicy(QSizePolicy.Minimum)
sp.setVerticalPolicy(QSizePolicy.Expanding)
gb.setSizePolicy(sp)
self.set_custom_metadata_tab_order()
@ -836,7 +884,7 @@ class MetadataSingleDialogAlt2(MetadataSingleDialogBase): # {{{
lb = QGridLayout()
gb.setLayout(lb)
lb.addWidget(self.cover, 0, 0, 1, 3, alignment=Qt.AlignCenter)
sto(self.clear_identifiers_button, self.cover.buttons[0])
sto(self.manage_authors_button, self.cover.buttons[0])
for i, b in enumerate(self.cover.buttons[:3]):
lb.addWidget(b, 1, i, 1, 1)
sto(b, self.cover.buttons[i+1])

View File

@ -161,7 +161,11 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
def initialize(self):
ConfigWidgetBase.initialize(self)
self.current_font = self.initial_font = gprefs['font']
font = gprefs['font']
if font is not None:
font = list(font)
font.append(gprefs.get('font_stretch', QFont.Unstretched))
self.current_font = self.initial_font = font
self.update_font_display()
self.display_model.initialize()
@ -178,7 +182,8 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
def build_font_obj(self):
font_info = self.current_font
if font_info is not None:
font = QFont(*font_info)
font = QFont(*(font_info[:4]))
font.setStretch(font_info[4])
else:
font = qt_app.original_font
return font
@ -215,15 +220,18 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
if fd.exec_() == fd.Accepted:
font = fd.selectedFont()
fi = QFontInfo(font)
self.current_font = (unicode(fi.family()), fi.pointSize(),
fi.weight(), fi.italic())
self.current_font = [unicode(fi.family()), fi.pointSize(),
fi.weight(), fi.italic(), font.stretch()]
self.update_font_display()
self.changed_signal.emit()
def commit(self, *args):
rr = ConfigWidgetBase.commit(self, *args)
if self.current_font != self.initial_font:
gprefs['font'] = self.current_font
gprefs['font'] = (self.current_font[:4] if self.current_font else
None)
gprefs['font_stretch'] = (self.current_font[4] if self.current_font
is not None else QFont.Unstretched)
QApplication.setFont(self.font_display.font())
rr = True
self.display_model.commit()

View File

@ -71,9 +71,10 @@ class SourcesModel(QAbstractTableModel): # {{{
plugin.is_configured()):
return QIcon(I('list_remove.png'))
elif role == Qt.ToolTipRole:
base = plugin.description + '\n\n'
if plugin.is_configured():
return _('This source is configured and ready to go')
return _('This source needs configuration')
return base + _('This source is configured and ready to go')
return base + _('This source needs configuration')
return NONE
def setData(self, index, val, role):

View File

@ -75,6 +75,8 @@ class PluginModel(QAbstractItemModel, SearchQueryParser): # {{{
def find(self, query):
query = query.strip()
if not query:
return QModelIndex()
matches = self.parse(query)
if not matches:
return QModelIndex()
@ -87,6 +89,8 @@ class PluginModel(QAbstractItemModel, SearchQueryParser): # {{{
def find_next(self, idx, query, backwards=False):
query = query.strip()
if not query:
return idx
matches = self.parse(query)
if not matches:
return idx

View File

@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
class StorePlugin(object): # {{{
'''
A plugin representing an online ebook repository (store). The store can
be a comercial store that sells ebooks or a source of free downloadable
be a commercial store that sells ebooks or a source of free downloadable
ebooks.
Note that this class is the base class for these plugins, however, to
@ -43,6 +43,8 @@ class StorePlugin(object): # {{{
The easiest way to handle affiliate money payouts is to randomly select
between the author's affiliate id and calibre's affiliate id so that
70% of the time the author's id is used.
See declined.txt for a list of stores that do not want to be included.
'''
def __init__(self, gui, name):

View File

@ -0,0 +1,92 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import urllib2
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class BeamEBooksDEStore(BasicStoreConfig, StorePlugin):
    '''
    Store plugin for beam-ebooks.de (German DRM-free ebook store).

    Implements the two StorePlugin entry points used by the Get Books
    feature: open() to show the storefront and search() to scrape
    search results from the site's result pages.
    '''

    def open(self, parent=None, detail_item=None, external=False):
        # Open the store (or a specific product page) either in the
        # user's external browser or in calibre's embedded web dialog.
        # Both URLs go through the affiliwelt.net affiliate redirector;
        # detail_item is the store's product id, substituted into
        # url_details via {0}.
        url = 'http://klick.affiliwelt.net/klick.php?bannerid=10072&pid=32307&prid=908'
        url_details = ('http://klick.affiliwelt.net/klick.php?'
                       'bannerid=10730&pid=32307&prid=908&prodid={0}')

        if external or self.config.get('open_external', False):
            # External browser: prefer the product detail URL when a
            # specific item was requested, otherwise the storefront.
            if detail_item:
                url = url_details.format(detail_item)
            open_url(QUrl(url))
        else:
            # Embedded browser dialog; detail_url may be None, in which
            # case the dialog starts at the storefront URL.
            detail_url = None
            if detail_item:
                detail_url = url_details.format(detail_item)
            d = WebStoreDialog(self.gui, url, parent, detail_url)
            d.setWindowTitle(self.name)
            d.set_tags(self.config.get('tags', ''))
            d.exec_()

    def search(self, query, max_results=10, timeout=60):
        '''
        Scrape the beam-ebooks.de search results page for *query* and
        yield up to *max_results* SearchResult objects.

        :param query: the search string (URL-quoted before use)
        :param max_results: maximum number of results to yield
        :param timeout: network timeout in seconds for the page fetch
        '''
        url = 'http://www.beam-ebooks.de/suchergebnis.php?Type=&sw=' + urllib2.quote(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            # Each result is rendered as a <table> containing a
            # div.stil2 cell with the title link.
            for data in doc.xpath('//table[tr/td/div[@class="stil2"]]'):
                if counter <= 0:
                    break

                # The product id is embedded in the title link's href;
                # the first 7 characters are a fixed prefix that is
                # stripped off (presumably something like '?id=' plus
                # path — TODO confirm against the live markup).
                id = ''.join(data.xpath('./tr/td/div[@class="stil2"]/a/@href')).strip()
                if not id:
                    continue
                id = id[7:]

                # Cover images are site-relative; make them absolute.
                cover_url = ''.join(data.xpath('./tr/td[1]/a/img/@src'))
                if cover_url:
                    cover_url = 'http://www.beam-ebooks.de' + cover_url

                title = ''.join(data.xpath('./tr/td/div[@class="stil2"]/a/b/text()'))
                # Authors appear as either <b> or <strong> children of
                # the title div, depending on the page; union both.
                author = ' '.join(data.xpath('./tr/td/div[@class="stil2"]/'
                                             'child::b/text()'
                                             '|'
                                             './tr/td/div[@class="stil2"]/'
                                             'child::strong/text()'))
                price = ''.join(data.xpath('./tr/td[3]/text()'))
                # Format availability is signalled by the alt text of
                # the download-icon images in the third cell.
                pdf = data.xpath(
                    'boolean(./tr/td[3]/a/img[contains(@alt, "PDF")]/@alt)')
                epub = data.xpath(
                    'boolean(./tr/td[3]/a/img[contains(@alt, "ePub")]/@alt)')
                mobi = data.xpath(
                    'boolean(./tr/td[3]/a/img[contains(@alt, "Mobipocket")]/@alt)')
                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                # beam-ebooks sells DRM-free titles only.
                s.drm = SearchResult.DRM_UNLOCKED
                s.detail_item = id
                formats = []
                if epub:
                    formats.append('ePub')
                if pdf:
                    formats.append('PDF')
                if mobi:
                    formats.append('MOBI')
                s.formats = ', '.join(formats)

                yield s

View File

@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
import random
import re
import urllib2
import urllib
from contextlib import closing
from lxml import html
@ -48,7 +48,7 @@ class BNStore(BasicStoreConfig, StorePlugin):
def search(self, query, max_results=10, timeout=60):
url = 'http://productsearch.barnesandnoble.com/search/results.aspx?STORE=EBOOK&SZE=%s&WRD=' % max_results
url += urllib2.quote(query)
url += urllib.quote_plus(query)
br = browser()

View File

@ -0,0 +1,5 @@
This is a list of stores that have objected to, declined,
or asked not to be included in the store integration.
* Borders (http://www.borders.com/)
* WH Smith (http://www.whsmith.co.uk/)

View File

@ -0,0 +1,80 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import urllib
import urllib2

from contextlib import closing

from lxml import html

from PyQt4.Qt import QUrl

from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class EPubBuyDEStore(BasicStoreConfig, StorePlugin):
    '''
    Store plugin for epubbuy.com (German DRM-free EPUB store).

    Implements the two StorePlugin entry points used by the Get Books
    feature: open() to show the storefront and search() to scrape
    search results from the site's search page.
    '''

    def open(self, parent=None, detail_item=None, external=False):
        # Open the store (or a specific product page) either in the
        # user's external browser or in calibre's embedded web dialog.
        # Both URLs go through the affiliwelt.net affiliate redirector;
        # detail_item is the store's article number, substituted into
        # url_details via {0}.
        url = 'http://klick.affiliwelt.net/klick.php?bannerid=47653&pid=32307&prid=2627'
        url_details = ('http://klick.affiliwelt.net/klick.php?bannerid=47653'
                       '&pid=32307&prid=2627&prodid={0}')

        if external or self.config.get('open_external', False):
            if detail_item:
                url = url_details.format(detail_item)
            open_url(QUrl(url))
        else:
            detail_url = None
            if detail_item:
                detail_url = url_details.format(detail_item)
            d = WebStoreDialog(self.gui, url, parent, detail_url)
            d.setWindowTitle(self.name)
            d.set_tags(self.config.get('tags', ''))
            d.exec_()

    def search(self, query, max_results=10, timeout=60):
        '''
        Scrape the epubbuy.com search results for *query* and yield up
        to *max_results* SearchResult objects.

        :param query: the search string
        :param max_results: maximum number of results to yield
        :param timeout: network timeout in seconds for the page fetch

        Uses urllib.quote_plus (spaces encoded as '+') rather than
        urllib2.quote, for correct form-style query encoding —
        consistent with the Barnes & Noble plugin.
        '''
        url = 'http://www.epubbuy.com/search.php?search_query=' + urllib.quote_plus(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//li[contains(@class, "ajax_block_product")]'):
                if counter <= 0:
                    break

                # The article number appears in a paragraph of the form
                # 'artnr: NNNN'; strip the 6-character 'artnr:' prefix.
                # (Renamed from 'id' to avoid shadowing the builtin.)
                art_id = ''.join(data.xpath('./div[@class="center_block"]'
                                            '/p[contains(text(), "artnr:")]/text()')).strip()
                if not art_id:
                    continue
                art_id = art_id[6:].strip()
                if not art_id:
                    continue

                cover_url = ''.join(data.xpath('./div[@class="center_block"]'
                                               '/a[@class="product_img_link"]/img/@src'))
                if cover_url:
                    # Cover paths are site-relative; make them absolute.
                    cover_url = 'http://www.epubbuy.com' + cover_url

                title = ''.join(data.xpath('./div[@class="center_block"]'
                                           '/a[@class="product_img_link"]/@title'))
                author = ''.join(data.xpath('./div[@class="center_block"]/a[2]/text()'))
                price = ''.join(data.xpath('.//span[@class="price"]/text()'))
                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                # epubbuy sells DRM-free EPUB titles only.
                s.drm = SearchResult.DRM_UNLOCKED
                s.detail_item = art_id
                s.formats = 'ePub'

                yield s

View File

@ -73,6 +73,6 @@ class FoylesUKStore(BasicStoreConfig, StorePlugin):
s.price = price
s.detail_item = id
s.drm = SearchResult.DRM_LOCKED
s.formats = 'EPUB'
s.formats = 'ePub'
yield s

View File

@ -12,6 +12,7 @@ from threading import Thread
from Queue import Queue
from calibre import browser
from calibre.constants import DEBUG
from calibre.utils.magick.draw import thumbnail
class GenericDownloadThreadPool(object):
@ -119,7 +120,8 @@ class SearchThread(Thread):
self.results.put((res, store_plugin))
self.tasks.task_done()
except:
traceback.print_exc()
if DEBUG:
traceback.print_exc()
class CoverThreadPool(GenericDownloadThreadPool):
@ -157,7 +159,8 @@ class CoverThread(Thread):
callback()
self.tasks.task_done()
except:
continue
if DEBUG:
traceback.print_exc()
class DetailsThreadPool(GenericDownloadThreadPool):
@ -191,7 +194,8 @@ class DetailsThread(Thread):
callback(result)
self.tasks.task_done()
except:
continue
if DEBUG:
traceback.print_exc()
class CacheUpdateThreadPool(GenericDownloadThreadPool):
@ -221,4 +225,5 @@ class CacheUpdateThread(Thread):
store_plugin, timeout = self.tasks.get()
store_plugin.update_cache(timeout=timeout, suppress_progress=True)
except:
traceback.print_exc()
if DEBUG:
traceback.print_exc()

View File

@ -23,8 +23,8 @@ TIMEOUT = 75 # seconds
class SearchDialog(QDialog, Ui_Dialog):
def __init__(self, istores, *args):
QDialog.__init__(self, *args)
def __init__(self, istores, parent=None, query=''):
QDialog.__init__(self, parent)
self.setupUi(self)
self.config = JSONConfig('store/search')
@ -47,13 +47,16 @@ class SearchDialog(QDialog, Ui_Dialog):
# per search basis.
stores_group_layout = QVBoxLayout()
self.stores_group.setLayout(stores_group_layout)
for x in self.store_plugins:
for x in sorted(self.store_plugins.keys(), key=lambda x: x.lower()):
cbox = QCheckBox(x)
cbox.setChecked(True)
stores_group_layout.addWidget(cbox)
setattr(self, 'store_check_' + x, cbox)
stores_group_layout.addStretch()
# Set the search query
self.search_edit.setText(query)
# Create and add the progress indicator
self.pi = ProgressIndicator(self, 24)
self.top_layout.addWidget(self.pi)
@ -93,7 +96,7 @@ class SearchDialog(QDialog, Ui_Dialog):
# Store / Formats
self.results_view.setColumnWidth(4, int(total*.25))
def do_search(self, checked=False):
def do_search(self):
# Stop all running threads.
self.checker.stop()
self.search_pool.abort()
@ -136,14 +139,17 @@ class SearchDialog(QDialog, Ui_Dialog):
query = query.replace('>', '')
query = query.replace('<', '')
# Remove the prefix.
for loc in ( 'all', 'author', 'authors', 'title'):
query = re.sub(r'%s:"?(?P<a>[^\s"]+)"?' % loc, '\g<a>', query)
for loc in ('all', 'author', 'authors', 'title'):
query = re.sub(r'%s:"(?P<a>[^\s"]+)"' % loc, '\g<a>', query)
query = query.replace('%s:' % loc, '')
# Remove the prefix and search text.
for loc in ('cover', 'drm', 'format', 'formats', 'price', 'store'):
query = re.sub(r'%s:"[^"]"' % loc, '', query)
query = re.sub(r'%s:[^\s]*' % loc, '', query)
# Remove logic.
query = re.sub(r'(^|\s)(and|not|or)(\s|$)', ' ', query)
query = re.sub(r'(^|\s)(and|not|or|a|the|is|of)(\s|$)', ' ', query)
# Remove "
query = query.replace('"', '')
# Remove excess whitespace.
query = re.sub(r'\s{2,}', ' ', query)
query = query.strip()
@ -252,4 +258,9 @@ class SearchDialog(QDialog, Ui_Dialog):
self.search_pool.abort()
self.cache_pool.abort()
self.save_state()
def exec_(self):
    # If the dialog was constructed with a pre-filled query (the
    # 'search by title/author of current book' menu entries), start
    # the search immediately before showing the dialog modally.
    if unicode(self.search_edit.text()).strip():
        self.do_search()
    return QDialog.exec_(self)

View File

@ -76,7 +76,7 @@ class WaterstonesUKStore(BasicStoreConfig, StorePlugin):
s.detail_item = id
formats = []
if epub:
formats.append('EPUB')
formats.append('ePub')
if pdf:
formats.append('PDF')
s.formats = ', '.join(formats)

Some files were not shown because too many files have changed in this diff Show More