Sync to trunk.

This commit is contained in:
John Schember 2011-07-06 18:36:51 -04:00
commit 10ef6cf086
239 changed files with 72629 additions and 59626 deletions

View File

@ -19,12 +19,92 @@
# new recipes: # new recipes:
# - title: # - title:
- version: 0.8.8
date: 2011-07-01
new features:
- title: "Make author names in the Book Details panel clickable. Clicking them takes you to the wikipedia page for the author by default. You may have to tell calibre to display author names in the Book details panel first via Preferences->Look & Feel->Book details. You can change the link for individual authors by right clicking on the author's name in the Tag Browser and selecting Manage Authors."
- title: "Get Books: Add 'Open Books' as an available book source"
- title: "Get Books: When a free download is available for a search result, for example, for public domain books, allow direct download of the book into your calibre library."
- title: "Support for detecting and mounting reader devices on FreeBSD."
tickets: [802708]
- title: "When creating a composite custom column, allow the use of HTML to create links and other markup that display in the Book details panel"
- title: "Add the swap_around_comma function to the template language."
- title: "Drivers for HTC G2, Advent Vega, iRiver Story HD, Lark FreeMe and Moovyman mp7"
- title: "Quick View: Survives changing libraries. Also allow sorting by series index as well as name."
- title: "Connect to iTunes: Add an option to control how the driver works depending on whether you have iTunes setup to copy files to its media directory or not. Set this option by customizing the Apple driver in Preferences->Plugins. Having iTunes copy media to its storage folder is no longer necessary. See http://www.mobileread.com/forums/showthread.php?t=118559 for details"
- title: "Remove the delete library functionality from calibre, instead you can now remove a library, so calibre will forget about it, but you have to delete the files manually"
bug fixes:
- title: "Fix a regression introduced in 0.8.7 in the Tag Browser that could cause calibre to crash after performing various actions"
- title: "Fix an unhandled error when deleting all saved searches"
tickets: [804383]
- title: "Fix row numbers in a previous selection being incorrect after a sort operation."
- title: "Fix ISBN identifier type not recognized if it is in upper case"
tickets: [802288]
- title: "Fix a regression in 0.8.7 that broke reading metadata from MOBI files in the Edit metadata dialog."
tickets: [801981]
- title: "Fix handling of filenames that have an even number of periods before the file extension."
tickets: [801939]
- title: "Fix lack of thread safety in template format system, that could lead to incorrect template evaluation in some cases."
tickets: [801944]
- title: "Fix conversion to PDB when the input document has no text"
tickets: [801888]
- title: "Fix clicking on first letter of author names generating incorrect search."
- title: "Also fix updating bulk metadata in custom column causing unnecessary Tag Browser refreshes."
- title: "Fix a regression in 0.8.7 that broke renaming items via the Tag Browser"
- title: "Fix a regression in 0.8.7 that caused the regex builder wizard to fail with LIT files as the input"
improved recipes:
- Zaman Gazetesi
- Infobae
- El Cronista
- Critica de la Argentina
- Buenos Aires Economico
- El Universal (Venezuela)
- wprost
- Financial Times UK
new recipes:
- title: "Today's Zaman by thomass"
- title: "Athens News by Darko Miletic"
- title: "Catholic News Agency"
author: Jetkey
- title: "Arizona Republic"
author: Jim Olo
- title: "Add Ming Pao Vancouver and Toronto"
author: Eddie Lau
- version: 0.8.7 - version: 0.8.7
date: 2011-06-24 date: 2011-06-24
new features: new features:
- title: "Connect to iTunes: You now need to tell iTunes to keep its own copy of every ebook. Do this in iTunes by going to Preferences->Advanced and setting the 'Copy files to iTunes Media folder when adding to library' option. To learn about why this is necessary, see: http://www.mobileread.com/forums/showthread.php?t=140260" - title: "Connect to iTunes: You now need to tell iTunes to keep its own copy of every ebook. Do this in iTunes by going to Preferences->Advanced and setting the 'Copy files to iTunes Media folder when adding to library' option. To learn about why this is necessary, see: http://www.mobileread.com/forums/showthread.php?t=140260"
type: major
- title: "Add a couple of date related functions to the calibre template language to get 'todays' date and create text based on the value of a date type field" - title: "Add a couple of date related functions to the calibre template language to get 'todays' date and create text based on the value of a date type field"

View File

@ -0,0 +1,68 @@
__license__ = 'GPL v3'
__copyright__ = '2010, jolo'
'''
azrepublic.com
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1307301031(BasicNewsRecipe):
    """News recipe for The Arizona Republic.

    The class is pure configuration: publication metadata, fetch limits,
    a stylesheet, the page elements to strip, and the RSS feeds to read
    (azcentral.com feeds plus AP wire feeds hosted for the paper).
    """

    # ---- publication metadata -------------------------------------------
    title = u'AZRepublic'
    __author__ = 'Jim Olo'
    language = 'en'
    publisher = 'AZRepublic/AZCentral'
    category = 'news, politics, USA, AZ, Arizona'
    description = "The Arizona Republic is Arizona's leading provider of news and information, and has published a daily newspaper in Phoenix for more than 110 years"
    masthead_url = 'http://freedom2t.com/wp-content/uploads/press_az_republic_v2.gif'
    cover_url = 'http://www.valleyleadership.org/Common/Img/2line4c_AZRepublic%20with%20azcentral%20logo.jpg'

    # ---- fetch limits and clean-up switches -----------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    remove_javascript = True

    # ---- presentation ---------------------------------------------------
    extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .headline {font-size: medium} .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '

    # Names listed here are stripped as attributes from every tag.
    # NOTE(review): 'h2' and 'subHeadline' look like a tag name and a class
    # name rather than attribute names -- confirm they are intentional.
    remove_attributes = ['width', 'height', 'h2', 'subHeadline', 'style']

    # ---- page furniture to drop -----------------------------------------
    # Each entry is a tag specification; entries are kept in their original
    # order and with their original values.
    remove_tags = [
        {'name': 'div', 'attrs': {'id': ['slidingBillboard', 'top728x90', 'subindex-header', 'topSearch']}},
        {'name': 'div', 'attrs': {'id': ['simplesearch', 'azcLoginBox', 'azcLoginBoxInner', 'topNav']}},
        {'name': 'div', 'attrs': {'id': ['carsDrop', 'homesDrop', 'rentalsDrop', 'classifiedDrop']}},
        {'name': 'div', 'attrs': {'id': ['nav', 'mp', 'subnav', 'jobsDrop']}},
        {'name': 'h6', 'attrs': {'class': ['section-header']}},
        {'name': 'a', 'attrs': {'href': ['#comments']}},
        {'name': 'div', 'attrs': {'class': ['articletools clearfix', 'floatRight']}},
        {'name': 'div', 'attrs': {'id': ['fbFrame', 'ob', 'storyComments', 'storyGoogleAdBox']}},
        {'name': 'div', 'attrs': {'id': ['storyTopHomes', 'openRight', 'footerwrap', 'copyright']}},
        {'name': 'div', 'attrs': {'id': ['blogsHed', 'blog_comments', 'blogByline', 'blogTopics']}},
        {'name': 'div', 'attrs': {'id': ['membersRightMain', 'dealsfooter', 'azrTopHed', 'azrRightCol']}},
        {'name': 'div', 'attrs': {'id': ['ttdHeader', 'ttdTimeWeather']}},
        {'name': 'div', 'attrs': {'id': ['membersRightMain', 'deals-header-wrap']}},
        {'name': 'div', 'attrs': {'id': ['todoTopSearchBar', 'byline clearfix', 'subdex-topnav']}},
        {'name': 'h1', 'attrs': {'id': ['SEOtext']}},
        {'name': 'table', 'attrs': {'class': ['ap-mediabox-table']}},
        {'name': 'p', 'attrs': {'class': ['ap_para']}},
        {'name': 'span', 'attrs': {'class': ['source-org vcard', 'org fn']}},
        {'name': 'a', 'attrs': {'href': ['http://hosted2.ap.org/APDEFAULT/privacy']}},
        {'name': 'a', 'attrs': {'href': ['http://hosted2.ap.org/APDEFAULT/terms']}},
        {'name': 'div', 'attrs': {'id': ['onespot_nextclick']}},
    ]

    # ---- feeds: (section title, feed URL) -------------------------------
    feeds = [
        (u'FrontPage', u'http://www.azcentral.com/rss/feeds/republicfront.xml'),
        (u'TopUS-News', u'http://hosted.ap.org/lineups/USHEADS.rss?SITE=AZPHG&SECTION=HOME'),
        (u'WorldNews', u'http://hosted.ap.org/lineups/WORLDHEADS.rss?SITE=AZPHG&SECTION=HOME'),
        (u'TopBusiness', u'http://hosted.ap.org/lineups/BUSINESSHEADS.rss?SITE=AZPHG&SECTION=HOME'),
        (u'Entertainment', u'http://hosted.ap.org/lineups/ENTERTAINMENT.rss?SITE=AZPHG&SECTION=HOME'),
        (u'ArizonaNews', u'http://www.azcentral.com/rss/feeds/news.xml'),
        (u'Gilbert', u'http://www.azcentral.com/rss/feeds/gilbert.xml'),
        (u'Chandler', u'http://www.azcentral.com/rss/feeds/chandler.xml'),
        (u'DiningReviews', u'http://www.azcentral.com/rss/feeds/diningreviews.xml'),
        (u'AZBusiness', u'http://www.azcentral.com/rss/feeds/business.xml'),
        (u'ArizonaDeals', u'http://www.azcentral.com/members/Blog%7E/RealDealsblog'),
        (u'GroceryDeals', u'http://www.azcentral.com/members/Blog%7E/RealDealsblog/tag/2646'),
    ]

View File

@ -0,0 +1,70 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.athensnews.gr
'''
from calibre.web.feeds.news import BasicNewsRecipe
class AthensNews(BasicNewsRecipe):
    """News recipe for Athens News (www.athensnews.gr).

    Articles are fetched through the site's print view (see
    ``print_version``) and have inline ``style`` attributes removed
    (see ``preprocess_html``).
    """

    # ---- publication metadata -------------------------------------------
    title = 'Athens News'
    __author__ = 'Darko Miletic'
    description = 'Greece in English since 1952'
    publisher = 'NEP Publishing Company SA'
    category = 'news, politics, Greece, Athens'
    language = 'en_GR'
    publication_type = 'newspaper'
    masthead_url = 'http://www.athensnews.gr/sites/athensnews/themes/athensnewsv3/images/logo.jpg'

    # ---- fetch behaviour ------------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    remove_empty_feeds = True

    # ---- presentation ---------------------------------------------------
    # The stylesheet text is kept flush-left so the string is unchanged.
    extra_css = """
body{font-family: Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block}
.big{font-size: xx-large; font-family: Georgia,serif}
.articlepubdate{font-size: small; color: gray; font-family: Georgia,serif}
.lezanta{font-size: x-small; font-weight: bold; text-align: left; margin-bottom: 1em; display: block}
"""

    # NOTE(review): the key is spelled 'comment' here while another recipe
    # in this commit uses 'comments' -- confirm which name is honoured.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
        'linearize_tables': True,
    }

    # ---- content selection ----------------------------------------------
    remove_tags = [{'name': ['meta', 'link']}]
    keep_only_tags = [
        {'name': 'span', 'attrs': {'class': 'big'}},
        {'name': 'td', 'attrs': {'class': ['articlepubdate', 'text']}},
    ]
    remove_attributes = ['lang']

    # ---- feeds: (section title, feed URL) -------------------------------
    feeds = [
        (u'News', u'http://www.athensnews.gr/category/1/feed'),
        (u'Politics', u'http://www.athensnews.gr/category/8/feed'),
        (u'Business', u'http://www.athensnews.gr/category/2/feed'),
        (u'Economy', u'http://www.athensnews.gr/category/11/feed'),
        (u'Community', u'http://www.athensnews.gr/category/5/feed'),
        (u'Arts', u'http://www.athensnews.gr/category/3/feed'),
        (u'Living in Athens', u'http://www.athensnews.gr/category/7/feed'),
        (u'Sports', u'http://www.athensnews.gr/category/4/feed'),
        (u'Travel', u'http://www.athensnews.gr/category/6/feed'),
        (u'Letters', u'http://www.athensnews.gr/category/44/feed'),
        (u'Media', u'http://www.athensnews.gr/multimedia/feed'),
    ]

    def print_version(self, url):
        """Return ``url`` with the ``?action=print`` query string appended."""
        print_suffix = '?action=print'
        return url + print_suffix

    def preprocess_html(self, soup):
        """Delete the ``style`` attribute from every tag that has one.

        Returns the same ``soup`` object after modifying it in place.
        """
        styled_tags = soup.findAll(style=True)
        for styled in styled_tags:
            del styled['style']
        return soup

View File

@ -1,72 +1,59 @@
#!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
elargentino.com www.diariobae.com
''' '''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class BsAsEconomico(BasicNewsRecipe): class BsAsEconomico(BasicNewsRecipe):
title = 'Buenos Aires Economico' title = 'Buenos Aires Economico'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Revista Argentina' description = 'Diario BAE es el diario economico-politico con mas influencia en la Argentina. Fuente de empresarios y politicos del pais y el exterior. El pozo estaria aportando en periodos breves un volumen equivalente a 800m3 diarios. Pero todavia deben efectuarse otras perforaciones adicionales.'
publisher = 'ElArgentino.com' publisher = 'Diario BAE'
category = 'news, politics, economy, Argentina' category = 'news, politics, economy, Argentina'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'utf-8' encoding = 'utf-8'
language = 'es_AR' language = 'es_AR'
cover_url = strftime('http://www.diariobae.com/imgs_portadas/%Y%m%d_portadasBAE.jpg')
masthead_url = 'http://www.diariobae.com/img/logo_bae.png'
remove_empty_feeds = True
publication_type = 'newspaper'
extra_css = """
body{font-family: Georgia,"Times New Roman",Times,serif}
#titulo{font-size: x-large}
#epi{font-size: small; font-style: italic; font-weight: bold}
img{display: block; margin-top: 1em}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
lang = 'es-AR' remove_tags_before= dict(attrs={'id':'titulo'})
direction = 'ltr' remove_tags_after = dict(attrs={'id':'autor' })
INDEX = 'http://www.elargentino.com/medios/121/Buenos-Aires-Economico.html' remove_tags = [
extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} ' dict(name=['meta','base','iframe','link','lang'])
,dict(attrs={'id':'barra_tw'})
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher', publisher
] ]
remove_attributes = ['data-count','data-via']
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "' feeds = [
(u'Argentina' , u'http://www.diariobae.com/rss/argentina.xml' )
keep_only_tags = [dict(name='div', attrs={'class':'ContainerPop'})] ,(u'Valores' , u'http://www.diariobae.com/rss/valores.xml' )
,(u'Finanzas' , u'http://www.diariobae.com/rss/finanzas.xml' )
remove_tags = [dict(name='link')] ,(u'Negocios' , u'http://www.diariobae.com/rss/negocios.xml' )
,(u'Mundo' , u'http://www.diariobae.com/rss/mundo.xml' )
feeds = [(u'Articulos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=121&Content-Type=text/xml&ChannelDesc=Buenos%20Aires%20Econ%C3%B3mico')] ,(u'5 dias' , u'http://www.diariobae.com/rss/5dias.xml' )
,(u'Espectaculos', u'http://www.diariobae.com/rss/espectaculos.xml')
def print_version(self, url): ]
main, sep, article_part = url.partition('/nota-')
article_id, rsep, rrest = article_part.partition('-')
return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
soup.html['lang'] = self.lang
soup.html['dir' ] = self.direction
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
soup.head.insert(0,mlang)
soup.head.insert(1,mcharset)
return soup return soup
def get_cover_url(self):
cover_url = None
soup = self.index_to_soup(self.INDEX)
cover_item = soup.find('div',attrs={'class':'colder'})
if cover_item:
clean_url = self.image_url_processor(None,cover_item.div.img['src'])
cover_url = 'http://www.elargentino.com' + clean_url + '&height=600'
return cover_url
def image_url_processor(self, baseurl, url):
base, sep, rest = url.rpartition('?Id=')
img, sep2, rrest = rest.partition('&')
return base + sep + img

View File

@ -0,0 +1,13 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1301972345(BasicNewsRecipe):
    """News recipe for Catholic News Agency, read from four FeedBurner feeds."""

    # ---- publication metadata -------------------------------------------
    title = u'Catholic News Agency'
    __author__ = 'Jetkey'
    language = 'en'

    # ---- fetch limits ---------------------------------------------------
    oldest_article = 5
    max_articles_per_feed = 20

    # ---- feeds: (section title, feed URL) -------------------------------
    feeds = [
        (u'U.S. News', u'http://feeds.feedburner.com/catholicnewsagency/dailynews-us'),
        (u'Vatican', u'http://feeds.feedburner.com/catholicnewsagency/dailynews-vatican'),
        (u'Bishops Corner', u'http://feeds.feedburner.com/catholicnewsagency/columns/bishopscorner'),
        (u'Saint of the Day', u'http://feeds.feedburner.com/catholicnewsagency/saintoftheday'),
    ]

View File

@ -1,83 +1,63 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import re
class Cracked(BasicNewsRecipe): class Cracked(BasicNewsRecipe):
title = u'Cracked.com' title = u'Cracked.com'
__author__ = u'Nudgenudge' __author__ = 'UnWeave'
language = 'en' language = 'en'
description = 'America''s Only Humor and Video Site, since 1958' description = "America's Only HumorSite since 1958"
publisher = 'Cracked' publisher = 'Cracked'
category = 'comedy, lists' category = 'comedy, lists'
oldest_article = 2 oldest_article = 3 #days
delay = 10 max_articles_per_feed = 100
max_articles_per_feed = 2
no_stylesheets = True no_stylesheets = True
encoding = 'cp1252' encoding = 'ascii'
remove_javascript = True remove_javascript = True
use_embedded_content = False use_embedded_content = False
INDEX = u'http://www.cracked.com'
extra_css = """ feeds = [ (u'Articles', u'http://feeds.feedburner.com/CrackedRSS/') ]
.pageheader_type{font-size: x-large; font-weight: bold; color: #828D74}
.pageheader_title{font-size: xx-large; color: #394128}
.pageheader_byline{font-size: small; font-weight: bold; color: #394128}
.score_bg {display: inline; width: 100%; margin-bottom: 2em}
.score_column_1{ padding-left: 10px; font-size: small; width: 50%}
.score_column_2{ padding-left: 10px; font-size: small; width: 50%}
.score_column_3{ padding-left: 10px; font-size: small; width: 50%}
.score_header{font-size: large; color: #50544A}
.bodytext{display: block}
body{font-family: Helvetica,Arial,sans-serif}
"""
conversion_options = { conversion_options = {
'comment' : description 'comment' : description
, 'tags' : category , 'tags' : category
, 'publisher' : publisher , 'publisher' : publisher
, 'language' : language , 'language' : language
, 'linearize_tables' : True
} }
keep_only_tags = [ remove_tags_before = dict(id='PrimaryContent')
dict(name='div', attrs={'class':['Column1']})
]
feeds = [(u'Articles', u'http://feeds.feedburner.com/CrackedRSS')] remove_tags_after = dict(name='div', attrs={'class':'shareBar'})
def get_article_url(self, article): remove_tags = [ dict(name='div', attrs={'class':['social',
return article.get('guid', None) 'FacebookLike',
'shareBar'
]}),
def cleanup_page(self, soup): dict(name='div', attrs={'id':['inline-share-buttons',
for item in soup.findAll(style=True): ]}),
del item['style']
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
for div_to_remove in soup.findAll('div', attrs={'id':['googlead_1','fb-like-article','comments_section']}):
div_to_remove.extract()
for div_to_remove in soup.findAll('div', attrs={'class':['share_buttons_col_1','GenericModule1']}):
div_to_remove.extract()
for div_to_remove in soup.findAll('div', attrs={'class':re.compile("prev_next")}):
div_to_remove.extract()
for ul_to_remove in soup.findAll('ul', attrs={'class':['Nav6']}):
ul_to_remove.extract()
for image in soup.findAll('img', attrs={'alt': 'article image'}):
image.extract()
def append_page(self, soup, appendtag, position): dict(name='span', attrs={'class':['views',
pager = soup.find('a',attrs={'class':'next_arrow_active'}) 'KonaFilter'
if pager: ]}),
nexturl = self.INDEX + pager['href'] #dict(name='img'),
soup2 = self.index_to_soup(nexturl) ]
texttag = soup2.find('div', attrs={'class':re.compile("userStyled")})
newpos = len(texttag.contents) def appendPage(self, soup, appendTag, position):
self.append_page(soup2,texttag,newpos) # Check if article has multiple pages
texttag.extract() pageNav = soup.find('nav', attrs={'class':'PaginationContent'})
self.cleanup_page(appendtag) if pageNav:
appendtag.insert(position,texttag) # Check not at last page
else: nextPage = pageNav.find('a', attrs={'class':'next'})
self.cleanup_page(appendtag) if nextPage:
nextPageURL = nextPage['href']
nextPageSoup = self.index_to_soup(nextPageURL)
# 8th <section> tag contains article content
nextPageContent = nextPageSoup.findAll('section')[7]
newPosition = len(nextPageContent.contents)
self.appendPage(nextPageSoup,nextPageContent,newPosition)
nextPageContent.extract()
pageNav.extract()
appendTag.insert(position,nextPageContent)
def preprocess_html(self, soup): def preprocess_html(self, soup):
self.append_page(soup, soup.body, 3) self.appendPage(soup, soup.body, 3)
return self.adeify_images(soup) return soup

View File

@ -1,69 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
criticadigital.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class CriticaDigital(BasicNewsRecipe):
    """News recipe for Critica de la Argentina (criticadigital.com).

    Besides the static configuration, the recipe looks up a cover image
    on the site's ``/impresa/`` page (see ``get_cover_url``).
    """

    # ---- publication metadata -------------------------------------------
    title = 'Critica de la Argentina'
    __author__ = 'Darko Miletic and Sujata Raman'
    description = 'Noticias de Argentina'
    language = 'es_AR'

    # ---- fetch behaviour ------------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'

    # ---- presentation ---------------------------------------------------
    # The stylesheet text is kept flush-left so the string is unchanged.
    extra_css = '''
h1{font-family:"Trebuchet MS";}
h3{color:#9A0000; font-family:Tahoma; font-size:x-small;}
h2{color:#504E53; font-family:Arial,Helvetica,sans-serif ;font-size:small;}
#epigrafe{font-family:Arial,Helvetica,sans-serif ;color:#666666 ; font-size:x-small;}
p {font-family:Arial,Helvetica,sans-serif;}
#fecha{color:#858585; font-family:Tahoma; font-size:x-small;}
#autor{color:#858585; font-family:Tahoma; font-size:x-small;}
#hora{color:#F00000;font-family:Tahoma; font-size:x-small;}
'''

    # ---- content selection ----------------------------------------------
    keep_only_tags = [
        dict(name='div', attrs={'class': ['bloqueTitulosNoticia', 'cfotonota']}),
        dict(name='div', attrs={'id': 'boxautor'}),
        dict(name='p', attrs={'id': 'textoNota'}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': 'box300'}),
        # Matches any <div> that carries a style attribute.
        dict(name='div', style=True),
        dict(name='div', attrs={'class': 'titcomentario'}),
        dict(name='div', attrs={'class': 'comentario'}),
        dict(name='div', attrs={'class': 'paginador'}),
    ]

    # ---- feeds: (section title, feed URL) -------------------------------
    feeds = [
        (u'Politica', u'http://www.criticadigital.com/herramientas/rss.php?ch=politica'),
        (u'Economia', u'http://www.criticadigital.com/herramientas/rss.php?ch=economia'),
        (u'Deportes', u'http://www.criticadigital.com/herramientas/rss.php?ch=deportes'),
        (u'Espectaculos', u'http://www.criticadigital.com/herramientas/rss.php?ch=espectaculos'),
        (u'Mundo', u'http://www.criticadigital.com/herramientas/rss.php?ch=mundo'),
        (u'Policiales', u'http://www.criticadigital.com/herramientas/rss.php?ch=policiales'),
        (u'Sociedad', u'http://www.criticadigital.com/herramientas/rss.php?ch=sociedad'),
        (u'Salud', u'http://www.criticadigital.com/herramientas/rss.php?ch=salud'),
        (u'Tecnologia', u'http://www.criticadigital.com/herramientas/rss.php?ch=tecnologia'),
        (u'Santa Fe', u'http://www.criticadigital.com/herramientas/rss.php?ch=santa_fe'),
    ]

    def get_cover_url(self):
        """Return the cover image URL, or ``None`` when it cannot be found.

        The ``/impresa/`` page is fetched and searched for a
        ``<div class="tapa">``; the ``src`` of the ``<img>`` inside it is
        appended to the page URL. ``None`` is returned when the div, the
        image, or its ``src`` attribute is missing, instead of raising on
        changed markup.
        """
        index = 'http://www.criticadigital.com/impresa/'
        soup = self.index_to_soup(index)
        link_item = soup.find('div', attrs={'class': 'tapa'})
        if not link_item:
            return None
        # Attribute-style navigation yields None when there is no <img>.
        img = link_item.img
        if img is None:
            return None
        src = img.get('src')
        if not src:
            return None
        return index + src

View File

@ -1,72 +1,59 @@
#!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
cronista.com www.cronista.com
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class ElCronista(BasicNewsRecipe): class Pagina12(BasicNewsRecipe):
title = 'El Cronista' title = 'El Cronista Comercial'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Noticias de Argentina' description = 'El Cronista Comercial es el Diario economico-politico mas valorado. Es la fuente mas confiable de informacion en temas de economia, finanzas y negocios enmarcados politicamente.'
publisher = 'Cronista.com'
category = 'news, politics, economy, finances, Argentina'
oldest_article = 2 oldest_article = 2
language = 'es_AR' max_articles_per_feed = 200
max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False use_embedded_content = False
encoding = 'cp1252' language = 'es_AR'
remove_empty_feeds = True
publication_type = 'newspaper'
masthead_url = 'http://www.cronista.com/export/sites/diarioelcronista/arte/header-logo.gif'
extra_css = """
body{font-family: Arial,Helvetica,sans-serif }
h2{font-family: Georgia,"Times New Roman",Times,serif }
img{margin-bottom: 0.4em; display:block}
.nom{font-weight: bold; vertical-align: baseline}
.autor-cfoto{border-bottom: 1px solid #D2D2D2;
border-top: 1px solid #D2D2D2;
display: inline-block;
margin: 0 10px 10px 0;
padding: 10px;
width: 210px}
.under{font-weight: bold}
.time{font-size: small}
"""
html2lrf_options = [ conversion_options = {
'--comment' , description 'comment' : description
, '--category' , 'news, Argentina' , 'tags' : category
, '--publisher' , title , 'publisher' : publisher
] , 'language' : language
}
keep_only_tags = [ remove_tags = [
dict(name='table', attrs={'width':'100%' }) dict(name=['meta','link','base','iframe','object','embed'])
,dict(name='h1' , attrs={'class':'Arialgris16normal'}) ,dict(attrs={'class':['user-tools','tabsmedia']})
] ]
remove_attributes = ['lang']
remove_tags_before = dict(attrs={'class':'top'})
remove_tags_after = dict(attrs={'class':'content-nota'})
feeds = [(u'Ultimas noticias', u'http://www.cronista.com/rss.html')]
remove_tags = [dict(name='a', attrs={'class':'Arialazul12'})]
feeds = [
(u'Economia' , u'http://www.cronista.com/adjuntos/8/rss/Economia_EI.xml' )
,(u'Negocios' , u'http://www.cronista.com/adjuntos/8/rss/negocios_EI.xml' )
,(u'Ultimo momento' , u'http://www.cronista.com/adjuntos/8/rss/ultimo_momento.xml' )
,(u'Finanzas y Mercados' , u'http://www.cronista.com/adjuntos/8/rss/Finanzas_Mercados_EI.xml' )
,(u'Financial Times' , u'http://www.cronista.com/adjuntos/8/rss/FT_EI.xml' )
,(u'Opinion edicion impresa' , u'http://www.cronista.com/adjuntos/8/rss/opinion_edicion_impresa.xml' )
,(u'Socialmente Responsables', u'http://www.cronista.com/adjuntos/8/rss/Socialmente_Responsables.xml')
,(u'Asuntos Legales' , u'http://www.cronista.com/adjuntos/8/rss/asuntoslegales.xml' )
,(u'IT Business' , u'http://www.cronista.com/adjuntos/8/rss/itbusiness.xml' )
,(u'Management y RR.HH.' , u'http://www.cronista.com/adjuntos/8/rss/management.xml' )
,(u'Inversiones Personales' , u'http://www.cronista.com/adjuntos/8/rss/inversionespersonales.xml' )
]
def print_version(self, url):
main, sep, rest = url.partition('.com/notas/')
article_id, lsep, rrest = rest.partition('-')
return 'http://www.cronista.com/interior/index.php?p=imprimir_nota&idNota=' + article_id
def preprocess_html(self, soup): def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">' for item in soup.findAll(style=True):
soup.head.insert(0,mtag) del item['style']
soup.head.base.extract()
htext = soup.find('h1',attrs={'class':'Arialgris16normal'})
htext.name = 'p'
soup.prettify()
return soup return soup
def get_cover_url(self):
cover_url = None
index = 'http://www.cronista.com/contenidos/'
soup = self.index_to_soup(index + 'ee.html')
link_item = soup.find('a',attrs={'href':"javascript:Close()"})
if link_item:
cover_url = index + link_item.img['src']
return cover_url

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
www.eluniversal.com www.eluniversal.com
''' '''
@ -15,12 +15,20 @@ class ElUniversal(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
remove_empty_feeds = True
encoding = 'cp1252' encoding = 'cp1252'
publisher = 'El Universal' publisher = 'El Universal'
category = 'news, Caracas, Venezuela, world' category = 'news, Caracas, Venezuela, world'
language = 'es_VE' language = 'es_VE'
publication_type = 'newspaper'
cover_url = strftime('http://static.eluniversal.com/%Y/%m/%d/portada.jpg') cover_url = strftime('http://static.eluniversal.com/%Y/%m/%d/portada.jpg')
extra_css = """
.txt60{font-family: Tahoma,Geneva,sans-serif; font-size: small}
.txt29{font-family: Tahoma,Geneva,sans-serif; font-size: small; color: gray}
.txt38{font-family: Georgia,"Times New Roman",Times,serif; font-size: xx-large}
.txt35{font-family: Georgia,"Times New Roman",Times,serif; font-size: large}
body{font-family: Verdana,Arial,Helvetica,sans-serif}
"""
conversion_options = { conversion_options = {
'comments' : description 'comments' : description
,'tags' : category ,'tags' : category
@ -28,10 +36,11 @@ class ElUniversal(BasicNewsRecipe):
,'publisher' : publisher ,'publisher' : publisher
} }
keep_only_tags = [dict(name='div', attrs={'class':'Nota'})] remove_tags_before=dict(attrs={'class':'header-print MB10'})
remove_tags_after= dict(attrs={'id':'SizeText'})
remove_tags = [ remove_tags = [
dict(name=['object','link','script','iframe']) dict(name=['object','link','script','iframe','meta'])
,dict(name='div',attrs={'class':'Herramientas'}) ,dict(attrs={'class':'header-print MB10'})
] ]
feeds = [ feeds = [

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008 - 2009, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = 'Copyright 2011 Starson17'
''' '''
engadget.com engadget.com
''' '''
@ -9,14 +9,29 @@ engadget.com
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Engadget(BasicNewsRecipe): class Engadget(BasicNewsRecipe):
title = u'Engadget' title = u'Engadget_Full'
__author__ = 'Darko Miletic' __author__ = 'Starson17'
__version__ = 'v1.00'
__date__ = '02, July 2011'
description = 'Tech news' description = 'Tech news'
language = 'en' language = 'en'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = True use_embedded_content = False
remove_javascript = True
remove_empty_feeds = True
feeds = [ (u'Posts', u'http://www.engadget.com/rss.xml')] keep_only_tags = [dict(name='div', attrs={'class':['post_content permalink ','post_content permalink alt-post-full']})]
remove_tags = [dict(name='div', attrs={'class':['filed_under','post_footer']})]
remove_tags_after = [dict(name='div', attrs={'class':['post_footer']})]
feeds = [(u'Posts', u'http://www.engadget.com/rss.xml')]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

View File

@ -1,32 +1,41 @@
#!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
ft.com www.ft.com
''' '''
import datetime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class FinancialTimes(BasicNewsRecipe): class FinancialTimes_rss(BasicNewsRecipe):
title = u'Financial Times' title = 'Financial Times'
__author__ = 'Darko Miletic and Sujata Raman' __author__ = 'Darko Miletic'
description = ('Financial world news. Available after 5AM ' description = "The Financial Times (FT) is one of the world's leading business news and information organisations, recognised internationally for its authority, integrity and accuracy."
'GMT, daily.') publisher = 'The Financial Times Ltd.'
category = 'news, finances, politics, World'
oldest_article = 2 oldest_article = 2
language = 'en' language = 'en'
max_articles_per_feed = 250
max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
needs_subscription = True needs_subscription = True
simultaneous_downloads= 1 encoding = 'utf8'
delay = 1 publication_type = 'newspaper'
masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
LOGIN = 'https://registration.ft.com/registration/barrier/login'
INDEX = 'http://www.ft.com'
LOGIN = 'https://registration.ft.com/registration/barrier/login' conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : True
}
def get_browser(self): def get_browser(self):
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
br.open(self.INDEX)
if self.username is not None and self.password is not None: if self.username is not None and self.password is not None:
br.open(self.LOGIN) br.open(self.LOGIN)
br.select_form(name='loginForm') br.select_form(name='loginForm')
@ -35,31 +44,63 @@ class FinancialTimes(BasicNewsRecipe):
br.submit() br.submit()
return br return br
keep_only_tags = [ dict(name='div', attrs={'id':'cont'}) ] keep_only_tags = [dict(name='div', attrs={'class':['fullstory fullstoryHeader','fullstory fullstoryBody','ft-story-header','ft-story-body','index-detail']})]
remove_tags_after = dict(name='p', attrs={'class':'copyright'})
remove_tags = [ remove_tags = [
dict(name='div', attrs={'id':'floating-con'}) dict(name='div', attrs={'id':'floating-con'})
,dict(name=['meta','iframe','base','object','embed','link'])
,dict(attrs={'class':['storyTools','story-package','screen-copy','story-package separator','expandable-image']})
] ]
remove_attributes = ['width','height','lang']
extra_css = ''' extra_css = """
body{font-family:Arial,Helvetica,sans-serif;} body{font-family: Georgia,Times,"Times New Roman",serif}
h2(font-size:large;} h2{font-size:large}
.ft-story-header(font-size:xx-small;} .ft-story-header{font-size: x-small}
.ft-story-body(font-size:small;}
a{color:#003399;}
.container{font-size:x-small;} .container{font-size:x-small;}
h3{font-size:x-small;color:#003399;} h3{font-size:x-small;color:#003399;}
''' .copyright{font-size: x-small}
img{margin-top: 0.8em; display: block}
.lastUpdated{font-family: Arial,Helvetica,sans-serif; font-size: x-small}
.byline,.ft-story-body,.ft-story-header{font-family: Arial,Helvetica,sans-serif}
"""
feeds = [ feeds = [
(u'UK' , u'http://www.ft.com/rss/home/uk' ) (u'UK' , u'http://www.ft.com/rss/home/uk' )
,(u'US' , u'http://www.ft.com/rss/home/us' ) ,(u'US' , u'http://www.ft.com/rss/home/us' )
,(u'Europe' , u'http://www.ft.com/rss/home/europe' )
,(u'Asia' , u'http://www.ft.com/rss/home/asia' ) ,(u'Asia' , u'http://www.ft.com/rss/home/asia' )
,(u'Middle East', u'http://www.ft.com/rss/home/middleeast') ,(u'Middle East', u'http://www.ft.com/rss/home/middleeast')
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):
content_type = soup.find('meta', {'http-equiv':'Content-Type'}) items = ['promo-box','promo-title',
if content_type: 'promo-headline','promo-image',
content_type['content'] = 'text/html; charset=utf-8' 'promo-intro','promo-link','subhead']
for item in items:
for it in soup.findAll(item):
it.name = 'div'
it.attrs = []
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('a'):
limg = item.find('img')
if item.string is not None:
str = item.string
item.replaceWith(str)
else:
if limg:
item.name = 'div'
item.attrs = []
else:
str = self.tag_to_string(item)
item.replaceWith(str)
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup return soup
def get_cover_url(self):
    """Return the URL of the front-page PDF used as the cover.

    The URL embeds a date formatted as day, month and two-digit year
    (``%d%m%y``). On Sundays (ISO weekday 7) the previous day's date is
    used instead -- presumably because no front page is published on
    Sundays; TODO confirm.
    """
    issue_day = datetime.date.today()
    on_sunday = issue_day.isoweekday() == 7
    if on_sunday:
        # Fall back to the previous day's front page.
        issue_day = issue_day - datetime.timedelta(days=1)
    url_template = 'http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_USA.pdf'
    return issue_day.strftime(url_template)

View File

@ -3,6 +3,8 @@ __copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
www.ft.com/uk-edition www.ft.com/uk-edition
''' '''
import datetime
from calibre import strftime from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
@ -20,7 +22,6 @@ class FinancialTimes(BasicNewsRecipe):
needs_subscription = True needs_subscription = True
encoding = 'utf8' encoding = 'utf8'
publication_type = 'newspaper' publication_type = 'newspaper'
cover_url = strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_LON.pdf')
masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg' masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
LOGIN = 'https://registration.ft.com/registration/barrier/login' LOGIN = 'https://registration.ft.com/registration/barrier/login'
INDEX = 'http://www.ft.com/uk-edition' INDEX = 'http://www.ft.com/uk-edition'
@ -128,3 +129,10 @@ class FinancialTimes(BasicNewsRecipe):
if not item.has_key('alt'): if not item.has_key('alt'):
item['alt'] = 'image' item['alt'] = 'image'
return soup return soup
def get_cover_url(self):
    """Return the URL of the London front-page PDF used as the cover.

    The URL embeds a date formatted as day, month and two-digit year
    (``%d%m%y``). On Sundays (ISO weekday 7) the previous day's date is
    used instead -- presumably because no front page is published on
    Sundays; TODO confirm.
    """
    issue_day = datetime.date.today()
    on_sunday = issue_day.isoweekday() == 7
    if on_sunday:
        # Fall back to the previous day's front page.
        issue_day = issue_day - datetime.timedelta(days=1)
    url_template = 'http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_LON.pdf'
    return issue_day.strftime(url_template)

View File

@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds import Feed
class GC_gl(BasicNewsRecipe):
    """Recipe for the Galicia Confidencial RSS feed (Galician language).

    The index is built by letting BasicNewsRecipe parse the RSS feeds and
    then converting each parsed feed into the (title, articles) form that
    parse_index() returns.
    """

    title = u'Galicia Confidencial (RSS)'
    __author__ = u'Susana Sotelo Docío'
    description = u'Unha fiestra de información aberta a todos'
    publisher = u'Galicia Confidencial'
    category = u'news, society, politics, Galicia'
    encoding = 'utf-8'
    language = 'gl'
    direction = 'ltr'
    cover_url = 'http://galiciaconfidencial.com/imagenes/header/logo_gc.gif'
    oldest_article = 5
    max_articles_per_feed = 100
    center_navbar = False

    feeds = [(u'Novas no RSS', u'http://galiciaconfidencial.com/rss2/xeral.rss')]

    extra_css = u' p{text-align:left} '

    def print_version(self, url):
        """Return *url* with its '/nova/' prefix swapped for '/imprimir/'.

        URLs that do not contain the '/nova/' prefix are returned unchanged.
        """
        return url.replace('http://galiciaconfidencial.com/nova/', 'http://galiciaconfidencial.com/imprimir/')

    def parse_index(self):
        """Return the index as a list of (feed title, article dicts) tuples."""
        feeds = []
        self.gc_parse_feeds(feeds)
        return feeds

    def gc_parse_feeds(self, feeds):
        """Parse the RSS feeds and append their index entries to *feeds*.

        *feeds* is mutated in place; nothing is returned.
        """
        # The original built a throwaway Feed() here and rebound the name on
        # the very next line; that dead store is dropped.
        rssFeeds = BasicNewsRecipe.parse_feeds(self)
        self.feed_to_index_append(rssFeeds, feeds)

    def feed_to_index_append(self, feedObject, masterFeed):
        """Append one (title, articles) tuple per parsed feed to *masterFeed*.

        feedObject -- iterable of parsed feeds, each exposing .title and
                      .articles (whose items expose .title, .url and .date).
        masterFeed -- list extended in place with (feed title, list of
                      {'title', 'url', 'date'} dicts).
        """
        for feed in feedObject:
            newArticles = [
                {
                    'title' : article.title,
                    'url'   : article.url,
                    'date'  : article.date
                }
                for article in feed.articles
            ]
            masterFeed.append((feed.title, newArticles))

View File

@ -1,5 +1,6 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import re import re
from datetime import date, timedelta
class HBR(BasicNewsRecipe): class HBR(BasicNewsRecipe):
@ -12,13 +13,14 @@ class HBR(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
LOGIN_URL = 'http://hbr.org/login?request_url=/' LOGIN_URL = 'http://hbr.org/login?request_url=/'
INDEX = 'http://hbr.org/current' INDEX = 'http://hbr.org/archive-toc/BR'
keep_only_tags = [dict(name='div', id='pageContainer')] keep_only_tags = [dict(name='div', id='pageContainer')]
remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline', remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn', 'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR', 'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
'mailingListTout', 'partnerCenter', 'pageFooter', 'mailingListTout', 'partnerCenter', 'pageFooter',
'superNavHeadContainer', 'hbrDisqus',
'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']), 'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
dict(name='iframe')] dict(name='iframe')]
extra_css = ''' extra_css = '''
@ -55,9 +57,14 @@ class HBR(BasicNewsRecipe):
def hbr_get_toc(self): def hbr_get_toc(self):
soup = self.index_to_soup(self.INDEX) today = date.today()
url = soup.find('a', text=lambda t:'Full Table of Contents' in t).parent.get('href') future = today + timedelta(days=30)
return self.index_to_soup('http://hbr.org'+url) for x in [x.strftime('%y%m') for x in (future, today)]:
url = self.INDEX + x
soup = self.index_to_soup(url)
if not soup.find(text='Issue Not Found'):
return soup
raise Exception('Could not find current issue')
def hbr_parse_section(self, container, feeds): def hbr_parse_section(self, container, feeds):
current_section = None current_section = None

Binary file not shown.

After

Width:  |  Height:  |  Size: 514 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 400 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 770 B

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

View File

@ -6,7 +6,7 @@ class TheIndependent(BasicNewsRecipe):
language = 'en_GB' language = 'en_GB'
__author__ = 'Krittika Goyal' __author__ = 'Krittika Goyal'
oldest_article = 1 #days oldest_article = 1 #days
max_articles_per_feed = 25 max_articles_per_feed = 30
encoding = 'latin1' encoding = 'latin1'
no_stylesheets = True no_stylesheets = True
@ -25,24 +25,39 @@ class TheIndependent(BasicNewsRecipe):
'http://www.independent.co.uk/news/uk/rss'), 'http://www.independent.co.uk/news/uk/rss'),
('World', ('World',
'http://www.independent.co.uk/news/world/rss'), 'http://www.independent.co.uk/news/world/rss'),
('Sport',
'http://www.independent.co.uk/sport/rss'),
('Arts and Entertainment',
'http://www.independent.co.uk/arts-entertainment/rss'),
('Business', ('Business',
'http://www.independent.co.uk/news/business/rss'), 'http://www.independent.co.uk/news/business/rss'),
('Life and Style',
'http://www.independent.co.uk/life-style/gadgets-and-tech/news/rss'),
('Science',
'http://www.independent.co.uk/news/science/rss'),
('People', ('People',
'http://www.independent.co.uk/news/people/rss'), 'http://www.independent.co.uk/news/people/rss'),
('Science',
'http://www.independent.co.uk/news/science/rss'),
('Media', ('Media',
'http://www.independent.co.uk/news/media/rss'), 'http://www.independent.co.uk/news/media/rss'),
('Health and Families', ('Education',
'http://www.independent.co.uk/life-style/health-and-families/rss'), 'http://www.independent.co.uk/news/education/rss'),
('Obituaries', ('Obituaries',
'http://www.independent.co.uk/news/obituaries/rss'), 'http://www.independent.co.uk/news/obituaries/rss'),
('Opinion',
'http://www.independent.co.uk/opinion/rss'),
('Environment',
'http://www.independent.co.uk/environment/rss'),
('Sport',
'http://www.independent.co.uk/sport/rss'),
('Life and Style',
'http://www.independent.co.uk/life-style/rss'),
('Arts and Entertainment',
'http://www.independent.co.uk/arts-entertainment/rss'),
('Travel',
'http://www.independent.co.uk/travel/rss'),
('Money',
'http://www.independent.co.uk/money/rss'),
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
infobae.com infobae.com
''' '''
@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Infobae(BasicNewsRecipe): class Infobae(BasicNewsRecipe):
title = 'Infobae.com' title = 'Infobae.com'
__author__ = 'Darko Miletic and Sujata Raman' __author__ = 'Darko Miletic and Sujata Raman'
description = 'Informacion Libre las 24 horas' description = 'Infobae.com es el sitio de noticias con mayor actualizacion de Latinoamérica. Noticias actualizadas las 24 horas, los 365 días del año.'
publisher = 'Infobae.com' publisher = 'Infobae.com'
category = 'news, politics, Argentina' category = 'news, politics, Argentina'
oldest_article = 1 oldest_article = 1
@ -17,13 +17,13 @@ class Infobae(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
language = 'es_AR' language = 'es_AR'
encoding = 'cp1252' encoding = 'utf8'
masthead_url = 'http://www.infobae.com/imgs/header/header.gif' masthead_url = 'http://www.infobae.com/media/img/static/logo-infobae.gif'
remove_javascript = True
remove_empty_feeds = True remove_empty_feeds = True
extra_css = ''' extra_css = '''
body{font-family:Arial,Helvetica,sans-serif;} body{font-family: Arial,Helvetica,sans-serif}
.popUpTitulo{color:#0D4261; font-size: xx-large} img{display: block}
.categoria{font-size: small; text-transform: uppercase}
''' '''
conversion_options = { conversion_options = {
@ -31,26 +31,44 @@ class Infobae(BasicNewsRecipe):
, 'tags' : category , 'tags' : category
, 'publisher' : publisher , 'publisher' : publisher
, 'language' : language , 'language' : language
, 'linearize_tables' : True
} }
keep_only_tags = [dict(attrs={'class':['titularnota','nota','post-title','post-entry','entry-title','entry-info','entry-content']})]
remove_tags_after = dict(attrs={'class':['interior-noticia','nota-desc','tags']})
remove_tags = [
dict(name=['base','meta','link','iframe','object','embed','ins'])
,dict(attrs={'class':['barranota','tags']})
]
feeds = [ feeds = [
(u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' ) (u'Saludable' , u'http://www.infobae.com/rss/saludable.xml')
,(u'Salud' , u'http://www.infobae.com/adjuntos/html/RSS/salud.xml' ) ,(u'Economia' , u'http://www.infobae.com/rss/economia.xml' )
,(u'Tecnologia', u'http://www.infobae.com/adjuntos/html/RSS/tecnologia.xml') ,(u'En Numeros', u'http://www.infobae.com/rss/rating.xml' )
,(u'Deportes' , u'http://www.infobae.com/adjuntos/html/RSS/deportes.xml' ) ,(u'Finanzas' , u'http://www.infobae.com/rss/finanzas.xml' )
,(u'Mundo' , u'http://www.infobae.com/rss/mundo.xml' )
,(u'Sociedad' , u'http://www.infobae.com/rss/sociedad.xml' )
,(u'Politica' , u'http://www.infobae.com/rss/politica.xml' )
,(u'Deportes' , u'http://www.infobae.com/rss/deportes.xml' )
] ]
def print_version(self, url): def preprocess_html(self, soup):
article_part = url.rpartition('/')[2] for item in soup.findAll(style=True):
article_id= article_part.partition('-')[0] del item['style']
return 'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id for item in soup.findAll('a'):
limg = item.find('img')
def postprocess_html(self, soup, first): if item.string is not None:
for tag in soup.findAll(name='strong'): str = item.string
tag.name = 'b' item.replaceWith(str)
else:
if limg:
item.name = 'div'
item.attrs = []
else:
str = self.tag_to_string(item)
item.replaceWith(str)
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup return soup

View File

@ -99,7 +99,7 @@ class LeMonde(BasicNewsRecipe):
keep_only_tags = [ keep_only_tags = [
dict(name='div', attrs={'class':['contenu']}) dict(name='div', attrs={'class':['contenu']})
] ]
remove_tags = [dict(name='div', attrs={'class':['LM_atome']})]
remove_tags_after = [dict(id='appel_temoignage')] remove_tags_after = [dict(id='appel_temoignage')]
def get_article_url(self, article): def get_article_url(self, article):

View File

@ -1,17 +1,23 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010-2011, Eddie Lau' __copyright__ = '2010-2011, Eddie Lau'
# Region - Hong Kong, Vancouver, Toronto
__Region__ = 'Hong Kong'
# Users of Kindle 3 with limited system-level CJK support # Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False". # please replace the following "True" with "False".
__MakePeriodical__ = True __MakePeriodical__ = True
# Turn below to true if your device supports display of CJK titles # Turn below to true if your device supports display of CJK titles
__UseChineseTitle__ = False __UseChineseTitle__ = False
# Trun below to true if you wish to use life.mingpao.com as the main article source # Set it to False if you want to skip images
__KeepImages__ = True
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
__UseLife__ = True __UseLife__ = True
''' '''
Change Log: Change Log:
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
provide options to remove all images in the file
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages 2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
2011/03/06: add new articles for finance section, also a new section "Columns" 2011/03/06: add new articles for finance section, also a new section "Columns"
2011/02/28: rearrange the sections 2011/02/28: rearrange the sections
@ -34,21 +40,96 @@ Change Log:
import os, datetime, re import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
class MPHKRecipe(BasicNewsRecipe): # MAIN CLASS
title = 'Ming Pao - Hong Kong' class MPRecipe(BasicNewsRecipe):
if __Region__ == 'Hong Kong':
title = 'Ming Pao - Hong Kong'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
category = 'Chinese, News, Hong Kong'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
keep_only_tags = [dict(name='h1'),
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}),
dict(attrs={'class':['photo']}),
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
]
if __KeepImages__:
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
#dict(name='table') # for content fetched from life.mingpao.com
]
else:
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
dict(name='img'),
#dict(name='table') # for content fetched from life.mingpao.com
]
remove_attributes = ['width']
preprocess_regexps = [
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
lambda match: '<h1>'),
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
lambda match: '</h1>'),
(re.compile(r'<p><a href=.+?</a></p>', re.DOTALL|re.IGNORECASE), # for entertainment page
lambda match: ''),
# skip <br> after title in life.mingpao.com fetched article
(re.compile(r"<div id='newscontent'><br>", re.DOTALL|re.IGNORECASE),
lambda match: "<div id='newscontent'>"),
(re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
lambda match: "</b>")
]
elif __Region__ == 'Vancouver':
title = 'Ming Pao - Vancouver'
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
category = 'Chinese, News, Vancouver'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
masthead_url = 'http://www.mingpaovan.com/image/mainlogo2_VAN2.gif'
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
]
if __KeepImages__:
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
else:
remove_tags = [dict(name='img')]
remove_attributes = ['width']
preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
elif __Region__ == 'Toronto':
title = 'Ming Pao - Toronto'
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
category = 'Chinese, News, Toronto'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
masthead_url = 'http://www.mingpaotor.com/image/mainlogo2_TOR2.gif'
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
]
if __KeepImages__:
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
else:
remove_tags = [dict(name='img')]
remove_attributes = ['width']
preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
oldest_article = 1 oldest_article = 1
max_articles_per_feed = 100 max_articles_per_feed = 100
__author__ = 'Eddie Lau' __author__ = 'Eddie Lau'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
publisher = 'MingPao' publisher = 'MingPao'
category = 'Chinese, News, Hong Kong'
remove_javascript = True remove_javascript = True
use_embedded_content = False use_embedded_content = False
no_stylesheets = True no_stylesheets = True
@ -57,33 +138,6 @@ class MPHKRecipe(BasicNewsRecipe):
recursions = 0 recursions = 0
conversion_options = {'linearize_tables':True} conversion_options = {'linearize_tables':True}
timefmt = '' timefmt = ''
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
keep_only_tags = [dict(name='h1'),
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}),
dict(attrs={'class':['photo']}),
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
]
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='table')] # for content fetched from life.mingpao.com
remove_attributes = ['width']
preprocess_regexps = [
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
lambda match: '<h1>'),
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
lambda match: '</h1>'),
(re.compile(r'<p><a href=.+?</a></p>', re.DOTALL|re.IGNORECASE), # for entertainment page
lambda match: ''),
# skip <br> after title in life.mingpao.com fetched article
(re.compile(r"<div id='newscontent'><br>", re.DOTALL|re.IGNORECASE),
lambda match: "<div id='newscontent'>"),
(re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
lambda match: "</b>")
]
def image_url_processor(cls, baseurl, url): def image_url_processor(cls, baseurl, url):
# trick: break the url at the first occurrence of digit, add an additional # trick: break the url at the first occurrence of digit, add an additional
@ -124,8 +178,18 @@ class MPHKRecipe(BasicNewsRecipe):
def get_dtlocal(self): def get_dtlocal(self):
dt_utc = datetime.datetime.utcnow() dt_utc = datetime.datetime.utcnow()
# convert UTC to local hk time - at around HKT 6.00am, all news are available if __Region__ == 'Hong Kong':
dt_local = dt_utc - datetime.timedelta(-2.0/24) # convert UTC to local hk time - at HKT 5.30am, all news are available
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
elif __Region__ == 'Vancouver':
# convert UTC to local Vancouver time - at PST time 5.30am, all news are available
dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
#dt_local = dt_utc.astimezone(pytz.timezone('America/Vancouver')) - datetime.timedelta(5.5/24)
elif __Region__ == 'Toronto':
# convert UTC to local Toronto time - at EST time 8.30am, all news are available
dt_local = dt_utc + datetime.timedelta(-5.0/24) - datetime.timedelta(8.5/24)
#dt_local = dt_utc.astimezone(pytz.timezone('America/Toronto')) - datetime.timedelta(8.5/24)
return dt_local return dt_local
def get_fetchdate(self): def get_fetchdate(self):
@ -135,13 +199,15 @@ class MPHKRecipe(BasicNewsRecipe):
return self.get_dtlocal().strftime("%Y-%m-%d") return self.get_dtlocal().strftime("%Y-%m-%d")
def get_fetchday(self): def get_fetchday(self):
# dt_utc = datetime.datetime.utcnow()
# convert UTC to local hk time - at around HKT 6.00am, all news are available
# dt_local = dt_utc - datetime.timedelta(-2.0/24)
return self.get_dtlocal().strftime("%d") return self.get_dtlocal().strftime("%d")
def get_cover_url(self): def get_cover_url(self):
cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg' if __Region__ == 'Hong Kong':
cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
elif __Region__ == 'Vancouver':
cover = 'http://www.mingpaovan.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgva1s.jpg'
elif __Region__ == 'Toronto':
cover = 'http://www.mingpaotor.com/ftp/News/' + self.get_fetchdate() + '/' + self.get_fetchday() + 'pgtas.jpg'
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
try: try:
br.open(cover) br.open(cover)
@ -153,76 +219,104 @@ class MPHKRecipe(BasicNewsRecipe):
feeds = [] feeds = []
dateStr = self.get_fetchdate() dateStr = self.get_fetchdate()
if __UseLife__: if __Region__ == 'Hong Kong':
for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'), if __UseLife__:
(u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'), for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'),
(u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'), (u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'),
(u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr', 'nal'), (u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'),
(u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalfa', 'nal'), (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr', 'nal'),
(u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalca', 'nal'), (u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalfa', 'nal'),
(u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'), (u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalca', 'nal'),
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'), (u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'), (u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'), (u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]: (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
articles = self.parse_section2(url, keystr) (u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
articles = self.parse_section2(url, keystr)
if articles:
feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
else:
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- editorial
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
if ed_articles:
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special - finance
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
if fin_articles:
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special - entertainment
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
if ent_articles:
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- columns
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
if col_articles:
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
elif __Region__ == 'Vancouver':
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
(u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
(u'\u793e\u5340 Local', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VDindex.htm'),
(u'\u6e2f\u805e Hong Kong', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-VGindex.htm'),
(u'\u570b\u969b World', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VTindex.htm'),
(u'\u4e2d\u570b China', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VCindex.htm'),
(u'\u7d93\u6fdf Economics', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VEindex.htm'),
(u'\u9ad4\u80b2 Sports', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VSindex.htm'),
(u'\u5f71\u8996 Film/TV', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
(u'\u526f\u520a Supplements', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/WWindex.htm'),]:
articles = self.parse_section3(url, 'http://www.mingpaovan.com/')
if articles: if articles:
feeds.append((title, articles)) feeds.append((title, articles))
elif __Region__ == 'Toronto':
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'), for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TAindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]: (u'\u52a0\u570b Canada', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TDindex.htm'),
articles = self.parse_section(url) (u'\u793e\u5340 Local', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TFindex.htm'),
(u'\u4e2d\u570b China', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TCAindex.htm'),
(u'\u570b\u969b World', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TTAindex.htm'),
(u'\u6e2f\u805e Hong Kong', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-GAindex.htm'),
(u'\u7d93\u6fdf Economics', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/THindex.htm'),
(u'\u9ad4\u80b2 Sports', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TSindex.htm'),
(u'\u5f71\u8996 Film/TV', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
(u'\u526f\u520a Supplements', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/WWindex.htm'),]:
articles = self.parse_section3(url, 'http://www.mingpaotor.com/')
if articles: if articles:
feeds.append((title, articles)) feeds.append((title, articles))
else:
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- editorial
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
if ed_articles:
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special - finance
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
if fin_articles:
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special - entertainment
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
if ent_articles:
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- columns
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
if col_articles:
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
return feeds return feeds
# parse from news.mingpao.com # parse from news.mingpao.com
@ -256,11 +350,30 @@ class MPHKRecipe(BasicNewsRecipe):
title = self.tag_to_string(i) title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False) url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1): if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
current_articles.append({'title': title, 'url': url, 'description': ''}) current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url) included_urls.append(url)
current_articles.reverse() current_articles.reverse()
return current_articles return current_articles
# parse from www.mingpaovan.com
def parse_section3(self, url, baseUrl):
    """Collect the article links of one section index page.

    Every element of the page whose class is ``ListContentLargeLink`` is
    treated as a link to an article.

    :param url: address of the section index page.
    :param baseUrl: site root the relative links are resolved against;
        it may be given with or without a trailing slash.
    :return: list of article dicts (``title``, ``url``, ``description``,
        ``date``) in page order. When a link occurs more than once only
        its last occurrence on the page is kept.
    """
    soup = self.index_to_soup(url)
    links = soup.findAll(attrs={'class': ['ListContentLargeLink']})
    # Callers pass the root with a trailing slash; strip it so that joining
    # with '/' below cannot produce a double slash in the article address.
    root = baseUrl.rstrip('/')
    current_articles = []
    included_urls = set()
    # Walk the page bottom-up so the last occurrence of a repeated link wins;
    # the page order is restored after the loop.
    for link in reversed(links):
        href = link.get('href', False)
        if not href:
            # Nothing to fetch for an element without an address.
            continue
        urlstr = root + '/' + href.replace('../../../', '').lstrip('/')
        if urlstr in included_urls:
            continue
        included_urls.add(urlstr)
        current_articles.append({'title': self.tag_to_string(link), 'url': urlstr,
                                 'description': '', 'date': ''})
    current_articles.reverse()
    return current_articles
def parse_ed_section(self, url): def parse_ed_section(self, url):
self.get_fetchdate() self.get_fetchdate()
soup = self.index_to_soup(url) soup = self.index_to_soup(url)
@ -338,7 +451,12 @@ class MPHKRecipe(BasicNewsRecipe):
if dir is None: if dir is None:
dir = self.output_dir dir = self.output_dir
if __UseChineseTitle__ == True: if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u9999\u6e2f)' if __Region__ == 'Hong Kong':
title = u'\u660e\u5831 (\u9999\u6e2f)'
elif __Region__ == 'Vancouver':
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
elif __Region__ == 'Toronto':
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
else: else:
title = self.short_title() title = self.short_title()
# if not generating a periodical, force date to apply in title # if not generating a periodical, force date to apply in title

View File

@ -0,0 +1,594 @@
__license__ = 'GPL v3'
__copyright__ = '2010-2011, Eddie Lau'
# Edition to fetch. The recipe recognises 'Hong Kong', 'Vancouver' and 'Toronto'.
__Region__ = 'Toronto'
# Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False".
# When False, create_opf() appends the issue date to the book title and leaves
# the 'periodical:' prefix off the publication type.
__MakePeriodical__ = True
# Set to True if your device supports display of CJK titles; create_opf() then
# uses the Chinese name of the paper as the book title.
__UseChineseTitle__ = False
# Set it to False if you want to skip images
__KeepImages__ = True
# (HK only) Set to True if you wish to use life.mingpao.com as the main article source
__UseLife__ = True
'''
Change Log:
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
provide options to remove all images in the file
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
2011/03/06: add new articles for finance section, also a new section "Columns"
2011/02/28: rearrange the sections
[Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles
View] make it the same title if generating a periodical, so past issue will be automatically put into "Past Issues"
folder in Kindle 3
2011/02/20: skip duplicated links in finance section, put photos which may extend a whole page to the back of the articles
clean up the indentation
2010/12/07: add entertainment section, use newspaper front page as ebook cover, suppress date display in section list
(to avoid wrong date display in case the user generates the ebook in a time zone different from HKT)
2010/11/22: add English section, remove eco-news section which is not updated daily, correct
ordering of articles
2010/11/12: add news image and eco-news section
2010/11/08: add parsing of finance section
2010/11/06: temporary work-around for Kindle device having no capability to display unicode
in section/article list.
2010/10/31: skip repeated articles in section pages
'''
import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
# MAIN CLASS
class MPRecipe(BasicNewsRecipe):
# Per-edition settings. Exactly one of the three branches below runs when the
# class body is executed, chosen by the module-level __Region__ value; each
# branch sets the title, description, CSS, masthead and the tag filters.
if __Region__ == 'Hong Kong':
title = 'Ming Pao - Hong Kong'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
category = 'Chinese, News, Hong Kong'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
# Only the page elements matched here are kept in the article.
keep_only_tags = [dict(name='h1'),
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}),
dict(attrs={'class':['photo']}),
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
]
# The two remove_tags lists differ only in the extra dict(name='img') entry
# that drops every image when __KeepImages__ is False.
if __KeepImages__:
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
#dict(name='table') # for content fetched from life.mingpao.com
]
else:
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
dict(name='img'),
#dict(name='table') # for content fetched from life.mingpao.com
]
remove_attributes = ['width']
# Textual rewrites applied to the raw page: <h5> headings become <h1>,
# linked paragraphs on the entertainment page are dropped, and stray <br>
# tags around the title are removed.
preprocess_regexps = [
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
lambda match: '<h1>'),
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
lambda match: '</h1>'),
(re.compile(r'<p><a href=.+?</a></p>', re.DOTALL|re.IGNORECASE), # for entertainment page
lambda match: ''),
# skip <br> after title in life.mingpao.com fetched article
(re.compile(r"<div id='newscontent'><br>", re.DOTALL|re.IGNORECASE),
lambda match: "<div id='newscontent'>"),
(re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
lambda match: "</b>")
]
elif __Region__ == 'Vancouver':
title = 'Ming Pao - Vancouver'
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
category = 'Chinese, News, Vancouver'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
masthead_url = 'http://www.mingpaovan.com/image/mainlogo2_VAN2.gif'
# Articles on this site are selected by the attributes of their layout tables.
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
]
if __KeepImages__:
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
else:
remove_tags = [dict(name='img')]
remove_attributes = ['width']
# Every &nbsp; entity is deleted from the raw page.
preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
elif __Region__ == 'Toronto':
# Same filters as the Vancouver branch; only the texts and the masthead differ.
title = 'Ming Pao - Toronto'
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
category = 'Chinese, News, Toronto'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
masthead_url = 'http://www.mingpaotor.com/image/mainlogo2_TOR2.gif'
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
]
if __KeepImages__:
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
else:
remove_tags = [dict(name='img')]
remove_attributes = ['width']
preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
# Settings shared by all editions.
oldest_article = 1
max_articles_per_feed = 100
__author__ = 'Eddie Lau'
publisher = 'MingPao'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'zh'
encoding = 'Big5-HKSCS'
recursions = 0
conversion_options = {'linearize_tables':True}
# Empty format string: no date text is produced from timefmt.
timefmt = ''
def image_url_processor(cls, baseurl, url):
# Hook for rewriting an image address; at present it hands ``url`` back
# unchanged and does not look at ``baseurl``.
# NOTE(review): a disabled earlier attempt tried to break the url at the
# first occurrence of a digit and add an additional '_' at the front. Its
# author marked it as not working and suggested moving that logic to the
# preprocess_html() method.
return url
def get_dtlocal(self):
    """Return the timestamp used to pick which issue of the paper to fetch.

    The current UTC time is moved to the edition's local time and then moved
    back by the hour of day at which the whole issue is available, so the
    date part of the result only advances once the new issue is complete.
    The UTC offsets are fixed values; no daylight-saving rule is applied.

    :return: naive ``datetime.datetime``.
    :raises ValueError: if ``__Region__`` is not a supported edition.
    """
    # edition -> (UTC offset in hours, local hour at which all news are available)
    region_hours = {
        'Hong Kong': (8.0, 5.5),    # HKT, issue complete at 5.30am
        'Vancouver': (-8.0, 5.5),   # PST, issue complete at 5.30am
        'Toronto': (-5.0, 8.5),     # EST, issue complete at 8.30am
    }
    if __Region__ not in region_hours:
        # Fail with a readable message instead of an unbound-variable error.
        raise ValueError('Unsupported __Region__: %r' % (__Region__,))
    utc_offset, ready_hour = region_hours[__Region__]
    dt_utc = datetime.datetime.utcnow()
    # timedelta takes days as its first argument, hence the division by 24.
    return dt_utc + datetime.timedelta(utc_offset/24) - datetime.timedelta(ready_hour/24)
def get_fetchdate(self):
    """Return the issue date as a compact ``YYYYMMDD`` string."""
    issue_time = self.get_dtlocal()
    return issue_time.strftime("%Y%m%d")
def get_fetchformatteddate(self):
    """Return the issue date as a dashed ``YYYY-MM-DD`` string."""
    issue_time = self.get_dtlocal()
    return issue_time.strftime("%Y-%m-%d")
def get_fetchday(self):
    """Return the two-digit day of month of the issue date."""
    issue_time = self.get_dtlocal()
    return issue_time.strftime("%d")
def get_cover_url(self):
    """Return the address of the issue's front-page image.

    The address is built from the issue date for the edition selected by
    ``__Region__`` and then opened once to check that it can be fetched.

    :return: the image URL, or ``None`` when the region is not recognised
        or the image cannot be opened.
    """
    # Read the clock once so the date and day parts of the address cannot
    # disagree if the issue cut-off passes while this method is running.
    issue_time = self.get_dtlocal()
    date_str = issue_time.strftime("%Y%m%d")
    day_str = issue_time.strftime("%d")
    if __Region__ == 'Hong Kong':
        cover = 'http://news.mingpao.com/' + date_str + '/' + date_str + '_' + day_str + 'gacov.jpg'
    elif __Region__ == 'Vancouver':
        cover = 'http://www.mingpaovan.com/ftp/News/' + date_str + '/' + day_str + 'pgva1s.jpg'
    elif __Region__ == 'Toronto':
        cover = 'http://www.mingpaotor.com/ftp/News/' + date_str + '/' + day_str + 'pgtas.jpg'
    else:
        return None
    br = BasicNewsRecipe.get_browser()
    try:
        br.open(cover)
    except Exception:
        # Best effort: any failure to open the image means "no cover".
        # Exception (not a bare except) keeps Ctrl-C and SystemExit working.
        cover = None
    return cover
def parse_index(self):
# Build the list of (section title, article list) pairs for the edition
# selected by __Region__. Sections whose parser returns no articles are
# left out. For a region other than the three handled below the returned
# list is empty.
feeds = []
dateStr = self.get_fetchdate()
if __Region__ == 'Hong Kong':
if __UseLife__:
# life.mingpao.com as main source: each entry is (title, index url, key
# string that an article link must contain to be accepted).
for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'),
(u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'),
(u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'),
(u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr', 'nal'),
(u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalfa', 'nal'),
(u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalca', 'nal'),
(u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
articles = self.parse_section2(url, keystr)
if articles:
feeds.append((title, articles))
# These two sections are still read from news.mingpao.com.
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
else:
# news.mingpao.com as main source; the sections marked "special" use
# their own parser and are interleaved so the section order is fixed.
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- editorial
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
if ed_articles:
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special - finance
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
if fin_articles:
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special - entertainment
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
if ent_articles:
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- columns
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
if col_articles:
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
elif __Region__ == 'Vancouver':
# All Vancouver sections share one parser; the second argument is the
# site root that the relative article links are resolved against.
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
(u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
(u'\u793e\u5340 Local', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VDindex.htm'),
(u'\u6e2f\u805e Hong Kong', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-VGindex.htm'),
(u'\u570b\u969b World', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VTindex.htm'),
(u'\u4e2d\u570b China', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VCindex.htm'),
(u'\u7d93\u6fdf Economics', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VEindex.htm'),
(u'\u9ad4\u80b2 Sports', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VSindex.htm'),
(u'\u5f71\u8996 Film/TV', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
(u'\u526f\u520a Supplements', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/WWindex.htm'),]:
articles = self.parse_section3(url, 'http://www.mingpaovan.com/')
if articles:
feeds.append((title, articles))
elif __Region__ == 'Toronto':
# Same scheme as Vancouver, against the Toronto site.
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TAindex.htm'),
(u'\u52a0\u570b Canada', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TDindex.htm'),
(u'\u793e\u5340 Local', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TFindex.htm'),
(u'\u4e2d\u570b China', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TCAindex.htm'),
(u'\u570b\u969b World', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TTAindex.htm'),
(u'\u6e2f\u805e Hong Kong', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-GAindex.htm'),
(u'\u7d93\u6fdf Economics', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/THindex.htm'),
(u'\u9ad4\u80b2 Sports', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/TSindex.htm'),
(u'\u5f71\u8996 Film/TV', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/HK-MAindex.htm'),
(u'\u526f\u520a Supplements', 'http://www.mingpaotor.com/htm/News/' + dateStr + '/WWindex.htm'),]:
articles = self.parse_section3(url, 'http://www.mingpaotor.com/')
if articles:
feeds.append((title, articles))
return feeds
# parse from news.mingpao.com
def parse_section(self, url):
    """Collect the article links of one news.mingpao.com section index.

    Every element with class ``bullet`` or ``bullet_grey`` is expected to
    hold one article link, relative to the issue's date directory.

    :param url: address of the section index page.
    :return: list of article dicts (``title``, ``url``, ``description``,
        ``date``) in page order. Links whose address contains ``Redirect``
        are skipped, and of a repeated link only the last occurrence on the
        page is kept.
    """
    dateStr = self.get_fetchdate()
    soup = self.index_to_soup(url)
    bullets = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
    current_articles = []
    included_urls = set()
    # Walk the page bottom-up so the last occurrence of a repeated link wins;
    # the page order is restored after the loop.
    for bullet in reversed(bullets):
        anchor = bullet.find('a', href = True)
        if anchor is None:
            # A bullet without a link has no article to fetch.
            continue
        article_url = 'http://news.mingpao.com/' + dateStr + '/' + anchor.get('href', False)
        if article_url in included_urls or 'Redirect' in article_url:
            continue
        included_urls.add(article_url)
        current_articles.append({'title': self.tag_to_string(anchor), 'url': article_url,
                                 'description':'', 'date':''})
    current_articles.reverse()
    return current_articles
# parse from life.mingpao.com
def parse_section2(self, url, keystr):
    """Collect the article links of one life.mingpao.com section index.

    :param url: address of the section index page.
    :param keystr: text an article address must contain to be accepted
        (in addition to ``.txt``).
    :return: list of article dicts (``title``, ``url``, ``description``)
        in page order, pointing at the printed version of each article.
        Of a repeated link only the last occurrence on the page is kept.
    """
    soup = self.index_to_soup(url)
    current_articles = []
    included_urls = set()
    # Walk the page bottom-up so the last occurrence of a repeated link wins;
    # the page order is restored after the loop.
    for anchor in reversed(soup.findAll('a', href=True)):
        article_url = 'http://life.mingpao.com/cfm/' + anchor.get('href', False)
        if '.txt' not in article_url or keystr not in article_url:
            continue
        # use printed version of the article
        article_url = article_url.replace('dailynews3.cfm', 'dailynews3a.cfm')
        # Compare the rewritten address: that is the form that gets recorded,
        # so checking the un-rewritten one would never find a duplicate.
        if article_url in included_urls:
            continue
        included_urls.add(article_url)
        current_articles.append({'title': self.tag_to_string(anchor), 'url': article_url,
                                 'description': ''})
    current_articles.reverse()
    return current_articles
# parse from www.mingpaovan.com
def parse_section3(self, url, baseUrl):
    """Collect the article links of one section index page.

    Every element of the page whose class is ``ListContentLargeLink`` is
    treated as a link to an article.

    :param url: address of the section index page.
    :param baseUrl: site root the relative links are resolved against;
        it may be given with or without a trailing slash.
    :return: list of article dicts (``title``, ``url``, ``description``,
        ``date``) in page order. When a link occurs more than once only
        its last occurrence on the page is kept.
    """
    soup = self.index_to_soup(url)
    links = soup.findAll(attrs={'class': ['ListContentLargeLink']})
    # Callers pass the root with a trailing slash; strip it so that joining
    # with '/' below cannot produce a double slash in the article address.
    root = baseUrl.rstrip('/')
    current_articles = []
    included_urls = set()
    # Walk the page bottom-up so the last occurrence of a repeated link wins;
    # the page order is restored after the loop.
    for link in reversed(links):
        href = link.get('href', False)
        if not href:
            # Nothing to fetch for an element without an address.
            continue
        urlstr = root + '/' + href.replace('../../../', '').lstrip('/')
        if urlstr in included_urls:
            continue
        included_urls.add(urlstr)
        current_articles.append({'title': self.tag_to_string(link), 'url': urlstr,
                                 'description': '', 'date': ''})
    current_articles.reverse()
    return current_articles
def parse_ed_section(self, url):
    """Collect the editorial article links from a life.mingpao.com index.

    Accepts links whose address contains both ``.txt`` and ``nal``. The
    result is in page order; of a repeated link the last occurrence on the
    page is the one kept.
    """
    self.get_fetchdate()
    page = self.index_to_soup(url)
    anchors = page.findAll('a', href=True)
    picked = []
    seen = []
    # Bottom-up scan, undone by the reversed copy returned at the end.
    for anchor in anchors[::-1]:
        headline = self.tag_to_string(anchor)
        link = 'http://life.mingpao.com/cfm/' + anchor.get('href', False)
        if link in seen:
            continue
        if '.txt' not in link or 'nal' not in link:
            continue
        picked.append({'title': headline, 'url': link, 'description': ''})
        seen.append(link)
    return picked[::-1]
def parse_fin_section(self, url):
    """Collect the finance article links from a life.mingpao.com index.

    Accepts links whose address contains both ``txt`` and ``nal``. The page
    is scanned top-down, so the result is in page order and the first
    occurrence of a repeated link is the one kept.
    """
    self.get_fetchdate()
    page = self.index_to_soup(url)
    picked = []
    seen = []
    for anchor in page.findAll('a', href= True):
        link = 'http://life.mingpao.com/cfm/' + anchor.get('href', False)
        wanted = 'txt' in link and 'nal' in link
        if link in seen or not wanted:
            continue
        headline = self.tag_to_string(anchor)
        picked.append({'title': headline, 'url': link, 'description':''})
        seen.append(link)
    return picked
def parse_ent_section(self, url):
    """Collect the entertainment article links from an ol.mingpao.com index.

    Accepts links whose address contains both ``.txt`` and ``star``. The
    result is in page order; of a repeated link the last occurrence on the
    page is the one kept.
    """
    self.get_fetchdate()
    anchors = self.index_to_soup(url).findAll('a', href=True)
    found = []
    known = []
    # Scan from the bottom of the page; the order is flipped back on return.
    for position in range(len(anchors) - 1, -1, -1):
        anchor = anchors[position]
        headline = self.tag_to_string(anchor)
        target = 'http://ol.mingpao.com/cfm/' + anchor.get('href', False)
        is_article = ('.txt' in target) and ('star' in target)
        if is_article and target not in known:
            found.append({'title': headline, 'url': target, 'description': ''})
            known.append(target)
    return list(reversed(found))
def parse_col_section(self, url):
    """Collect the column article links from a life.mingpao.com index.

    Accepts links whose address contains both ``.txt`` and ``ncl``. The
    result is in page order; of a repeated link the last occurrence on the
    page is the one kept.
    """
    self.get_fetchdate()
    page = self.index_to_soup(url)
    bottom_up = list(page.findAll('a', href=True))
    bottom_up.reverse()
    columns = []
    taken = []
    for anchor in bottom_up:
        headline = self.tag_to_string(anchor)
        address = 'http://life.mingpao.com/cfm/' + anchor.get('href', False)
        if address in taken or '.txt' not in address or 'ncl' not in address:
            continue
        taken.append(address)
        columns.append({'title': headline, 'url': address, 'description': ''})
    # Put the kept entries back into page order.
    columns.reverse()
    return columns
def preprocess_html(self, soup):
    """Strip presentational attributes from a downloaded page.

    Removes every ``style`` and ``width`` attribute, and every ``align``
    attribute whose value is ``absmiddle``.

    :param soup: parsed page; it is modified in place.
    :return: the same ``soup`` object.
    """
    for item in soup.findAll(style=True):
        del item['style']
    # Width has to be looked up by its own attribute: once the loop above
    # has run, no tag matches style=True any more.
    for item in soup.findAll(width=True):
        del item['width']
    # 'absmiddle' is an alignment value, not an attribute name, so the tags
    # are selected through their align attribute.
    # NOTE(review): presumably this is what the misspelled 'stype' filter was
    # meant to do - confirm against the pages being cleaned.
    for item in soup.findAll(align='absmiddle'):
        del item['align']
    return soup
def create_opf(self, feeds, dir=None):
    """Write ``index.opf`` and ``index.ncx`` for the downloaded issue.

    Builds the book metadata (title, publisher, dates taken from
    ``get_dtlocal()``), the manifest, the spine and the table of contents,
    and appends a navigation bar to the last page of every downloaded
    article.

    :param feeds: the downloaded feeds; each one is iterated for its
        articles and asked for ``title``, ``author`` and ``description``.
    :param dir: directory to write into; defaults to ``self.output_dir``.
    :raises Exception: if ``feeds`` is empty.
    """
    if dir is None:
        dir = self.output_dir
    chinese_titles = {
        'Hong Kong': u'\u660e\u5831 (\u9999\u6e2f)',
        'Vancouver': u'\u660e\u5831 (\u6eab\u54e5\u83ef)',
        'Toronto': u'\u660e\u5831 (\u591a\u502b\u591a)',
    }
    # Fall back to the short title when no Chinese title exists for the
    # configured region, so that ``title`` is always defined.
    if __UseChineseTitle__ and __Region__ in chinese_titles:
        title = chinese_titles[__Region__]
    else:
        title = self.short_title()
    # if not generating a periodical, force date to apply in title
    if not __MakePeriodical__:
        title = title + ' ' + self.get_fetchformatteddate()
    mi = MetaInformation(title, [self.publisher])
    mi.publisher = self.publisher
    mi.author_sort = self.publisher
    if __MakePeriodical__:
        mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
    else:
        mi.publication_type = self.publication_type+':'+self.short_title()
    # Stamp the book with the issue's own time rather than the time of the run.
    mi.timestamp = self.get_dtlocal()
    mi.comments = self.description
    if not isinstance(mi.comments, unicode):
        mi.comments = mi.comments.decode('utf-8', 'replace')
    mi.pubdate = self.get_dtlocal()
    opf_path = os.path.join(dir, 'index.opf')
    ncx_path = os.path.join(dir, 'index.ncx')
    opf = OPFCreator(dir, mi)
    # Add mastheadImage entry to <guide> section
    mp = getattr(self, 'masthead_path', None)
    if mp is not None and os.access(mp, os.R_OK):
        from calibre.ebooks.metadata.opf2 import Guide
        ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
        ref.type = 'masthead'
        ref.title = 'Masthead Image'
        opf.guide.append(ref)
    manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
    manifest.append(os.path.join(dir, 'index.html'))
    manifest.append(os.path.join(dir, 'index.ncx'))
    # Get cover
    cpath = getattr(self, 'cover_path', None)
    if cpath is None:
        # Close the file before it is checked and added to the manifest, so
        # the generated cover is fully written and the handle is not leaked.
        with open(os.path.join(dir, 'cover.jpg'), 'wb') as pf:
            if self.default_cover(pf):
                cpath = pf.name
    if cpath is not None and os.access(cpath, os.R_OK):
        opf.cover = cpath
        manifest.append(cpath)
    # Get masthead
    mpath = getattr(self, 'masthead_path', None)
    if mpath is not None and os.access(mpath, os.R_OK):
        manifest.append(mpath)
    opf.create_manifest_from_files_in(manifest)
    for mani in opf.manifest:
        if mani.path.endswith('.ncx'):
            mani.id = 'ncx'
        if mani.path.endswith('mastheadImage.jpg'):
            mani.id = 'masthead-image'
    entries = ['index.html']
    toc = TOC(base_path=dir)
    self.play_order_counter = 0
    self.play_order_map = {}

    def feed_index(num, parent):
        # Add every downloaded article of feed ``num`` to the spine and to
        # the ``parent`` TOC node, then append the navigation bar to the
        # article's last page.
        f = feeds[num]
        for j, a in enumerate(f):
            if getattr(a, 'downloaded', False):
                adir = 'feed_%d/article_%d/'%(num, j)
                auth = a.author
                if not auth:
                    auth = None
                desc = a.text_summary
                if not desc:
                    desc = None
                else:
                    desc = self.description_limiter(desc)
                entries.append('%sindex.html'%adir)
                po = self.play_order_map.get(entries[-1], None)
                if po is None:
                    self.play_order_counter += 1
                    po = self.play_order_counter
                parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
                                play_order=po, author=auth, description=desc)
                last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                for sp in a.sub_pages:
                    prefix = os.path.commonprefix([opf_path, sp])
                    relp = sp[len(prefix):]
                    entries.append(relp.replace(os.sep, '/'))
                    last = sp
                if os.path.exists(last):
                    with open(last, 'rb') as fi:
                        src = fi.read().decode('utf-8')
                    soup = BeautifulSoup(src)
                    body = soup.find('body')
                    if body is not None:
                        # One pair of '..' per 'link<N>' component in the path.
                        prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
                        templ = self.navbar.generate(True, num, j, len(f),
                                        not self.has_single_feed,
                                        a.orig_url, self.publisher, prefix=prefix,
                                        center=self.center_navbar)
                        elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                        body.insert(len(body.contents), elem)
                        with open(last, 'wb') as fi:
                            fi.write(unicode(soup).encode('utf-8'))

    if len(feeds) == 0:
        raise Exception('All feeds are empty, aborting.')
    if len(feeds) > 1:
        # Several feeds: one TOC node per feed with its articles beneath it.
        for i, f in enumerate(feeds):
            entries.append('feed_%d/index.html'%i)
            po = self.play_order_map.get(entries[-1], None)
            if po is None:
                self.play_order_counter += 1
                po = self.play_order_counter
            auth = getattr(f, 'author', None)
            if not auth:
                auth = None
            desc = getattr(f, 'description', None)
            if not desc:
                desc = None
            feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                f.title, play_order=po, description=desc, author=auth))
    else:
        # Single feed: its articles hang directly off the TOC root.
        entries.append('feed_%d/index.html'%0)
        feed_index(0, toc)
    for i, p in enumerate(entries):
        entries[i] = os.path.join(dir, p.replace('/', os.sep))
    opf.create_spine(entries)
    opf.set_toc(toc)
    with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
        opf.render(opf_file, ncx_file)

View File

@ -0,0 +1,594 @@
__license__ = 'GPL v3'
__copyright__ = '2010-2011, Eddie Lau'
# User-editable switches for this recipe. They are read at class-definition
# time and inside the recipe's methods.
#
# Edition to fetch: one of 'Hong Kong', 'Vancouver' or 'Toronto'.
__Region__ = 'Vancouver'
# True marks the output as a periodical; False produces a regular book with
# the issue date appended to the title (see create_opf).
# Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False".
__MakePeriodical__ = True
# Set to True if your device can display CJK titles; the book title is then
# the Chinese name of the paper instead of the recipe's short title.
__UseChineseTitle__ = False
# Set it to False if you want to skip images (all <img> tags are removed)
__KeepImages__ = True
# (HK only) Set to True to use life.mingpao.com as the main article source
__UseLife__ = True
'''
Change Log:
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
provide options to remove all images in the file
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
2011/03/06: add new articles for finance section, also a new section "Columns"
2011/02/28: rearrange the sections
[Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles
View] make it the same title if generating a periodical, so past issue will be automatically put into "Past Issues"
folder in Kindle 3
2011/02/20: skip duplicated links in finance section, put photos which may extend a whole page to the back of the articles
clean up the indentation
2010/12/07: add entertainment section, use newspaper front page as ebook cover, suppress date display in section list
(to avoid wrong date display in case the user generates the ebook in a time zone different from HKT)
2010/11/22: add English section, remove eco-news section which is not updated daily, correct
ordering of articles
2010/11/12: add news image and eco-news section
2010/11/08: add parsing of finance section
2010/11/06: temporary work-around for Kindle device having no capability to display unicode
in section/article list.
2010/10/31: skip repeated articles in section pages
'''
import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
# MAIN CLASS
# Recipe for the Ming Pao newspaper (Hong Kong, Vancouver and Toronto editions).
class MPRecipe(BasicNewsRecipe):
# Class-level scraping configuration, chosen by the module-level __Region__
# switch. NOTE(review): there is no else branch, so an unrecognized
# __Region__ leaves title, keep_only_tags, remove_tags etc. undefined here.
if __Region__ == 'Hong Kong':
title = 'Ming Pao - Hong Kong'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
category = 'Chinese, News, Hong Kong'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
# Selectors for the parts of an article page to keep (titles, body, photos).
keep_only_tags = [dict(name='h1'),
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}),
dict(attrs={'class':['photo']}),
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
]
# The two remove_tags lists differ only in whether every <img> is dropped.
if __KeepImages__:
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
#dict(name='table') # for content fetched from life.mingpao.com
]
else:
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
dict(name='img'),
#dict(name='table') # for content fetched from life.mingpao.com
]
remove_attributes = ['width']
# (pattern, replacement) pairs: promote <h5> headings to <h1>, drop
# link-only paragraphs and trim stray <br> tags around the title.
preprocess_regexps = [
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
lambda match: '<h1>'),
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
lambda match: '</h1>'),
(re.compile(r'<p><a href=.+?</a></p>', re.DOTALL|re.IGNORECASE), # for entertainment page
lambda match: ''),
# skip <br> after title in life.mingpao.com fetched article
(re.compile(r"<div id='newscontent'><br>", re.DOTALL|re.IGNORECASE),
lambda match: "<div id='newscontent'>"),
(re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
lambda match: "</b>")
]
elif __Region__ == 'Vancouver':
title = 'Ming Pao - Vancouver'
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
category = 'Chinese, News, Vancouver'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
masthead_url = 'http://www.mingpaovan.com/image/mainlogo2_VAN2.gif'
# Article pages are selected by the exact attributes of three layout tables.
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
]
if __KeepImages__:
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
else:
remove_tags = [dict(name='img')]
remove_attributes = ['width']
# Strip every &nbsp; entity from the raw HTML.
preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
# The Toronto settings below mirror the Vancouver ones except for the
# title, description, category and masthead URL.
elif __Region__ == 'Toronto':
title = 'Ming Pao - Toronto'
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
category = 'Chinese, News, Toronto'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
masthead_url = 'http://www.mingpaotor.com/image/mainlogo2_TOR2.gif'
keep_only_tags = [dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['450'], 'border':['0'], 'cellspacing':['3'], 'cellpadding':['3'], 'id':['tblContent3']}),
dict(name='table', attrs={'width':['180'], 'border':['0'], 'cellspacing':['0'], 'cellpadding':['0'], 'bgcolor':['F0F0F0']}),
]
if __KeepImages__:
remove_tags = [dict(name='img', attrs={'src':['../../../image/magnifier.gif']})] # the magnifier icon
else:
remove_tags = [dict(name='img')]
remove_attributes = ['width']
preprocess_regexps = [(re.compile(r'&nbsp;', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
# Settings shared by every region.
oldest_article = 1
max_articles_per_feed = 100
__author__ = 'Eddie Lau'
publisher = 'MingPao'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'zh'
encoding = 'Big5-HKSCS'
recursions = 0
conversion_options = {'linearize_tables':True}
# Empty time format; the change log above mentions suppressing the date
# display in the section list.
timefmt = ''
def image_url_processor(cls, baseurl, url):
    """Return the image URL unchanged.

    An earlier attempt to rewrite the URL (splitting it at the first digit
    and inserting an extra ``_``) did not work and was left behind as
    commented-out code; it has been removed. If URL rewriting is needed
    again, the original author suggested doing it in ``preprocess_html``.

    :param baseurl: URL of the page the image was found on (unused).
    :param url: URL of the image.
    :return: ``url`` as given.
    """
    return url
def get_dtlocal(self):
    """Return the naive datetime used to decide which issue to fetch.

    The value is the current UTC time shifted to the edition's standard
    time and then moved back by the hour at which the day's issue is fully
    published, so its calendar date only rolls over once the new issue is
    available.

    The UTC offsets are fixed (HKT +8, PST -8, EST -5); daylight saving
    time is not taken into account.

    :raises ValueError: if ``__Region__`` is not a supported edition
        (previously this surfaced as an UnboundLocalError).
    """
    # region -> (UTC offset in hours, local hour at which all news is online)
    region_clock = {
        'Hong Kong': (8.0, 5.5),
        'Vancouver': (-8.0, 5.5),
        'Toronto': (-5.0, 8.5),
    }
    if __Region__ not in region_clock:
        raise ValueError('Unsupported __Region__: %r' % (__Region__,))
    utc_offset, release_hour = region_clock[__Region__]
    dt_utc = datetime.datetime.utcnow()
    return (dt_utc + datetime.timedelta(hours=utc_offset)
            - datetime.timedelta(hours=release_hour))
def get_fetchdate(self):
    """Return the issue date as a compact ``YYYYMMDD`` string."""
    issue_moment = self.get_dtlocal()
    return issue_moment.strftime("%Y%m%d")
def get_fetchformatteddate(self):
    """Return the issue date as a dashed ``YYYY-MM-DD`` string."""
    issue_moment = self.get_dtlocal()
    return issue_moment.strftime("%Y-%m-%d")
def get_fetchday(self):
    """Return the zero-padded day of the month of the issue date."""
    issue_moment = self.get_dtlocal()
    return issue_moment.strftime("%d")
def get_cover_url(self):
    """Return the URL of today's front-page image, or None.

    The URL is built from the issue date for the configured region and
    then opened once to check that it is reachable; if opening it fails,
    or the region is not recognized, ``None`` is returned.
    """
    # Read the date once so every URL component comes from the same instant.
    fetch_date = self.get_fetchdate()
    fetch_day = self.get_fetchday()
    if __Region__ == 'Hong Kong':
        cover = 'http://news.mingpao.com/' + fetch_date + '/' + fetch_date + '_' + fetch_day + 'gacov.jpg'
    elif __Region__ == 'Vancouver':
        cover = 'http://www.mingpaovan.com/ftp/News/' + fetch_date + '/' + fetch_day + 'pgva1s.jpg'
    elif __Region__ == 'Toronto':
        cover = 'http://www.mingpaotor.com/ftp/News/' + fetch_date + '/' + fetch_day + 'pgtas.jpg'
    else:
        # Unknown edition: there is no cover to offer.
        return None
    br = BasicNewsRecipe.get_browser()
    try:
        br.open(cover)
    except Exception:
        # Best effort: a missing cover must not abort the download, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        cover = None
    return cover
def parse_index(self):
    """Build the list of ``(section title, articles)`` feeds for the issue.

    Which section index pages are visited, and which ``parse_*`` helper
    reads each of them, depends on ``__Region__`` and, for Hong Kong, on
    ``__UseLife__``. Sections that yield no articles are left out.
    """
    feeds = []
    dateStr = self.get_fetchdate()

    def keep(section_title, found):
        # Only sections that actually produced articles become feeds.
        if found:
            feeds.append((section_title, found))

    if __Region__ == 'Hong Kong':
        news_root = 'http://news.mingpao.com/' + dateStr + '/'
        life_root = 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category='

        def from_news(sections):
            # Sections read from the news.mingpao.com index pages.
            for section_title, page in sections:
                keep(section_title, self.parse_section(news_root + page))

        closing_sections = [(u'\u526f\u520a Supplement', 'jaindex.htm'),
                            (u'\u82f1\u6587 English', 'emindex.htm')]

        if __UseLife__:
            life_sections = [
                (u'\u8981\u805e Headline', 'nalga', 'nal'),
                (u'\u6e2f\u805e Local', 'nalgb', 'nal'),
                (u'\u6559\u80b2 Education', 'nalgf', 'nal'),
                (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'nalmr', 'nal'),
                (u'\u8ad6\u58c7 Forum', 'nalfa', 'nal'),
                (u'\u4e2d\u570b China', 'nalca', 'nal'),
                (u'\u570b\u969b World', 'nalta', 'nal'),
                (u'\u7d93\u6fdf Finance', 'nalea', 'nal'),
                (u'\u9ad4\u80b2 Sport', 'nalsp', 'nal'),
                (u'\u5f71\u8996 Film/TV', 'nalma', 'nal'),
                (u'\u5c08\u6b04 Columns', 'ncolumn', 'ncl'),
            ]
            for section_title, category, keystr in life_sections:
                keep(section_title, self.parse_section2(life_root + category, keystr))
            from_news(closing_sections)
        else:
            from_news([(u'\u8981\u805e Headline', 'gaindex.htm'),
                       (u'\u6e2f\u805e Local', 'gbindex.htm'),
                       (u'\u6559\u80b2 Education', 'gfindex.htm')])
            # special - editorial
            keep(u'\u793e\u8a55/\u7b46\u9663 Editorial',
                 self.parse_ed_section(life_root + 'nalmr'))
            from_news([(u'\u8ad6\u58c7 Forum', 'faindex.htm'),
                       (u'\u4e2d\u570b China', 'caindex.htm'),
                       (u'\u570b\u969b World', 'taindex.htm')])
            # special - finance
            keep(u'\u7d93\u6fdf Finance',
                 self.parse_fin_section(life_root + 'nalea'))
            from_news([('Tech News', 'naindex.htm'),
                       (u'\u9ad4\u80b2 Sport', 'spindex.htm')])
            # special - entertainment
            keep(u'\u5f71\u8996 Film/TV',
                 self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm'))
            from_news(closing_sections)
            # special - columns
            keep(u'\u5c08\u6b04 Columns',
                 self.parse_col_section(life_root + 'ncolumn'))
    elif __Region__ == 'Vancouver':
        site = 'http://www.mingpaovan.com/'
        pages = [(u'\u8981\u805e Headline', 'VA'),
                 (u'\u52a0\u570b Canada', 'VB'),
                 (u'\u793e\u5340 Local', 'VD'),
                 (u'\u6e2f\u805e Hong Kong', 'HK-VG'),
                 (u'\u570b\u969b World', 'VT'),
                 (u'\u4e2d\u570b China', 'VC'),
                 (u'\u7d93\u6fdf Economics', 'VE'),
                 (u'\u9ad4\u80b2 Sports', 'VS'),
                 (u'\u5f71\u8996 Film/TV', 'HK-MA'),
                 (u'\u526f\u520a Supplements', 'WW')]
        for section_title, code in pages:
            index_url = site + 'htm/News/' + dateStr + '/' + code + 'index.htm'
            keep(section_title, self.parse_section3(index_url, site))
    elif __Region__ == 'Toronto':
        site = 'http://www.mingpaotor.com/'
        pages = [(u'\u8981\u805e Headline', 'TA'),
                 (u'\u52a0\u570b Canada', 'TD'),
                 (u'\u793e\u5340 Local', 'TF'),
                 (u'\u4e2d\u570b China', 'TCA'),
                 (u'\u570b\u969b World', 'TTA'),
                 (u'\u6e2f\u805e Hong Kong', 'HK-GA'),
                 (u'\u7d93\u6fdf Economics', 'TH'),
                 (u'\u9ad4\u80b2 Sports', 'TS'),
                 (u'\u5f71\u8996 Film/TV', 'HK-MA'),
                 (u'\u526f\u520a Supplements', 'WW')]
        for section_title, code in pages:
            index_url = site + 'htm/News/' + dateStr + '/' + code + 'index.htm'
            keep(section_title, self.parse_section3(index_url, site))
    return feeds
# parse from news.mingpao.com
def parse_section(self, url):
    """Collect the articles listed on a news.mingpao.com section index.

    :param url: URL of the section index page.
    :return: list of article dicts (``title``, ``url``, ``description``,
        ``date``) in page order. When a link occurs more than once, only
        its last occurrence on the page is kept. Links whose URL contains
        ``Redirect`` are ignored.
    """
    dateStr = self.get_fetchdate()
    soup = self.index_to_soup(url)
    divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
    current_articles = []
    included_urls = set()
    # Walk the page backwards so the last occurrence of a duplicate wins,
    # then restore page order at the end.
    for div in reversed(divs):
        a = div.find('a', href=True)
        if a is None:
            # A bullet without a link used to raise AttributeError here.
            continue
        title = self.tag_to_string(a)
        article_url = 'http://news.mingpao.com/' + dateStr + '/' + a.get('href', False)
        if article_url in included_urls or 'Redirect' in article_url:
            continue
        current_articles.append({'title': title, 'url': article_url, 'description':'', 'date':''})
        included_urls.add(article_url)
    current_articles.reverse()
    return current_articles
# parse from life.mingpao.com
def parse_section2(self, url, keystr):
    """Collect the articles listed on a life.mingpao.com section index.

    Only links whose URL contains both ``.txt`` and ``keystr`` are treated
    as articles. Each article URL is rewritten from ``dailynews3.cfm`` to
    ``dailynews3a.cfm`` so that the printed version is fetched.

    :param url: URL of the section index page.
    :param keystr: substring an article URL must contain (e.g. ``'nal'``).
    :return: list of article dicts (``title``, ``url``, ``description``)
        in page order. When a link occurs more than once, only its last
        occurrence on the page is kept.
    """
    soup = self.index_to_soup(url)
    current_articles = []
    included_urls = set()
    # Walk the page backwards so the last occurrence of a duplicate wins.
    for link in reversed(soup.findAll('a', href=True)):
        article_url = 'http://life.mingpao.com/cfm/' + link.get('href', False)
        if '.txt' not in article_url or keystr not in article_url:
            continue
        # Rewrite BEFORE the duplicate check: the original compared the
        # un-rewritten URL against a list of rewritten ones, so repeated
        # links were never recognized as duplicates.
        article_url = article_url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
        if article_url in included_urls:
            continue
        current_articles.append({'title': self.tag_to_string(link), 'url': article_url, 'description': ''})
        included_urls.add(article_url)
    current_articles.reverse()
    return current_articles
# parse from www.mingpaovan.com
def parse_section3(self, url, baseUrl):
    """Collect the articles listed on a Vancouver/Toronto section index.

    :param url: URL of the section index page.
    :param baseUrl: site root the relative article links are resolved
        against; a trailing slash is accepted and no longer produces a
        doubled ``//`` in the article URL.
    :return: list of article dicts (``title``, ``url``, ``description``,
        ``date``) in page order. When a link occurs more than once, only
        its last occurrence on the page is kept.
    """
    soup = self.index_to_soup(url)
    links = soup.findAll(attrs={'class': ['ListContentLargeLink']})
    site_root = baseUrl.rstrip('/')
    current_articles = []
    included_urls = set()
    # Walk the page backwards so the last occurrence of a duplicate wins.
    for link in reversed(links):
        href = link.get('href', False)
        if not href:
            # An element without an href used to crash on False.replace().
            continue
        title = self.tag_to_string(link)
        # Index pages link to articles relative to three directories up.
        urlstr = site_root + '/' + href.replace('../../../', '')
        if urlstr in included_urls:
            continue
        current_articles.append({'title': title, 'url': urlstr, 'description': '', 'date': ''})
        included_urls.add(urlstr)
    current_articles.reverse()
    return current_articles
def parse_ed_section(self, url):
    """Collect editorial articles from a life.mingpao.com index page.

    A link counts as an article when its URL contains both ``.txt`` and
    ``nal``. Returns article dicts (``title``, ``url``, ``description``)
    in page order; of repeated links only the last occurrence is kept.
    """
    self.get_fetchdate()  # result is not used
    soup = self.index_to_soup(url)
    seen = set()
    picked = []
    # Scan from the bottom of the page so the last duplicate wins.
    for anchor in reversed(soup.findAll('a', href=True)):
        title = self.tag_to_string(anchor)
        target = 'http://life.mingpao.com/cfm/' + anchor.get('href', False)
        if target in seen:
            continue
        if '.txt' in target and 'nal' in target:
            picked.append({'title': title, 'url': target, 'description': ''})
            seen.add(target)
    return picked[::-1]
def parse_fin_section(self, url):
    """Collect finance articles from a life.mingpao.com index page.

    A link counts as an article when its URL contains both ``txt`` and
    ``nal``. Returns article dicts (``title``, ``url``, ``description``)
    in page order; of repeated links only the first occurrence is kept.
    """
    self.get_fetchdate()  # result is not used
    soup = self.index_to_soup(url)
    seen = set()
    picked = []
    for anchor in soup.findAll('a', href= True):
        target = 'http://life.mingpao.com/cfm/' + anchor.get('href', False)
        if target in seen:
            continue
        if 'txt' not in target or 'nal' not in target:
            continue
        picked.append({'title': self.tag_to_string(anchor), 'url': target, 'description':''})
        seen.add(target)
    return picked
def parse_ent_section(self, url):
    """Collect entertainment articles from an ol.mingpao.com index page.

    A link counts as an article when its URL contains both ``.txt`` and
    ``star``. Returns article dicts (``title``, ``url``, ``description``)
    in page order; of repeated links only the last occurrence is kept.
    """
    self.get_fetchdate()  # result is not used
    soup = self.index_to_soup(url)
    seen = set()
    picked = []
    # Scan from the bottom of the page so the last duplicate wins.
    for anchor in reversed(soup.findAll('a', href=True)):
        title = self.tag_to_string(anchor)
        target = 'http://ol.mingpao.com/cfm/' + anchor.get('href', False)
        if target in seen:
            continue
        if '.txt' in target and 'star' in target:
            picked.append({'title': title, 'url': target, 'description': ''})
            seen.add(target)
    return picked[::-1]
def parse_col_section(self, url):
    """Collect column articles from a life.mingpao.com index page.

    A link counts as an article when its URL contains both ``.txt`` and
    ``ncl``. Returns article dicts (``title``, ``url``, ``description``)
    in page order; of repeated links only the last occurrence is kept.
    """
    self.get_fetchdate()  # result is not used
    soup = self.index_to_soup(url)
    seen = set()
    picked = []
    # Scan from the bottom of the page so the last duplicate wins.
    for anchor in reversed(soup.findAll('a', href=True)):
        title = self.tag_to_string(anchor)
        target = 'http://life.mingpao.com/cfm/' + anchor.get('href', False)
        if target in seen:
            continue
        if '.txt' in target and 'ncl' in target:
            picked.append({'title': title, 'url': target, 'description': ''})
            seen.add(target)
    return picked[::-1]
def preprocess_html(self, soup):
    """Strip inline ``style`` and ``width`` attributes from every tag.

    :param soup: parsed article page; modified in place.
    :return: the same ``soup`` object.
    """
    for item in soup.findAll(style=True):
        del item['style']
    # The original selected style=True a second time here (which matches
    # nothing once the loop above has run), so widths were never removed.
    for item in soup.findAll(width=True):
        del item['width']
    # NOTE(review): kept as written, but 'stype' is not an HTML attribute
    # and 'absmiddle' is an attribute value rather than a name, so this
    # loop presumably never matches. It may have been meant to drop
    # align="absmiddle" -- confirm intent before changing it.
    for item in soup.findAll(stype=True):
        del item['absmiddle']
    return soup
# Write index.opf and index.ncx for the downloaded issue.
#   feeds -- sequence of feed objects; each is iterated to obtain its articles
#   dir   -- directory the files are written to (defaults to self.output_dir)
# Raises Exception when `feeds` is empty.
# NOTE(review): this looks like an adaptation of the base-class create_opf
# with Ming Pao specific title and date handling -- confirm against
# BasicNewsRecipe before changing it.
def create_opf(self, feeds, dir=None):
if dir is None:
dir = self.output_dir
# Book title: the region's Chinese name when __UseChineseTitle__ is set,
# otherwise the recipe's short title.
# NOTE(review): with __UseChineseTitle__ set and an unrecognized __Region__,
# `title` is never assigned.
if __UseChineseTitle__ == True:
if __Region__ == 'Hong Kong':
title = u'\u660e\u5831 (\u9999\u6e2f)'
elif __Region__ == 'Vancouver':
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
elif __Region__ == 'Toronto':
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
else:
title = self.short_title()
# if not generating a periodical, force date to apply in title
if __MakePeriodical__ == False:
title = title + ' ' + self.get_fetchformatteddate()
# The `if True:` below has no effect on control flow.
if True:
mi = MetaInformation(title, [self.publisher])
mi.publisher = self.publisher
mi.author_sort = self.publisher
if __MakePeriodical__ == True:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
else:
mi.publication_type = self.publication_type+':'+self.short_title()
# Timestamp and publication date use the region-adjusted clock from
# get_dtlocal() instead of the current time.
#mi.timestamp = nowf()
mi.timestamp = self.get_dtlocal()
mi.comments = self.description
if not isinstance(mi.comments, unicode):
mi.comments = mi.comments.decode('utf-8', 'replace')
#mi.pubdate = nowf()
mi.pubdate = self.get_dtlocal()
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
# Manifest: one directory per feed plus the top-level index files.
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
# No cover was downloaded: let default_cover() write one to cover.jpg.
# NOTE(review): `pf` is never closed explicitly.
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
opf.create_manifest_from_files_in(manifest)
# Give the NCX and masthead manifest items fixed ids.
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
# `entries` accumulates the spine as '/'-separated paths relative to `dir`.
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
# For feed number `num`: add a spine entry and a TOC item under `parent`
# for each article flagged as downloaded, then append a navigation bar to
# the last page of that article.
def feed_index(num, parent):
f = feeds[num]
for j, a in enumerate(f):
if getattr(a, 'downloaded', False):
adir = 'feed_%d/article_%d/'%(num, j)
auth = a.author
if not auth:
auth = None
desc = a.text_summary
if not desc:
desc = None
else:
desc = self.description_limiter(desc)
entries.append('%sindex.html'%adir)
# Reuse a recorded play order for this path, else take the next number.
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
play_order=po, author=auth, description=desc)
# `last` ends up as the final page of the article: its index.html, or the
# last sub-page when there are any.
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
for sp in a.sub_pages:
prefix = os.path.commonprefix([opf_path, sp])
relp = sp[len(prefix):]
entries.append(relp.replace(os.sep, '/'))
last = sp
if os.path.exists(last):
with open(last, 'rb') as fi:
src = fi.read().decode('utf-8')
soup = BeautifulSoup(src)
body = soup.find('body')
if body is not None:
# Two '..' segments for every 'link<digits>' match in the path of `last`.
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
templ = self.navbar.generate(True, num, j, len(f),
not self.has_single_feed,
a.orig_url, self.publisher, prefix=prefix,
center=self.center_navbar)
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
# Append the navbar as the last child of <body> and rewrite the file.
body.insert(len(body.contents), elem)
with open(last, 'wb') as fi:
fi.write(unicode(soup).encode('utf-8'))
if len(feeds) == 0:
raise Exception('All feeds are empty, aborting.')
# Several feeds: each gets its own TOC node with its articles beneath it.
# A single feed: its articles are added directly under the TOC root.
if len(feeds) > 1:
for i, f in enumerate(feeds):
entries.append('feed_%d/index.html'%i)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
auth = getattr(f, 'author', None)
if not auth:
auth = None
desc = getattr(f, 'description', None)
if not desc:
desc = None
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
f.title, play_order=po, description=desc, author=auth))
else:
entries.append('feed_%d/index.html'%0)
feed_index(0, toc)
# Turn the relative '/'-separated entries into OS paths under `dir`.
for i, p in enumerate(entries):
entries[i] = os.path.join(dir, p.replace('/', os.sep))
opf.create_spine(entries)
opf.set_toc(toc)
# Render the OPF and the NCX into their two files.
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
opf.render(opf_file, ncx_file)

80
recipes/scmp.recipe Normal file
View File

@ -0,0 +1,80 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
scmp.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class SCMP(BasicNewsRecipe):
    """Recipe for the South China Morning Post (scmp.com).

    The recipe is marked as needing a subscription; when both a username
    and a password are set it logs in through the site's ``loginForm``
    before downloading.
    """
    title                 = 'South China Morning Post'
    __author__            = 'llam'
    description           = "SCMP.com, Hong Kong's premier online English daily provides exclusive up-to-date news, audio video news, podcasts, RSS Feeds, Blogs, breaking news, top stories, award winning news and analysis on Hong Kong and China."
    publisher             = 'South China Morning Post Publishers Ltd.'
    category              = 'SCMP, Online news, Hong Kong News, China news, Business news, English newspaper, daily newspaper, Lifestyle news, Sport news, Audio Video news, Asia news, World news, economy news, investor relations news, RSS Feeds'
    oldest_article        = 2
    delay                 = 1
    max_articles_per_feed = 200
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
    language              = 'en_CN'
    remove_empty_feeds    = True
    needs_subscription    = True
    publication_type      = 'newspaper'
    masthead_url          = 'http://www.scmp.com/images/logo_scmp_home.gif'
    extra_css             = ' body{font-family: Arial,Helvetica,sans-serif } '

    # The metadata option is named 'comments'; the original 'comment' key
    # does not match it, so the description never reached the metadata.
    conversion_options = {
        'comments'  : description,
        'tags'      : category,
        'publisher' : publisher,
        'language'  : language,
    }

    remove_attributes = ['width','height','border']

    keep_only_tags = [
        dict(attrs={'id':['ART','photoBox']}),
        dict(attrs={'class':['article_label','article_byline','article_body']}),
    ]

    # Drop embedded "embscreen" tables from the raw HTML.
    preprocess_regexps = [
        (re.compile(r'<P><table((?!<table).)*class="embscreen"((?!</table>).)*</table>', re.DOTALL|re.IGNORECASE),
         lambda match: ''),
    ]

    feeds = [
        (u'Business'    , u'http://www.scmp.com/rss/business.xml'        ),
        (u'Hong Kong'   , u'http://www.scmp.com/rss/hong_kong.xml'       ),
        (u'China'       , u'http://www.scmp.com/rss/china.xml'           ),
        (u'Asia & World', u'http://www.scmp.com/rss/news_asia_world.xml' ),
        (u'Opinion'     , u'http://www.scmp.com/rss/opinion.xml'         ),
        (u'LifeSTYLE'   , u'http://www.scmp.com/rss/lifestyle.xml'       ),
        (u'Sport'       , u'http://www.scmp.com/rss/sport.xml'           ),
    ]

    def get_browser(self):
        """Return a browser, logged in when credentials are configured."""
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('http://www.scmp.com/portal/site/SCMP/')
            br.select_form(name='loginForm')
            br['Login'   ] = self.username
            br['Password'] = self.password
            br.submit()
        return br

    def print_version(self, url):
        """Return ``url`` with its last ``&``-separated parameter removed.

        A URL without any ``&`` is returned unchanged; the original
        returned an empty string in that case.
        """
        rpart, sep, rest = url.rpartition('&')
        if not sep:
            return url
        return rpart

    def preprocess_html(self, soup):
        """Strip inline styles and label icons, then fix up images."""
        for item in soup.findAll(style=True):
            del item['style']
        for icon in soup.findAll(src="/images/label_icon.gif"):
            icon.extract()
        return self.adeify_images(soup)

View File

@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class TodaysZaman_en(BasicNewsRecipe):
    # Recipe for the Turkish magazine Sızıntı (sizinti.com.tr).
    # NOTE(review): the class name and the description text look as if they
    # were copied from the Today's Zaman recipe; both are left untouched
    # here because they may be referenced elsewhere -- confirm and rename.
    title                 = u'Sızıntı Dergisi'
    __author__            = u'thomass'
    description           = 'a Turkey based daily for national and international news in the fields of business, diplomacy, politics, culture, arts, sports and economics, in addition to commentaries, specials and features'
    oldest_article        = 30
    max_articles_per_feed = 80
    no_stylesheets        = True
    encoding              = 'utf-8'
    category              = 'dergi, ilim, kültür, bilim,Türkçe'
    language              = 'tr'
    publication_type      = 'magazine'
    cover_img_url         = 'http://www.sizinti.com.tr/images/sizintiprint.jpg'
    # The framework reads the cover from `cover_url`; `cover_img_url` alone
    # is not consulted, so the cover was never used. The old name is kept
    # for backward compatibility.
    cover_url             = cover_img_url
    masthead_url          = 'http://www.sizinti.com.tr/images/sizintiprint.jpg'
    remove_tags_before    = dict(id='content-right')

    feeds = [
        ( u'Sızıntı', u'http://www.sizinti.com.tr/rss'),
    ]

View File

@ -1,94 +1,67 @@
#!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
spiegel.de spiegel.de
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Spiegel_int(BasicNewsRecipe): class Spiegel_int(BasicNewsRecipe):
title = 'Spiegel Online International' title = 'Spiegel Online International'
__author__ = 'Darko Miletic and Sujata Raman' __author__ = 'Darko Miletic and Sujata Raman'
description = "News and POV from Europe's largest newsmagazine" description = "Daily news, analysis and opinion from Europe's leading newsmagazine and Germany's top news Web site"
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
language = 'en' language = 'en_DE'
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'cp1252'
publisher = 'SPIEGEL ONLINE GmbH' publisher = 'SPIEGEL ONLINE GmbH'
category = 'news, politics, Germany' category = 'news, politics, Germany'
lang = 'en' masthead_url = 'http://www.spiegel.de/static/sys/v9/spiegelonline_logo.png'
recursions = 1 publication_type = 'magazine'
match_regexps = [r'http://www.spiegel.de/.*-[1-9],00.html']
conversion_options = { conversion_options = {
'comments' : description 'comments' : description
,'tags' : category ,'tags' : category
,'language' : lang ,'language' : language
,'publisher' : publisher ,'publisher': publisher
,'pretty_print': True
} }
extra_css = ''' extra_css = '''
#spArticleColumn{font-family:verdana,arial,helvetica,geneva,sans-serif ; } #spArticleContent{font-family: Verdana,Arial,Helvetica,Geneva,sans-serif}
h1{color:#666666; font-weight:bold;} h1{color:#666666; font-weight:bold;}
h2{color:#990000;} h2{color:#990000;}
h3{color:#990000;} h3{color:#990000;}
h4 {color:#990000;} h4 {color:#990000;}
a{color:#990000;} a{color:#990000;}
.spAuthor{font-style:italic;} .spAuthor{font-style:italic;}
#spIntroTeaser{font-weight:bold;} #spIntroTeaser{font-weight:bold}
.spCredit{color:#666666; font-size:x-small;} .spCredit{color:#666666; font-size:x-small;}
.spShortDate{font-size:x-small;} .spShortDate{font-size:x-small;}
.spArticleImageBox {font-size:x-small;} .spArticleImageBox {font-size:x-small;}
.spPhotoGallery{font-size:x-small; color:#990000 ;} .spPhotoGallery{font-size:x-small; color:#990000 ;}
''' '''
keep_only_tags = [ keep_only_tags = [dict(attrs={'id':'spArticleContent'})]
dict(name ='div', attrs={'id': ['spArticleImageBox spAssetAlignleft','spArticleColumn']}), remove_tags_after = dict(attrs={'id':'spArticleBody'})
] remove_tags = [dict(name=['meta','base','iframe','embed','object'])]
remove_attributes = ['clear']
feeds = [(u'Spiegel Online', u'http://www.spiegel.de/international/index.rss')]
remove_tags = [ def print_version(self, url):
dict(name='div', attrs={'id':['spSocialBookmark','spArticleFunctions','spMultiPagerHeadlines',]}), main, sep, rest = url.rpartition(',')
dict(name='div', attrs={'class':['spCommercial spM520','spArticleCredit','spPicZoom']}), rmain, rsep, rrest = main.rpartition(',')
] return rmain + ',druck-' + rrest + ',' + rest
feeds = [(u'Spiegel Online', u'http://www.spiegel.de/schlagzeilen/rss/0,5291,676,00.xml')]
def postprocess_html(self, soup, first):
    """Remove pager controls and, on "Part ..." pages, repeated page furniture.

    :param soup: parsed article page; modified in place.
    :param first: not used by this implementation.
    :return: the same ``soup`` object.
    """
    for pager in soup.findAll(name='div', attrs={'id': "spMultiPagerControl"}):
        pager.extract()

    teaser = soup.find(name='p', attrs={'id': 'spIntroTeaser'})
    # find() returns None when the page has no intro teaser; the previous
    # code dereferenced it unconditionally and raised AttributeError.
    if teaser is None or teaser.string is None:
        return soup

    # Only the text before the last ':' of the teaser is inspected.
    if 'Part' not in teaser.string.rpartition(':')[0]:
        return soup

    # NOTE(review): indentation was lost in the source this was recovered
    # from; everything below is assumed to apply only to "Part" pages.
    if soup.h1 is not None:
        soup.h1.extract()
    if soup.h2 is not None:
        soup.h2.extract()
    for tag_name, tag_attrs in (
        ('div', {'id': "spArticleFunctions"}),
        ('p', {'class': "spAuthor"}),
        ('div', {'id': "spArticleTopAsset"}),
    ):
        found = soup.find(name=tag_name, attrs=tag_attrs)
        if found is not None:
            found.extract()
    return soup
# def print_version(self, url):
# main, sep, rest = url.rpartition(',')
# rmain, rsep, rrest = main.rpartition(',')
# return rmain + ',druck-' + rrest + ',' + rest
def preprocess_html(self, soup):
    """Strip inline ``style`` attributes and replace every link by its text.

    :param soup: parsed article page; modified in place.
    :return: the same ``soup`` object.
    """
    for styled in soup.findAll(style=True):
        del styled['style']
    for link in soup.findAll('a'):
        text = link.string
        if text is None:
            # No single string child: fall back to the recipe helper.
            text = self.tag_to_string(link)
        link.replaceWith(text)
    return soup

View File

@ -56,6 +56,7 @@ class TelegraphUK(BasicNewsRecipe):
,(u'Sport' , u'http://www.telegraph.co.uk/sport/rss' ) ,(u'Sport' , u'http://www.telegraph.co.uk/sport/rss' )
,(u'Earth News' , u'http://www.telegraph.co.uk/earth/earthnews/rss' ) ,(u'Earth News' , u'http://www.telegraph.co.uk/earth/earthnews/rss' )
,(u'Comment' , u'http://www.telegraph.co.uk/comment/rss' ) ,(u'Comment' , u'http://www.telegraph.co.uk/comment/rss' )
,(u'Travel' , u'http://www.telegraph.co.uk/travel/rss' )
,(u'How about that?', u'http://www.telegraph.co.uk/news/newstopics/howaboutthat/rss' ) ,(u'How about that?', u'http://www.telegraph.co.uk/news/newstopics/howaboutthat/rss' )
] ]

View File

@ -0,0 +1,53 @@
from calibre.web.feeds.news import BasicNewsRecipe
class TodaysZaman_en(BasicNewsRecipe):
    """Recipe for the English-language news site todayszaman.com."""

    title = u'Todays Zaman'
    __author__ = u'thomass'
    description = 'a Turkey based daily for national and international news in the fields of business, diplomacy, politics, culture, arts, sports and economics, in addition to commentaries, specials and features'
    category = 'news, haberler,TR,gazete'
    language = 'en_TR'
    publication_type = 'newspaper'
    encoding = 'utf-8'

    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True

    # Both attributes point at the same site logo.
    cover_img_url = masthead_url = 'http://medya.todayszaman.com/todayszaman/images/logo/logo.bmp'

    # Headline, date/category spans and the two table cells holding the text.
    # To include pictures as well, div#gallery / div#detailDate would have to
    # be kept too (currently disabled).
    keep_only_tags = [
        {'name': 'h1', 'attrs': {'class': ['georgia_30']}},
        {'name': 'span', 'attrs': {'class': ['left-date', 'detailDate', 'detailCName']}},
        {'name': 'td', 'attrs': {'id': ['newsSpot', 'newsText']}},
    ]

    remove_attributes = ['aria-describedby']

    remove_tags = [
        {'name': 'img', 'attrs': {'src': [
            '/images/icon_print.gif',
            'http://gmodules.com/ig/images/plus_google.gif',
            '/images/template/jazz/agenda/i1.jpg',
            'http://medya.todayszaman.com/todayszaman/images/logo/logo.bmp',
        ]}},
        {'name': 'hr', 'attrs': {'class': ['interactive-hr']}},
        {'name': 'div', 'attrs': {'class': ['empty_height_18', 'empty_height_9']}},
        {'name': 'td', 'attrs': {'id': ['superTitle']}},
        {'name': 'span', 'attrs': {'class': ['t-count enabled t-count-focus']}},
        {'name': 'a', 'attrs': {'id': ['count']}},
        {'name': 'td', 'attrs': {'class': ['left-date']}},
    ]

    feeds = [
        (u'Home', u'http://www.todayszaman.com/rss?sectionId=0'),
        (u'News', u'http://www.todayszaman.com/rss?sectionId=100'),
        (u'Business', u'http://www.todayszaman.com/rss?sectionId=105'),
        (u'Interviews', u'http://www.todayszaman.com/rss?sectionId=8'),
        (u'Columnists', u'http://www.todayszaman.com/rss?sectionId=6'),
        (u'Op-Ed', u'http://www.todayszaman.com/rss?sectionId=109'),
        (u'Arts & Culture', u'http://www.todayszaman.com/rss?sectionId=110'),
        (u'Expat Zone', u'http://www.todayszaman.com/rss?sectionId=132'),
        (u'Sports', u'http://www.todayszaman.com/rss?sectionId=5'),
        (u'Features', u'http://www.todayszaman.com/rss?sectionId=116'),
        (u'Travel', u'http://www.todayszaman.com/rss?sectionId=117'),
        (u'Leisure', u'http://www.todayszaman.com/rss?sectionId=118'),
        (u'Weird But True', u'http://www.todayszaman.com/rss?sectionId=134'),
        (u'Life', u'http://www.todayszaman.com/rss?sectionId=133'),
        (u'Health', u'http://www.todayszaman.com/rss?sectionId=126'),
        (u'Press Review', u'http://www.todayszaman.com/rss?sectionId=130'),
        (u'Todays think tanks', u'http://www.todayszaman.com/rss?sectionId=159'),
    ]

    # A print_version() that rewrote
    #   http://www.todayszaman.com/newsDetail_getNewsById.action?load=detay&
    # to
    #   http://www.todayszaman.com/newsDetail_openPrintPage.action?
    # is intentionally left out: there is a problem caused by the table
    # format of the print page.

View File

@ -2,90 +2,92 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, matek09, matek09@gmail.com' __copyright__ = '2010, matek09, matek09@gmail.com'
__copyright__ = 'Modified 2011, Mariusz Wolek <mariusz_dot_wolek @ gmail dot com>'
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import re import re
class Wprost(BasicNewsRecipe):
    """Recipe for the weekly magazine Wprost (wprost.pl).

    The issue to download is looked up on the archive page; its table of
    contents is then turned into one feed per section.
    """

    # Issue id, filled in by find_last_issue() from the archive link.
    EDITION = 0
    # True: use the issue linked right after the last "blocked" icon on the
    # archive page; False: use the first issue link on the page.
    FIND_LAST_FULL_ISSUE = True
    # True: skip articles for which is_blocked() is true.
    EXCLUDE_LOCKED = True
    ICO_BLOCKED = 'http://www.wprost.pl/G/icons/ico_blocked.gif'

    title = u'Wprost'
    __author__ = 'matek09'
    description = 'Weekly magazine'
    encoding = 'ISO-8859-2'
    no_stylesheets = True
    language = 'pl'
    remove_javascript = True

    # Keep only the print layer of each article page.
    remove_tags_before = dict(name='div', attrs={'id': 'print-layer'})
    remove_tags_after = dict(name='div', attrs={'id': 'print-layer'})

    # A keep_only_tags based selection (table#title-table, div.div-header,
    # div.div-content, div."def element-autor") is disabled in favour of the
    # remove_tags_before/after pair above.

    preprocess_regexps = [
        (re.compile(r'style="display: none;"'), lambda match: ''),
        (re.compile(r'display: block;'), lambda match: ''),
        # Strip table markup from the article HTML.
        (re.compile(r'\<td\>\<tr\>\<\/table\>'), lambda match: ''),
        (re.compile(r'\<table .*?\>'), lambda match: ''),
        (re.compile(r'\<tr>'), lambda match: ''),
        (re.compile(r'\<td .*?\>'), lambda match: ''),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': 'def element-date'}),
        dict(name='div', attrs={'class': 'def silver'}),
        dict(name='div', attrs={'id': 'content-main-column-right'}),
    ]

    extra_css = '''
        .div-header {font-size: x-small; font-weight: bold}
        '''
    # h2 {font-size: x-large; font-weight: bold}

    def is_blocked(self, a):
        """Return True when ``a`` is followed by a sibling ``<img>``.

        NOTE(review): presumably that image is the lock icon marking
        subscriber-only articles -- confirm against the live markup.
        """
        return a.findNextSibling('img') is not None

    def find_last_issue(self):
        """Locate the issue to download; set ``EDITION`` and ``cover_url``.

        :raises ValueError: when no table-of-contents link can be found on
                            the archive page.
        """
        soup = self.index_to_soup('http://www.wprost.pl/archiwum/')
        toc_title = re.compile('Zobacz spis tre.ci')
        link = None
        if self.FIND_LAST_FULL_ISSUE:
            ico_blocked = soup.findAll('img', attrs={'src': self.ICO_BLOCKED})
            # Without any blocked icon the old code raised IndexError; fall
            # through to the first issue link instead.
            if ico_blocked:
                link = ico_blocked[-1].findNext('a', attrs={'title': toc_title})
        if link is None:
            link = soup.find('a', attrs={'title': toc_title})
        if link is None:
            raise ValueError('Wprost: no issue link found on the archive page')
        self.EDITION = link['href'].replace('/tygodnik/?I=', '')
        self.cover_url = link.img['src']

    def parse_index(self):
        """Return ``[(section title, [article dict, ...]), ...]`` for the issue."""
        self.find_last_issue()
        soup = self.index_to_soup('http://www.wprost.pl/tygodnik/?I=' + self.EDITION)
        feeds = []
        for main_block in soup.findAll(attrs={'class': 'main-block-s3 s3-head head-red3'}):
            articles = list(self.find_articles(main_block))
            if articles:
                section = self.tag_to_string(main_block)
                feeds.append((section, articles))
        return feeds

    def find_articles(self, main_block):
        """Yield article dicts for the elements following ``main_block``.

        Iteration stops at the first ``<td>`` element encountered.
        """
        for a in main_block.findAllNext(attrs={'style': ['', 'padding-top: 15px;']}):
            # Was ``a.name in "td"``: a substring test that also matched
            # 't', 'd' and the empty string.
            if a.name == 'td':
                break
            # Was a bitwise ``&``; ``and`` short-circuits and states intent.
            if self.EXCLUDE_LOCKED and self.is_blocked(a):
                continue
            yield {
                'title': self.tag_to_string(a),
                'url': 'http://www.wprost.pl' + a['href'],
                'date': '',
                'description': '',
            }

View File

@ -1,20 +1,55 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Zaman (BasicNewsRecipe):
    """Recipe for the Turkish-language newspaper Zaman (zaman.com.tr)."""

    title = u'ZAMAN Gazetesi'
    __author__ = u'thomass'
    oldest_article = 2
    max_articles_per_feed = 100
    # no_stylesheets, delay and use_embedded_content are left at their defaults.
    encoding = 'ISO 8859-9'
    publisher = 'Zaman'
    category = 'news, haberler,TR,gazete'
    language = 'tr'
    # Was 'newspaper ' (trailing space), which is not one of the documented
    # publication types.
    publication_type = 'newspaper'
    extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
    conversion_options = {
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': False,
    }
    # NOTE(review): the previous revision of this recipe set ``cover_url``;
    # confirm that anything actually reads ``cover_img_url``.
    cover_img_url = 'https://fbcdn-profile-a.akamaihd.net/hprofile-ak-snc4/188140_81722291869_2111820_n.jpg'
    masthead_url = 'http://medya.zaman.com.tr/extentions/zaman.com.tr/img/section/logo-section.png'

    # News articles live in div#news-detail-content, columnist pieces in the
    # two columnist table cells.
    keep_only_tags = [
        dict(name='div', attrs={'id': ['news-detail-content']}),
        dict(name='td', attrs={'class': ['columnist-detail', 'columnist_head']}),
    ]
    remove_tags = [
        dict(name='div', attrs={'id': ['news-detail-news-text-font-size', 'news-detail-gallery', 'news-detail-news-bottom-social']}),
        dict(name='div', attrs={'class': ['radioEmbedBg', 'radyoProgramAdi']}),
        dict(name='a', attrs={'class': ['webkit-html-attribute-value webkit-html-external-link']}),
        dict(name='table', attrs={'id': ['yaziYorumTablosu']}),
        dict(name='img', attrs={'src': ['http://medya.zaman.com.tr/pics/paylas.gif', 'http://medya.zaman.com.tr/extentions/zaman.com.tr/img/columnist/ma-16.png']}),
    ]

    # remove_attributes = ['width','height']
    remove_empty_feeds = True

    feeds = [
        (u'Anasayfa', u'http://www.zaman.com.tr/anasayfa.rss'),
        (u'Son Dakika', u'http://www.zaman.com.tr/sondakika.rss'),
        (u'En çok Okunanlar', u'http://www.zaman.com.tr/max_all.rss'),
        (u'Gündem', u'http://www.zaman.com.tr/gundem.rss'),
        (u'Yazarlar', u'http://www.zaman.com.tr/yazarlar.rss'),
        (u'Politika', u'http://www.zaman.com.tr/politika.rss'),
        (u'Ekonomi', u'http://www.zaman.com.tr/ekonomi.rss'),
        (u'Dış Haberler', u'http://www.zaman.com.tr/dishaberler.rss'),
        (u'Yorumlar', u'http://www.zaman.com.tr/yorumlar.rss'),
        (u'Röportaj', u'http://www.zaman.com.tr/roportaj.rss'),
        (u'Spor', u'http://www.zaman.com.tr/spor.rss'),
        (u'Kürsü', u'http://www.zaman.com.tr/kursu.rss'),
        (u'Kültür Sanat', u'http://www.zaman.com.tr/kultursanat.rss'),
        (u'Televizyon', u'http://www.zaman.com.tr/televizyon.rss'),
        (u'Manşet', u'http://www.zaman.com.tr/manset.rss'),
    ]

View File

@ -292,13 +292,17 @@ maximum_resort_levels = 5
generate_cover_title_font = None generate_cover_title_font = None
generate_cover_foot_font = None generate_cover_foot_font = None
#: Control behavior of double clicks on the book list #: Control behavior of the book list
# Behavior of doubleclick on the books list. Choices: open_viewer, do_nothing, # You can control the behavior of doubleclicks on the books list.
# Choices: open_viewer, do_nothing,
# edit_cell, edit_metadata. Selecting edit_metadata has the side effect of # edit_cell, edit_metadata. Selecting edit_metadata has the side effect of
# disabling editing a field using a single click. # disabling editing a field using a single click.
# Default: open_viewer. # Default: open_viewer.
# Example: doubleclick_on_library_view = 'do_nothing' # Example: doubleclick_on_library_view = 'do_nothing'
# You can also control whether the book list scrolls horizontally per column
# or per pixel. Default is per column.
doubleclick_on_library_view = 'open_viewer' doubleclick_on_library_view = 'open_viewer'
horizontal_scrolling_per_column = True
#: Language to use when sorting. #: Language to use when sorting.
# Setting this tweak will force sorting to use the # Setting this tweak will force sorting to use the

View File

@ -1,6 +1,7 @@
CREATE TABLE authors ( id INTEGER PRIMARY KEY, CREATE TABLE authors ( id INTEGER PRIMARY KEY,
name TEXT NOT NULL COLLATE NOCASE, name TEXT NOT NULL COLLATE NOCASE,
sort TEXT COLLATE NOCASE, sort TEXT COLLATE NOCASE,
link TEXT NOT NULL DEFAULT "",
UNIQUE(name) UNIQUE(name)
); );
CREATE TABLE books ( id INTEGER PRIMARY KEY AUTOINCREMENT, CREATE TABLE books ( id INTEGER PRIMARY KEY AUTOINCREMENT,
@ -545,4 +546,4 @@ CREATE TRIGGER series_update_trg
BEGIN BEGIN
UPDATE series SET sort=NEW.name WHERE id=NEW.id; UPDATE series SET sort=NEW.name WHERE id=NEW.id;
END; END;
pragma user_version=20; pragma user_version=21;

Binary file not shown.

View File

@ -53,6 +53,13 @@ SQLite
Put sqlite3*.h from the sqlite windows amalgamation in ~/sw/include Put sqlite3*.h from the sqlite windows amalgamation in ~/sw/include
APSW
-----
Download source from http://code.google.com/p/apsw/downloads/list and run in visual studio prompt
python setup.py fetch --all build --missing-checksum-ok --enable-all-extensions install test
OpenSSL OpenSSL
-------- --------

View File

@ -106,10 +106,12 @@ def sanitize_file_name(name, substitute='_', as_unicode=False):
name = name.encode(filesystem_encoding, 'ignore') name = name.encode(filesystem_encoding, 'ignore')
one = _filename_sanitize.sub(substitute, name) one = _filename_sanitize.sub(substitute, name)
one = re.sub(r'\s', ' ', one).strip() one = re.sub(r'\s', ' ', one).strip()
one = re.sub(r'^\.+$', '_', one) bname, ext = os.path.splitext(one)
one = re.sub(r'^\.+$', '_', bname)
if as_unicode: if as_unicode:
one = one.decode(filesystem_encoding) one = one.decode(filesystem_encoding)
one = one.replace('..', substitute) one = one.replace('..', substitute)
one += ext
# Windows doesn't like path components that end with a period # Windows doesn't like path components that end with a period
if one and one[-1] in ('.', ' '): if one and one[-1] in ('.', ' '):
one = one[:-1]+'_' one = one[:-1]+'_'
@ -132,8 +134,10 @@ def sanitize_file_name_unicode(name, substitute='_'):
name] name]
one = u''.join(chars) one = u''.join(chars)
one = re.sub(r'\s', ' ', one).strip() one = re.sub(r'\s', ' ', one).strip()
one = re.sub(r'^\.+$', '_', one) bname, ext = os.path.splitext(one)
one = re.sub(r'^\.+$', '_', bname)
one = one.replace('..', substitute) one = one.replace('..', substitute)
one += ext
# Windows doesn't like path components that end with a period or space # Windows doesn't like path components that end with a period or space
if one and one[-1] in ('.', ' '): if one and one[-1] in ('.', ' '):
one = one[:-1]+'_' one = one[:-1]+'_'

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
__appname__ = u'calibre' __appname__ = u'calibre'
numeric_version = (0, 8, 7) numeric_version = (0, 8, 8)
__version__ = u'.'.join(map(unicode, numeric_version)) __version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>" __author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -611,7 +611,7 @@ from calibre.devices.teclast.driver import (TECLAST_K3, NEWSMY, IPAPYRUS,
from calibre.devices.sne.driver import SNE from calibre.devices.sne.driver import SNE
from calibre.devices.misc import (PALMPRE, AVANT, SWEEX, PDNOVEL, from calibre.devices.misc import (PALMPRE, AVANT, SWEEX, PDNOVEL,
GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR, GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR,
TREKSTOR, EEEREADER, NEXTBOOK, ADAM) TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK)
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO from calibre.devices.kobo.driver import KOBO
from calibre.devices.bambook.driver import BAMBOOK from calibre.devices.bambook.driver import BAMBOOK
@ -746,6 +746,7 @@ plugins += [
EEEREADER, EEEREADER,
NEXTBOOK, NEXTBOOK,
ADAM, ADAM,
MOOVYBOOK,
ITUNES, ITUNES,
BOEYE_BEX, BOEYE_BEX,
BOEYE_BDX, BOEYE_BDX,
@ -1148,7 +1149,7 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, Search, InputOptions,
class StoreAmazonKindleStore(StoreBase): class StoreAmazonKindleStore(StoreBase):
name = 'Amazon Kindle' name = 'Amazon Kindle'
description = u'Kindle books from Amazon.' description = u'Kindle books from Amazon.'
actual_plugin = 'calibre.gui2.store.amazon_plugin:AmazonKindleStore' actual_plugin = 'calibre.gui2.store.stores.amazon_plugin:AmazonKindleStore'
headquarters = 'US' headquarters = 'US'
formats = ['KINDLE'] formats = ['KINDLE']
@ -1158,7 +1159,7 @@ class StoreAmazonDEKindleStore(StoreBase):
name = 'Amazon DE Kindle' name = 'Amazon DE Kindle'
author = 'Charles Haley' author = 'Charles Haley'
description = u'Kindle Bücher von Amazon.' description = u'Kindle Bücher von Amazon.'
actual_plugin = 'calibre.gui2.store.amazon_de_plugin:AmazonDEKindleStore' actual_plugin = 'calibre.gui2.store.stores.amazon_de_plugin:AmazonDEKindleStore'
headquarters = 'DE' headquarters = 'DE'
formats = ['KINDLE'] formats = ['KINDLE']
@ -1168,7 +1169,7 @@ class StoreAmazonUKKindleStore(StoreBase):
name = 'Amazon UK Kindle' name = 'Amazon UK Kindle'
author = 'Charles Haley' author = 'Charles Haley'
description = u'Kindle books from Amazon\'s UK web site. Also, includes French language ebooks.' description = u'Kindle books from Amazon\'s UK web site. Also, includes French language ebooks.'
actual_plugin = 'calibre.gui2.store.amazon_uk_plugin:AmazonUKKindleStore' actual_plugin = 'calibre.gui2.store.stores.amazon_uk_plugin:AmazonUKKindleStore'
headquarters = 'UK' headquarters = 'UK'
formats = ['KINDLE'] formats = ['KINDLE']
@ -1177,7 +1178,7 @@ class StoreAmazonUKKindleStore(StoreBase):
class StoreArchiveOrgStore(StoreBase): class StoreArchiveOrgStore(StoreBase):
name = 'Archive.org' name = 'Archive.org'
description = u'An Internet library offering permanent access for researchers, historians, scholars, people with disabilities, and the general public to historical collections that exist in digital format.' description = u'An Internet library offering permanent access for researchers, historians, scholars, people with disabilities, and the general public to historical collections that exist in digital format.'
actual_plugin = 'calibre.gui2.store.archive_org_plugin:ArchiveOrgStore' actual_plugin = 'calibre.gui2.store.stores.archive_org_plugin:ArchiveOrgStore'
drm_free_only = True drm_free_only = True
headquarters = 'US' headquarters = 'US'
@ -1186,7 +1187,7 @@ class StoreArchiveOrgStore(StoreBase):
class StoreBaenWebScriptionStore(StoreBase): class StoreBaenWebScriptionStore(StoreBase):
name = 'Baen WebScription' name = 'Baen WebScription'
description = u'Sci-Fi & Fantasy brought to you by Jim Baen.' description = u'Sci-Fi & Fantasy brought to you by Jim Baen.'
actual_plugin = 'calibre.gui2.store.baen_webscription_plugin:BaenWebScriptionStore' actual_plugin = 'calibre.gui2.store.stores.baen_webscription_plugin:BaenWebScriptionStore'
drm_free_only = True drm_free_only = True
headquarters = 'US' headquarters = 'US'
@ -1195,7 +1196,7 @@ class StoreBaenWebScriptionStore(StoreBase):
class StoreBNStore(StoreBase): class StoreBNStore(StoreBase):
name = 'Barnes and Noble' name = 'Barnes and Noble'
description = u'The world\'s largest book seller. As the ultimate destination for book lovers, Barnes & Noble.com offers an incredible array of content.' description = u'The world\'s largest book seller. As the ultimate destination for book lovers, Barnes & Noble.com offers an incredible array of content.'
actual_plugin = 'calibre.gui2.store.bn_plugin:BNStore' actual_plugin = 'calibre.gui2.store.stores.bn_plugin:BNStore'
headquarters = 'US' headquarters = 'US'
formats = ['NOOK'] formats = ['NOOK']
@ -1205,7 +1206,7 @@ class StoreBeamEBooksDEStore(StoreBase):
name = 'Beam EBooks DE' name = 'Beam EBooks DE'
author = 'Charles Haley' author = 'Charles Haley'
description = u'Bei uns finden Sie: Tausende deutschsprachige eBooks; Alle eBooks ohne hartes DRM; PDF, ePub und Mobipocket Format; Sofortige Verfügbarkeit - 24 Stunden am Tag; Günstige Preise; eBooks für viele Lesegeräte, PC,Mac und Smartphones; Viele Gratis eBooks' description = u'Bei uns finden Sie: Tausende deutschsprachige eBooks; Alle eBooks ohne hartes DRM; PDF, ePub und Mobipocket Format; Sofortige Verfügbarkeit - 24 Stunden am Tag; Günstige Preise; eBooks für viele Lesegeräte, PC,Mac und Smartphones; Viele Gratis eBooks'
actual_plugin = 'calibre.gui2.store.beam_ebooks_de_plugin:BeamEBooksDEStore' actual_plugin = 'calibre.gui2.store.stores.beam_ebooks_de_plugin:BeamEBooksDEStore'
drm_free_only = True drm_free_only = True
headquarters = 'DE' headquarters = 'DE'
@ -1215,7 +1216,7 @@ class StoreBeamEBooksDEStore(StoreBase):
class StoreBeWriteStore(StoreBase): class StoreBeWriteStore(StoreBase):
name = 'BeWrite Books' name = 'BeWrite Books'
description = u'Publishers of fine books. Highly selective and editorially driven. Does not offer: books for children or exclusively YA, erotica, swords-and-sorcery fantasy and space-opera-style science fiction. All other genres are represented.' description = u'Publishers of fine books. Highly selective and editorially driven. Does not offer: books for children or exclusively YA, erotica, swords-and-sorcery fantasy and space-opera-style science fiction. All other genres are represented.'
actual_plugin = 'calibre.gui2.store.bewrite_plugin:BeWriteStore' actual_plugin = 'calibre.gui2.store.stores.bewrite_plugin:BeWriteStore'
drm_free_only = True drm_free_only = True
headquarters = 'US' headquarters = 'US'
@ -1224,7 +1225,7 @@ class StoreBeWriteStore(StoreBase):
class StoreDieselEbooksStore(StoreBase): class StoreDieselEbooksStore(StoreBase):
name = 'Diesel eBooks' name = 'Diesel eBooks'
description = u'Instant access to over 2.4 million titles from hundreds of publishers including Harlequin, HarperCollins, John Wiley & Sons, McGraw-Hill, Simon & Schuster and Random House.' description = u'Instant access to over 2.4 million titles from hundreds of publishers including Harlequin, HarperCollins, John Wiley & Sons, McGraw-Hill, Simon & Schuster and Random House.'
actual_plugin = 'calibre.gui2.store.diesel_ebooks_plugin:DieselEbooksStore' actual_plugin = 'calibre.gui2.store.stores.diesel_ebooks_plugin:DieselEbooksStore'
headquarters = 'US' headquarters = 'US'
formats = ['EPUB', 'PDF'] formats = ['EPUB', 'PDF']
@ -1233,7 +1234,7 @@ class StoreDieselEbooksStore(StoreBase):
class StoreEbookscomStore(StoreBase): class StoreEbookscomStore(StoreBase):
name = 'eBooks.com' name = 'eBooks.com'
description = u'Sells books in multiple electronic formats in all categories. Technical infrastructure is cutting edge, robust and scalable, with servers in the US and Europe.' description = u'Sells books in multiple electronic formats in all categories. Technical infrastructure is cutting edge, robust and scalable, with servers in the US and Europe.'
actual_plugin = 'calibre.gui2.store.ebooks_com_plugin:EbookscomStore' actual_plugin = 'calibre.gui2.store.stores.ebooks_com_plugin:EbookscomStore'
headquarters = 'US' headquarters = 'US'
formats = ['EPUB', 'LIT', 'MOBI', 'PDF'] formats = ['EPUB', 'LIT', 'MOBI', 'PDF']
@ -1243,7 +1244,7 @@ class StoreEPubBuyDEStore(StoreBase):
name = 'EPUBBuy DE' name = 'EPUBBuy DE'
author = 'Charles Haley' author = 'Charles Haley'
description = u'Bei EPUBBuy.com finden Sie ausschliesslich eBooks im weitverbreiteten EPUB-Format und ohne DRM. So haben Sie die freie Wahl, wo Sie Ihr eBook lesen: Tablet, eBook-Reader, Smartphone oder einfach auf Ihrem PC. So macht eBook-Lesen Spaß!' description = u'Bei EPUBBuy.com finden Sie ausschliesslich eBooks im weitverbreiteten EPUB-Format und ohne DRM. So haben Sie die freie Wahl, wo Sie Ihr eBook lesen: Tablet, eBook-Reader, Smartphone oder einfach auf Ihrem PC. So macht eBook-Lesen Spaß!'
actual_plugin = 'calibre.gui2.store.epubbuy_de_plugin:EPubBuyDEStore' actual_plugin = 'calibre.gui2.store.stores.epubbuy_de_plugin:EPubBuyDEStore'
drm_free_only = True drm_free_only = True
headquarters = 'DE' headquarters = 'DE'
@ -1254,7 +1255,7 @@ class StoreEBookShoppeUKStore(StoreBase):
name = 'ebookShoppe UK' name = 'ebookShoppe UK'
author = u'Charles Haley' author = u'Charles Haley'
description = u'We made this website in an attempt to offer the widest range of UK eBooks possible across and as many formats as we could manage.' description = u'We made this website in an attempt to offer the widest range of UK eBooks possible across and as many formats as we could manage.'
actual_plugin = 'calibre.gui2.store.ebookshoppe_uk_plugin:EBookShoppeUKStore' actual_plugin = 'calibre.gui2.store.stores.ebookshoppe_uk_plugin:EBookShoppeUKStore'
headquarters = 'UK' headquarters = 'UK'
formats = ['EPUB', 'PDF'] formats = ['EPUB', 'PDF']
@ -1263,7 +1264,7 @@ class StoreEBookShoppeUKStore(StoreBase):
class StoreEHarlequinStore(StoreBase): class StoreEHarlequinStore(StoreBase):
name = 'eHarlequin' name = 'eHarlequin'
description = u'A global leader in series romance and one of the world\'s leading publishers of books for women. Offers women a broad range of reading from romance to bestseller fiction, from young adult novels to erotic literature, from nonfiction to fantasy, from African-American novels to inspirational romance, and more.' description = u'A global leader in series romance and one of the world\'s leading publishers of books for women. Offers women a broad range of reading from romance to bestseller fiction, from young adult novels to erotic literature, from nonfiction to fantasy, from African-American novels to inspirational romance, and more.'
actual_plugin = 'calibre.gui2.store.eharlequin_plugin:EHarlequinStore' actual_plugin = 'calibre.gui2.store.stores.eharlequin_plugin:EHarlequinStore'
headquarters = 'CA' headquarters = 'CA'
formats = ['EPUB', 'PDF'] formats = ['EPUB', 'PDF']
@ -1272,7 +1273,7 @@ class StoreEHarlequinStore(StoreBase):
class StoreEpubBudStore(StoreBase): class StoreEpubBudStore(StoreBase):
name = 'ePub Bud' name = 'ePub Bud'
description = 'Well, it\'s pretty much just "YouTube for Children\'s eBooks. A not-for-profit organization devoted to brining self published childrens books to the world.' description = 'Well, it\'s pretty much just "YouTube for Children\'s eBooks. A not-for-profit organization devoted to brining self published childrens books to the world.'
actual_plugin = 'calibre.gui2.store.epubbud_plugin:EpubBudStore' actual_plugin = 'calibre.gui2.store.stores.epubbud_plugin:EpubBudStore'
drm_free_only = True drm_free_only = True
headquarters = 'US' headquarters = 'US'
@ -1281,7 +1282,7 @@ class StoreEpubBudStore(StoreBase):
class StoreFeedbooksStore(StoreBase): class StoreFeedbooksStore(StoreBase):
name = 'Feedbooks' name = 'Feedbooks'
description = u'Feedbooks is a cloud publishing and distribution service, connected to a large ecosystem of reading systems and social networks. Provides a variety of genres from independent and classic books.' description = u'Feedbooks is a cloud publishing and distribution service, connected to a large ecosystem of reading systems and social networks. Provides a variety of genres from independent and classic books.'
actual_plugin = 'calibre.gui2.store.feedbooks_plugin:FeedbooksStore' actual_plugin = 'calibre.gui2.store.stores.feedbooks_plugin:FeedbooksStore'
headquarters = 'FR' headquarters = 'FR'
formats = ['EPUB', 'MOBI', 'PDF'] formats = ['EPUB', 'MOBI', 'PDF']
@ -1290,7 +1291,7 @@ class StoreFoylesUKStore(StoreBase):
name = 'Foyles UK' name = 'Foyles UK'
author = 'Charles Haley' author = 'Charles Haley'
description = u'Foyles of London\'s ebook store. Provides extensive range covering all subjects.' description = u'Foyles of London\'s ebook store. Provides extensive range covering all subjects.'
actual_plugin = 'calibre.gui2.store.foyles_uk_plugin:FoylesUKStore' actual_plugin = 'calibre.gui2.store.stores.foyles_uk_plugin:FoylesUKStore'
headquarters = 'UK' headquarters = 'UK'
formats = ['EPUB', 'PDF'] formats = ['EPUB', 'PDF']
@ -1300,7 +1301,7 @@ class StoreGandalfStore(StoreBase):
name = 'Gandalf' name = 'Gandalf'
author = u'Tomasz Długosz' author = u'Tomasz Długosz'
description = u'Księgarnia internetowa Gandalf.' description = u'Księgarnia internetowa Gandalf.'
actual_plugin = 'calibre.gui2.store.gandalf_plugin:GandalfStore' actual_plugin = 'calibre.gui2.store.stores.gandalf_plugin:GandalfStore'
headquarters = 'PL' headquarters = 'PL'
formats = ['EPUB', 'PDF'] formats = ['EPUB', 'PDF']
@ -1308,7 +1309,7 @@ class StoreGandalfStore(StoreBase):
class StoreGoogleBooksStore(StoreBase): class StoreGoogleBooksStore(StoreBase):
name = 'Google Books' name = 'Google Books'
description = u'Google Books' description = u'Google Books'
actual_plugin = 'calibre.gui2.store.google_books_plugin:GoogleBooksStore' actual_plugin = 'calibre.gui2.store.stores.google_books_plugin:GoogleBooksStore'
headquarters = 'US' headquarters = 'US'
formats = ['EPUB', 'PDF', 'TXT'] formats = ['EPUB', 'PDF', 'TXT']
@ -1316,7 +1317,7 @@ class StoreGoogleBooksStore(StoreBase):
class StoreGutenbergStore(StoreBase): class StoreGutenbergStore(StoreBase):
name = 'Project Gutenberg' name = 'Project Gutenberg'
description = u'The first producer of free ebooks. Free in the United States because their copyright has expired. They may not be free of copyright in other countries. Readers outside of the United States must check the copyright laws of their countries before downloading or redistributing our ebooks.' description = u'The first producer of free ebooks. Free in the United States because their copyright has expired. They may not be free of copyright in other countries. Readers outside of the United States must check the copyright laws of their countries before downloading or redistributing our ebooks.'
actual_plugin = 'calibre.gui2.store.gutenberg_plugin:GutenbergStore' actual_plugin = 'calibre.gui2.store.stores.gutenberg_plugin:GutenbergStore'
drm_free_only = True drm_free_only = True
headquarters = 'US' headquarters = 'US'
@ -1325,7 +1326,7 @@ class StoreGutenbergStore(StoreBase):
class StoreKoboStore(StoreBase): class StoreKoboStore(StoreBase):
name = 'Kobo' name = 'Kobo'
description = u'With over 2.3 million eBooks to browse we have engaged readers in over 200 countries in Kobo eReading. Our eBook listings include New York Times Bestsellers, award winners, classics and more!' description = u'With over 2.3 million eBooks to browse we have engaged readers in over 200 countries in Kobo eReading. Our eBook listings include New York Times Bestsellers, award winners, classics and more!'
actual_plugin = 'calibre.gui2.store.kobo_plugin:KoboStore' actual_plugin = 'calibre.gui2.store.stores.kobo_plugin:KoboStore'
headquarters = 'CA' headquarters = 'CA'
formats = ['EPUB'] formats = ['EPUB']
@ -1335,7 +1336,7 @@ class StoreLegimiStore(StoreBase):
name = 'Legimi' name = 'Legimi'
author = u'Tomasz Długosz' author = u'Tomasz Długosz'
description = u'Tanie oraz darmowe ebooki, egazety i blogi w formacie EPUB, wprost na Twój e-czytnik, iPhone, iPad, Android i komputer' description = u'Tanie oraz darmowe ebooki, egazety i blogi w formacie EPUB, wprost na Twój e-czytnik, iPhone, iPad, Android i komputer'
actual_plugin = 'calibre.gui2.store.legimi_plugin:LegimiStore' actual_plugin = 'calibre.gui2.store.stores.legimi_plugin:LegimiStore'
headquarters = 'PL' headquarters = 'PL'
formats = ['EPUB'] formats = ['EPUB']
@ -1344,7 +1345,7 @@ class StoreLibreDEStore(StoreBase):
name = 'Libri DE' name = 'Libri DE'
author = 'Charles Haley' author = 'Charles Haley'
description = u'Sicher Bücher, Hörbücher und Downloads online bestellen.' description = u'Sicher Bücher, Hörbücher und Downloads online bestellen.'
actual_plugin = 'calibre.gui2.store.libri_de_plugin:LibreDEStore' actual_plugin = 'calibre.gui2.store.stores.libri_de_plugin:LibreDEStore'
headquarters = 'DE' headquarters = 'DE'
formats = ['EPUB', 'PDF'] formats = ['EPUB', 'PDF']
@ -1353,7 +1354,7 @@ class StoreLibreDEStore(StoreBase):
class StoreManyBooksStore(StoreBase): class StoreManyBooksStore(StoreBase):
name = 'ManyBooks' name = 'ManyBooks'
description = u'Public domain and creative commons works from many sources.' description = u'Public domain and creative commons works from many sources.'
actual_plugin = 'calibre.gui2.store.manybooks_plugin:ManyBooksStore' actual_plugin = 'calibre.gui2.store.stores.manybooks_plugin:ManyBooksStore'
drm_free_only = True drm_free_only = True
headquarters = 'US' headquarters = 'US'
@ -1362,7 +1363,7 @@ class StoreManyBooksStore(StoreBase):
class StoreMobileReadStore(StoreBase): class StoreMobileReadStore(StoreBase):
name = 'MobileRead' name = 'MobileRead'
description = u'Ebooks handcrafted with the utmost care.' description = u'Ebooks handcrafted with the utmost care.'
actual_plugin = 'calibre.gui2.store.mobileread.mobileread_plugin:MobileReadStore' actual_plugin = 'calibre.gui2.store.stores.mobileread.mobileread_plugin:MobileReadStore'
drm_free_only = True drm_free_only = True
headquarters = 'CH' headquarters = 'CH'
@ -1372,7 +1373,7 @@ class StoreNextoStore(StoreBase):
name = 'Nexto' name = 'Nexto'
author = u'Tomasz Długosz' author = u'Tomasz Długosz'
description = u'Największy w Polsce sklep internetowy z audiobookami mp3, ebookami pdf oraz prasą do pobrania on-line.' description = u'Największy w Polsce sklep internetowy z audiobookami mp3, ebookami pdf oraz prasą do pobrania on-line.'
actual_plugin = 'calibre.gui2.store.nexto_plugin:NextoStore' actual_plugin = 'calibre.gui2.store.stores.nexto_plugin:NextoStore'
headquarters = 'PL' headquarters = 'PL'
formats = ['EPUB', 'PDF'] formats = ['EPUB', 'PDF']
@ -1381,24 +1382,15 @@ class StoreNextoStore(StoreBase):
class StoreOpenBooksStore(StoreBase): class StoreOpenBooksStore(StoreBase):
name = 'Open Books' name = 'Open Books'
description = u'Comprehensive listing of DRM free ebooks from a variety of sources provided by users of calibre.' description = u'Comprehensive listing of DRM free ebooks from a variety of sources provided by users of calibre.'
actual_plugin = 'calibre.gui2.store.open_books_plugin:OpenBooksStore' actual_plugin = 'calibre.gui2.store.stores.open_books_plugin:OpenBooksStore'
drm_free_only = True drm_free_only = True
headquarters = 'US' headquarters = 'US'
class StoreOpenLibraryStore(StoreBase):
name = 'Open Library'
description = u'One web page for every book ever published. The goal is to be a true online library. Over 20 million records from a variety of large catalogs as well as single contributions, with more on the way.'
actual_plugin = 'calibre.gui2.store.open_library_plugin:OpenLibraryStore'
drm_free_only = True
headquarters = 'US'
formats = ['DAISY', 'DJVU', 'EPUB', 'MOBI', 'PDF', 'TXT']
class StoreOReillyStore(StoreBase): class StoreOReillyStore(StoreBase):
name = 'OReilly' name = 'OReilly'
description = u'Programming and tech ebooks from OReilly.' description = u'Programming and tech ebooks from OReilly.'
actual_plugin = 'calibre.gui2.store.oreilly_plugin:OReillyStore' actual_plugin = 'calibre.gui2.store.stores.oreilly_plugin:OReillyStore'
drm_free_only = True drm_free_only = True
headquarters = 'US' headquarters = 'US'
@ -1407,7 +1399,7 @@ class StoreOReillyStore(StoreBase):
class StorePragmaticBookshelfStore(StoreBase): class StorePragmaticBookshelfStore(StoreBase):
name = 'Pragmatic Bookshelf' name = 'Pragmatic Bookshelf'
description = u'The Pragmatic Bookshelf\'s collection of programming and tech books avaliable as ebooks.' description = u'The Pragmatic Bookshelf\'s collection of programming and tech books avaliable as ebooks.'
actual_plugin = 'calibre.gui2.store.pragmatic_bookshelf_plugin:PragmaticBookshelfStore' actual_plugin = 'calibre.gui2.store.stores.pragmatic_bookshelf_plugin:PragmaticBookshelfStore'
drm_free_only = True drm_free_only = True
headquarters = 'US' headquarters = 'US'
@ -1416,7 +1408,7 @@ class StorePragmaticBookshelfStore(StoreBase):
class StoreSmashwordsStore(StoreBase): class StoreSmashwordsStore(StoreBase):
name = 'Smashwords' name = 'Smashwords'
description = u'An ebook publishing and distribution platform for ebook authors, publishers and readers. Covers many genres and formats.' description = u'An ebook publishing and distribution platform for ebook authors, publishers and readers. Covers many genres and formats.'
actual_plugin = 'calibre.gui2.store.smashwords_plugin:SmashwordsStore' actual_plugin = 'calibre.gui2.store.stores.smashwords_plugin:SmashwordsStore'
drm_free_only = True drm_free_only = True
headquarters = 'US' headquarters = 'US'
@ -1427,7 +1419,7 @@ class StoreVirtualoStore(StoreBase):
name = 'Virtualo' name = 'Virtualo'
author = u'Tomasz Długosz' author = u'Tomasz Długosz'
description = u'Księgarnia internetowa, która oferuje bezpieczny i szeroki dostęp do książek w formie cyfrowej.' description = u'Księgarnia internetowa, która oferuje bezpieczny i szeroki dostęp do książek w formie cyfrowej.'
actual_plugin = 'calibre.gui2.store.virtualo_plugin:VirtualoStore' actual_plugin = 'calibre.gui2.store.stores.virtualo_plugin:VirtualoStore'
headquarters = 'PL' headquarters = 'PL'
formats = ['EPUB', 'PDF'] formats = ['EPUB', 'PDF']
@ -1436,7 +1428,7 @@ class StoreWaterstonesUKStore(StoreBase):
name = 'Waterstones UK' name = 'Waterstones UK'
author = 'Charles Haley' author = 'Charles Haley'
description = u'Waterstone\'s mission is to be the leading Bookseller on the High Street and online providing customers the widest choice, great value and expert advice from a team passionate about Bookselling.' description = u'Waterstone\'s mission is to be the leading Bookseller on the High Street and online providing customers the widest choice, great value and expert advice from a team passionate about Bookselling.'
actual_plugin = 'calibre.gui2.store.waterstones_uk_plugin:WaterstonesUKStore' actual_plugin = 'calibre.gui2.store.stores.waterstones_uk_plugin:WaterstonesUKStore'
headquarters = 'UK' headquarters = 'UK'
formats = ['EPUB', 'PDF'] formats = ['EPUB', 'PDF']
@ -1444,7 +1436,7 @@ class StoreWaterstonesUKStore(StoreBase):
class StoreWeightlessBooksStore(StoreBase): class StoreWeightlessBooksStore(StoreBase):
name = 'Weightless Books' name = 'Weightless Books'
description = u'An independent DRM-free ebooksite devoted to ebooks of all sorts.' description = u'An independent DRM-free ebooksite devoted to ebooks of all sorts.'
actual_plugin = 'calibre.gui2.store.weightless_books_plugin:WeightlessBooksStore' actual_plugin = 'calibre.gui2.store.stores.weightless_books_plugin:WeightlessBooksStore'
drm_free_only = True drm_free_only = True
headquarters = 'US' headquarters = 'US'
@ -1454,7 +1446,7 @@ class StoreWHSmithUKStore(StoreBase):
name = 'WH Smith UK' name = 'WH Smith UK'
author = 'Charles Haley' author = 'Charles Haley'
description = u"Shop for savings on Books, discounted Magazine subscriptions and great prices on Stationery, Toys & Games" description = u"Shop for savings on Books, discounted Magazine subscriptions and great prices on Stationery, Toys & Games"
actual_plugin = 'calibre.gui2.store.whsmith_uk_plugin:WHSmithUKStore' actual_plugin = 'calibre.gui2.store.stores.whsmith_uk_plugin:WHSmithUKStore'
headquarters = 'UK' headquarters = 'UK'
formats = ['EPUB', 'PDF'] formats = ['EPUB', 'PDF']
@ -1462,7 +1454,7 @@ class StoreWHSmithUKStore(StoreBase):
class StoreWizardsTowerBooksStore(StoreBase): class StoreWizardsTowerBooksStore(StoreBase):
name = 'Wizards Tower Books' name = 'Wizards Tower Books'
description = u'A science fiction and fantasy publisher. Concentrates mainly on making out-of-print works available once more as e-books, and helping other small presses exploit the e-book market. Also publishes a small number of limited-print-run anthologies with a view to encouraging diversity in the science fiction and fantasy field.' description = u'A science fiction and fantasy publisher. Concentrates mainly on making out-of-print works available once more as e-books, and helping other small presses exploit the e-book market. Also publishes a small number of limited-print-run anthologies with a view to encouraging diversity in the science fiction and fantasy field.'
actual_plugin = 'calibre.gui2.store.wizards_tower_books_plugin:WizardsTowerBooksStore' actual_plugin = 'calibre.gui2.store.stores.wizards_tower_books_plugin:WizardsTowerBooksStore'
drm_free_only = True drm_free_only = True
headquarters = 'UK' headquarters = 'UK'
@ -1472,7 +1464,7 @@ class StoreWoblinkStore(StoreBase):
name = 'Woblink' name = 'Woblink'
author = u'Tomasz Długosz' author = u'Tomasz Długosz'
description = u'Czytanie zdarza się wszędzie!' description = u'Czytanie zdarza się wszędzie!'
actual_plugin = 'calibre.gui2.store.woblink_plugin:WoblinkStore' actual_plugin = 'calibre.gui2.store.stores.woblink_plugin:WoblinkStore'
headquarters = 'PL' headquarters = 'PL'
formats = ['EPUB'] formats = ['EPUB']
@ -1481,7 +1473,7 @@ class StoreZixoStore(StoreBase):
name = 'Zixo' name = 'Zixo'
author = u'Tomasz Długosz' author = u'Tomasz Długosz'
description = u'Księgarnia z ebookami oraz książkami audio. Aby otwierać książki w formacie Zixo należy zainstalować program dostępny na stronie księgarni. Umożliwia on m.in. dodawanie zakładek i dostosowywanie rozmiaru czcionki.' description = u'Księgarnia z ebookami oraz książkami audio. Aby otwierać książki w formacie Zixo należy zainstalować program dostępny na stronie księgarni. Umożliwia on m.in. dodawanie zakładek i dostosowywanie rozmiaru czcionki.'
actual_plugin = 'calibre.gui2.store.zixo_plugin:ZixoStore' actual_plugin = 'calibre.gui2.store.stores.zixo_plugin:ZixoStore'
headquarters = 'PL' headquarters = 'PL'
formats = ['PDF, ZIXO'] formats = ['PDF, ZIXO']
@ -1513,7 +1505,6 @@ plugins += [
StoreMobileReadStore, StoreMobileReadStore,
StoreNextoStore, StoreNextoStore,
StoreOpenBooksStore, StoreOpenBooksStore,
StoreOpenLibraryStore,
StoreOReillyStore, StoreOReillyStore,
StorePragmaticBookshelfStore, StorePragmaticBookshelfStore,
StoreSmashwordsStore, StoreSmashwordsStore,

View File

@ -63,5 +63,4 @@ Various things that require other things before they can be migrated:
columns/categories/searches info into columns/categories/searches info into
self.field_metadata. Finally, implement metadata dirtied self.field_metadata. Finally, implement metadata dirtied
functionality. functionality.
''' '''

View File

@ -17,12 +17,13 @@ from calibre import isbytestring, force_unicode, prints
from calibre.constants import (iswindows, filesystem_encoding, from calibre.constants import (iswindows, filesystem_encoding,
preferred_encoding) preferred_encoding)
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
from calibre.library.schema_upgrades import SchemaUpgrade from calibre.db.schema_upgrades import SchemaUpgrade
from calibre.library.field_metadata import FieldMetadata from calibre.library.field_metadata import FieldMetadata
from calibre.ebooks.metadata import title_sort, author_to_author_sort from calibre.ebooks.metadata import title_sort, author_to_author_sort
from calibre.utils.icu import strcmp from calibre.utils.icu import strcmp
from calibre.utils.config import to_json, from_json, prefs, tweaks from calibre.utils.config import to_json, from_json, prefs, tweaks
from calibre.utils.date import utcfromtimestamp from calibre.utils.date import utcfromtimestamp, parse_date
from calibre.utils.filenames import is_case_sensitive
from calibre.db.tables import (OneToOneTable, ManyToOneTable, ManyToManyTable, from calibre.db.tables import (OneToOneTable, ManyToOneTable, ManyToManyTable,
SizeTable, FormatsTable, AuthorsTable, IdentifiersTable) SizeTable, FormatsTable, AuthorsTable, IdentifiersTable)
# }}} # }}}
@ -30,7 +31,9 @@ from calibre.db.tables import (OneToOneTable, ManyToOneTable, ManyToManyTable,
''' '''
Differences in semantics from pysqlite: Differences in semantics from pysqlite:
1. execute/executemany/executescript operate in autocommit mode 1. execute/executemany operate in autocommit mode
2. There is no fetchone() method on cursor objects, instead use next()
3. There is no executescript
''' '''
@ -119,6 +122,66 @@ def icu_collator(s1, s2):
return strcmp(force_unicode(s1, 'utf-8'), force_unicode(s2, 'utf-8')) return strcmp(force_unicode(s1, 'utf-8'), force_unicode(s2, 'utf-8'))
# }}} # }}}
# Unused aggregators {{{
def Concatenate(sep=','):
    '''
    Build a string concatenation aggregator for sqlite.

    Returns a ``(context, step, finalize)`` tuple. ``step`` collects every
    non-NULL value in the context list and ``finalize`` joins the collected
    values with ``sep``, or yields None when nothing was collected.
    '''
    def step(ctxt, value):
        # NULL values contribute nothing to the result
        if value is None:
            return
        ctxt.append(value)

    def finalize(ctxt):
        return sep.join(ctxt) if ctxt else None

    return ([], step, finalize)
def SortedConcatenate(sep=','):
    '''
    String concatenation aggregator for sqlite, sorted by supplied index.

    Returns a ``(context, step, finalize)`` tuple. ``step(ctxt, ndx, value)``
    stores every non-NULL value under its index (a later value with the same
    index replaces the earlier one). ``finalize`` joins the stored values
    with ``sep`` in ascending index order, or returns None when nothing was
    stored.
    '''
    def step(ctxt, ndx, value):
        if value is not None:
            ctxt[ndx] = value

    def finalize(ctxt):
        if not ctxt:
            return None
        # Iterating a dict yields its keys, so sorted(ctxt) orders the
        # indices without relying on the Python 2 only iterkeys()
        return sep.join(ctxt[ndx] for ndx in sorted(ctxt))

    return ({}, step, finalize)
def IdentifiersConcat():
    '''
    Build the string concatenation aggregator used for the identifiers map.

    Returns a ``(context, step, finalize)`` tuple. Each ``step`` records one
    ``key:val`` pair and ``finalize`` joins the recorded pairs with commas
    (an empty string when no pair was recorded).
    '''
    def step(ctxt, key, val):
        pair = u'%s:%s'%(key, val)
        ctxt.append(pair)

    def finalize(ctxt):
        joined = ','.join(ctxt)
        return joined

    return ([], step, finalize)
def AumSortedConcatenate():
    '''
    String concatenation aggregator for the author sort map.

    Returns a ``(context, step, finalize)`` tuple.
    ``step(ctxt, ndx, author, sort, link)`` stores ``author:::sort:::link``
    under ``ndx`` whenever ``author`` is not NULL. ``finalize`` joins the
    stored entries with ``:#:`` in ascending index order, or returns None
    when no entry was stored.
    '''
    def step(ctxt, ndx, author, sort, link):
        if author is not None:
            ctxt[ndx] = ':::'.join((author, sort, link))

    def finalize(ctxt):
        if not ctxt:
            return None
        # Joining a single entry returns that entry unchanged, so no
        # special case is needed for one author. sorted(ctxt) iterates the
        # keys directly instead of the Python 2 only iterkeys().
        return ':#:'.join(ctxt[ndx] for ndx in sorted(ctxt))

    return ({}, step, finalize)
# }}}
class Connection(apsw.Connection): # {{{ class Connection(apsw.Connection): # {{{
BUSY_TIMEOUT = 2000 # milliseconds BUSY_TIMEOUT = 2000 # milliseconds
@ -128,32 +191,46 @@ class Connection(apsw.Connection): # {{{
self.setbusytimeout(self.BUSY_TIMEOUT) self.setbusytimeout(self.BUSY_TIMEOUT)
self.execute('pragma cache_size=5000') self.execute('pragma cache_size=5000')
self.conn.execute('pragma temp_store=2') self.execute('pragma temp_store=2')
encoding = self.execute('pragma encoding').fetchone()[0] encoding = self.execute('pragma encoding').next()[0]
self.conn.create_collation('PYNOCASE', partial(pynocase, self.createcollation('PYNOCASE', partial(pynocase,
encoding=encoding)) encoding=encoding))
self.conn.create_function('title_sort', 1, title_sort) self.createscalarfunction('title_sort', title_sort, 1)
self.conn.create_function('author_to_author_sort', 1, self.createscalarfunction('author_to_author_sort',
_author_to_author_sort) _author_to_author_sort, 1)
self.createscalarfunction('uuid4', lambda : str(uuid.uuid4()),
self.conn.create_function('uuid4', 0, lambda : str(uuid.uuid4())) 0)
# Dummy functions for dynamically created filters # Dummy functions for dynamically created filters
self.conn.create_function('books_list_filter', 1, lambda x: 1) self.createscalarfunction('books_list_filter', lambda x: 1, 1)
self.conn.create_collation('icucollate', icu_collator) self.createcollation('icucollate', icu_collator)
# Legacy aggregators (never used) but present for backwards compat
self.createaggregatefunction('sortconcat', SortedConcatenate, 2)
self.createaggregatefunction('sortconcat_bar',
partial(SortedConcatenate, sep='|'), 2)
self.createaggregatefunction('sortconcat_amper',
partial(SortedConcatenate, sep='&'), 2)
self.createaggregatefunction('identifiers_concat',
IdentifiersConcat, 2)
self.createaggregatefunction('concat', Concatenate, 1)
self.createaggregatefunction('aum_sortconcat',
AumSortedConcatenate, 4)
def create_dynamic_filter(self, name): def create_dynamic_filter(self, name):
f = DynamicFilter(name) f = DynamicFilter(name)
self.conn.create_function(name, 1, f) self.createscalarfunction(name, f, 1)
def get(self, *args, **kw): def get(self, *args, **kw):
ans = self.cursor().execute(*args) ans = self.cursor().execute(*args)
if kw.get('all', True): if kw.get('all', True):
return ans.fetchall() return ans.fetchall()
for row in ans: try:
return ans[0] return ans.next()[0]
except (StopIteration, IndexError):
return None
def execute(self, sql, bindings=None): def execute(self, sql, bindings=None):
cursor = self.cursor() cursor = self.cursor()
@ -162,14 +239,9 @@ class Connection(apsw.Connection): # {{{
def executemany(self, sql, sequence_of_bindings): def executemany(self, sql, sequence_of_bindings):
return self.cursor().executemany(sql, sequence_of_bindings) return self.cursor().executemany(sql, sequence_of_bindings)
def executescript(self, sql):
with self:
# Use an explicit savepoint so that even if this is called
# while a transaction is active, it is atomic
return self.cursor().execute(sql)
# }}} # }}}
class DB(object, SchemaUpgrade): class DB(object):
PATH_LIMIT = 40 if iswindows else 100 PATH_LIMIT = 40 if iswindows else 100
WINDOWS_LIBRARY_PATH_LIMIT = 75 WINDOWS_LIBRARY_PATH_LIMIT = 75
@ -213,25 +285,24 @@ class DB(object, SchemaUpgrade):
shutil.copyfile(self.dbpath, pt.name) shutil.copyfile(self.dbpath, pt.name)
self.dbpath = pt.name self.dbpath = pt.name
self.is_case_sensitive = (not iswindows and if not os.path.exists(os.path.dirname(self.dbpath)):
not os.path.exists(self.dbpath.replace('metadata.db', os.makedirs(os.path.dirname(self.dbpath))
'MeTAdAtA.dB')))
self._conn = None self._conn = None
if self.user_version == 0: if self.user_version == 0:
self.initialize_database() self.initialize_database()
with self.conn: if not os.path.exists(self.library_path):
SchemaUpgrade.__init__(self) os.makedirs(self.library_path)
self.is_case_sensitive = is_case_sensitive(self.library_path)
SchemaUpgrade(self.conn, self.library_path, self.field_metadata)
# Guarantee that the library_id is set # Guarantee that the library_id is set
self.library_id self.library_id
self.initialize_prefs(default_prefs)
# Fix legacy triggers and columns # Fix legacy triggers and columns
self.conn.executescript(''' self.conn.execute('''
DROP TRIGGER IF EXISTS author_insert_trg; DROP TRIGGER IF EXISTS author_insert_trg;
CREATE TEMP TRIGGER author_insert_trg CREATE TEMP TRIGGER author_insert_trg
AFTER INSERT ON authors AFTER INSERT ON authors
@ -248,7 +319,11 @@ class DB(object, SchemaUpgrade):
UPDATE authors SET sort=author_to_author_sort(name) WHERE sort IS NULL; UPDATE authors SET sort=author_to_author_sort(name) WHERE sort IS NULL;
''') ''')
def initialize_prefs(self, default_prefs): self.initialize_prefs(default_prefs)
self.initialize_custom_columns()
self.initialize_tables()
def initialize_prefs(self, default_prefs): # {{{
self.prefs = DBPrefs(self) self.prefs = DBPrefs(self)
if default_prefs is not None and not self._exists: if default_prefs is not None and not self._exists:
@ -339,15 +414,236 @@ class DB(object, SchemaUpgrade):
cats_changed = True cats_changed = True
if cats_changed: if cats_changed:
self.prefs.set('user_categories', user_cats) self.prefs.set('user_categories', user_cats)
# }}}
def initialize_custom_columns(self): # {{{
    '''
    Prepare the custom columns recorded in the ``custom_columns`` table.

    In order, this method:

    1. Drops the indices, triggers, views and tables of every column whose
       ``mark_for_delete`` flag is 1, then deletes those rows.
    2. Reads the metadata of the remaining columns into
       ``self.custom_column_label_map`` and ``self.custom_column_num_map``.
       Rows whose backing tables are missing are deleted from
       ``custom_columns`` after a warning is printed.
    3. Creates the temporary ``custom_books_delete_trg`` trigger, which
       deletes a book's custom column rows when the book is deleted.
    4. Builds ``self.custom_data_adapters``, mapping a datatype name to a
       callable taking ``(value, column_data)``.
    5. Registers every loaded column with ``self.field_metadata``.
    '''
    # NOTE: the text inside the multi-line SQL literals below is kept
    # flush-left so that the string contents are not altered.
    with self.conn:
        # Delete previously marked custom columns
        for record in self.conn.get(
                'SELECT id FROM custom_columns WHERE mark_for_delete=1'):
            num = record[0]
            table, lt = self.custom_table_names(num)
            self.conn.execute('''\
DROP INDEX IF EXISTS {table}_idx;
DROP INDEX IF EXISTS {lt}_aidx;
DROP INDEX IF EXISTS {lt}_bidx;
DROP TRIGGER IF EXISTS fkc_update_{lt}_a;
DROP TRIGGER IF EXISTS fkc_update_{lt}_b;
DROP TRIGGER IF EXISTS fkc_insert_{lt};
DROP TRIGGER IF EXISTS fkc_delete_{lt};
DROP TRIGGER IF EXISTS fkc_insert_{table};
DROP TRIGGER IF EXISTS fkc_delete_{table};
DROP VIEW IF EXISTS tag_browser_{table};
DROP VIEW IF EXISTS tag_browser_filtered_{table};
DROP TABLE IF EXISTS {table};
DROP TABLE IF EXISTS {lt};
'''.format(table=table, lt=lt)
            )
        self.conn.execute('DELETE FROM custom_columns WHERE mark_for_delete=1')
    # Load metadata for custom columns
    self.custom_column_label_map, self.custom_column_num_map = {}, {}
    triggers = []  # one DELETE statement per loaded column
    remove = []  # column records whose backing tables are missing
    custom_tables = self.custom_tables
    for record in self.conn.get(
            'SELECT label,name,datatype,editable,display,normalized,id,is_multiple FROM custom_columns'):
        data = {
            'label':record[0],
            'name':record[1],
            'datatype':record[2],
            'editable':bool(record[3]),
            'display':json.loads(record[4]),
            'normalized':bool(record[5]),
            'num':record[6],
            'is_multiple':bool(record[7]),
        }
        # A JSON null in the display column is treated as an empty dict
        if data['display'] is None:
            data['display'] = {}
        # set up the is_multiple separator dict
        if data['is_multiple']:
            if data['display'].get('is_names', False):
                seps = {'cache_to_list': '|', 'ui_to_list': '&', 'list_to_ui': ' & '}
            elif data['datatype'] == 'composite':
                seps = {'cache_to_list': ',', 'ui_to_list': ',', 'list_to_ui': ', '}
            else:
                seps = {'cache_to_list': '|', 'ui_to_list': ',', 'list_to_ui': ', '}
        else:
            seps = {}
        data['multiple_seps'] = seps
        table, lt = self.custom_table_names(data['num'])
        # Skip (and later remove) columns whose data table, or link table
        # for normalized columns, does not exist in the database
        if table not in custom_tables or (data['normalized'] and lt not in
                custom_tables):
            remove.append(data)
            continue
        # NOTE(review): this assignment is overwritten by the chained
        # assignment that follows, which stores ``data`` under both maps
        self.custom_column_label_map[data['label']] = data['num']
        self.custom_column_num_map[data['num']] = \
            self.custom_column_label_map[data['label']] = data
        # Create Foreign Key triggers
        if data['normalized']:
            trigger = 'DELETE FROM %s WHERE book=OLD.id;'%lt
        else:
            trigger = 'DELETE FROM %s WHERE book=OLD.id;'%table
        triggers.append(trigger)
    if remove:
        with self.conn:
            for data in remove:
                prints('WARNING: Custom column %r not found, removing.' %
                        data['label'])
                self.conn.execute('DELETE FROM custom_columns WHERE id=?',
                        (data['num'],))
    if triggers:
        with self.conn:
            self.conn.execute('''\
CREATE TEMP TRIGGER custom_books_delete_trg
AFTER DELETE ON books
BEGIN
%s
END;
'''%(' \n'.join(triggers)))
    # Setup data adapters
    def adapt_text(x, d):
        # Multiple-valued columns always yield a list of unicode strings
        # with whitespace runs collapsed; single-valued columns yield the
        # value decoded to unicode (or None)
        if d['is_multiple']:
            if x is None:
                return []
            if isinstance(x, (str, unicode, bytes)):
                x = x.split(d['multiple_seps']['ui_to_list'])
            x = [y.strip() for y in x if y.strip()]
            x = [y.decode(preferred_encoding, 'replace') if not isinstance(y,
                unicode) else y for y in x]
            return [u' '.join(y.split()) for y in x]
        else:
            return x if x is None or isinstance(x, unicode) else \
                    x.decode(preferred_encoding, 'replace')
    def adapt_datetime(x, d):
        # Strings are parsed into dates; other values pass through
        if isinstance(x, (str, unicode, bytes)):
            x = parse_date(x, assume_utc=False, as_utc=False)
        return x
    def adapt_bool(x, d):
        # Accepts the case-insensitive strings 'true', 'false' and 'none';
        # any other string is converted via int()
        if isinstance(x, (str, unicode, bytes)):
            x = x.lower()
            if x == 'true':
                x = True
            elif x == 'false':
                x = False
            elif x == 'none':
                x = None
            else:
                x = bool(int(x))
        return x
    def adapt_enum(x, d):
        # Same as text, except that an empty result becomes None
        v = adapt_text(x, d)
        if not v:
            v = None
        return v
    def adapt_number(x, d):
        # None and the string 'none' (any case) map to None; otherwise
        # convert to int or float according to the column datatype
        if x is None:
            return None
        if isinstance(x, (str, unicode, bytes)):
            if x.lower() == 'none':
                return None
        if d['datatype'] == 'int':
            return int(x)
        return float(x)
    self.custom_data_adapters = {
        'float': adapt_number,
        'int': adapt_number,
        # Ratings are clamped to the range 0 to 10
        'rating':lambda x,d : x if x is None else min(10., max(0., float(x))),
        'bool': adapt_bool,
        # Comments are always adapted as single-valued text
        'comments': lambda x,d: adapt_text(x, {'is_multiple':False}),
        'datetime' : adapt_datetime,
        'text':adapt_text,
        'series':adapt_text,
        'enumeration': adapt_enum
    }
    # Create Tag Browser categories for custom columns
    for k in sorted(self.custom_column_label_map.iterkeys()):
        v = self.custom_column_label_map[k]
        # Only normalized columns are flagged as categories
        if v['normalized']:
            is_category = True
        else:
            is_category = False
        # The separator dict (empty for single-valued columns) is what is
        # passed as is_multiple
        is_m = v['multiple_seps']
        tn = 'custom_column_{0}'.format(v['num'])
        self.field_metadata.add_custom_field(label=v['label'],
                table=tn, column='value', datatype=v['datatype'],
                colnum=v['num'], name=v['name'], display=v['display'],
                is_multiple=is_m, is_category=is_category,
                is_editable=v['editable'], is_csp=False)
    # }}}
def initialize_tables(self): # {{{
    '''
    Create ``self.tables``, a dict mapping a field name to its table object.

    Built-in fields are created from copies of their ``self.field_metadata``
    entries. Custom columns are added under the key ``'#' + label``, with an
    additional ``'#label_index'`` entry for custom series columns. No table
    is read from the database here.
    '''
    tables = self.tables = {}
    # One value per book
    for col in ('title', 'sort', 'author_sort', 'series_index', 'comments',
            'timestamp', 'pubdate', 'uuid', 'path', 'cover',
            'last_modified'):
        metadata = self.field_metadata[col].copy()
        if col == 'comments':
            metadata['table'], metadata['column'] = 'comments', 'text'
        # Fields without a table of their own are columns of the books
        # table; the cover field is backed by the has_cover column
        if not metadata['table']:
            metadata['table'], metadata['column'] = 'books', ('has_cover'
                    if col == 'cover' else col)
        if not metadata['column']:
            metadata['column'] = col
        tables[col] = OneToOneTable(col, metadata)
    for col in ('series', 'publisher', 'rating'):
        tables[col] = ManyToOneTable(col, self.field_metadata[col].copy())
    for col in ('authors', 'tags', 'formats', 'identifiers'):
        # tags has no specialised class and falls back to ManyToManyTable
        cls = {
                'authors':AuthorsTable,
                'formats':FormatsTable,
                'identifiers':IdentifiersTable,
              }.get(col, ManyToManyTable)
        tables[col] = cls(col, self.field_metadata[col].copy())
    tables['size'] = SizeTable('size', self.field_metadata['size'].copy())
    # Custom columns
    for label, data in self.custom_column_label_map.iteritems():
        label = '#' + label
        metadata = self.field_metadata[label].copy()
        link_table = self.custom_table_names(data['num'])[1]
        if data['normalized']:
            if metadata['is_multiple']:
                tables[label] = ManyToManyTable(label, metadata,
                        link_table=link_table)
            else:
                tables[label] = ManyToOneTable(label, metadata,
                        link_table=link_table)
                # NOTE(review): nesting of this branch was reconstructed
                # from de-indented text; confirm against the real file
                if metadata['datatype'] == 'series':
                    # Create series index table
                    label += '_index'
                    metadata = self.field_metadata[label].copy()
                    metadata['column'] = 'extra'
                    metadata['table'] = link_table
                    tables[label] = OneToOneTable(label, metadata)
        else:
            tables[label] = OneToOneTable(label, metadata)
    # }}}
@property @property
def conn(self): def conn(self):
if self._conn is None: if self._conn is None:
self._conn = apsw.Connection(self.dbpath) self._conn = Connection(self.dbpath)
if self._exists and self.user_version == 0: if self._exists and self.user_version == 0:
self._conn.close() self._conn.close()
os.remove(self.dbpath) os.remove(self.dbpath)
self._conn = apsw.Connection(self.dbpath) self._conn = Connection(self.dbpath)
return self._conn return self._conn
@dynamic_property @dynamic_property
@ -365,13 +661,29 @@ class DB(object, SchemaUpgrade):
def initialize_database(self): def initialize_database(self):
metadata_sqlite = P('metadata_sqlite.sql', data=True, metadata_sqlite = P('metadata_sqlite.sql', data=True,
allow_user_override=False).decode('utf-8') allow_user_override=False).decode('utf-8')
self.conn.executescript(metadata_sqlite) cur = self.conn.cursor()
cur.execute('BEGIN EXCLUSIVE TRANSACTION')
try:
cur.execute(metadata_sqlite)
except:
cur.execute('ROLLBACK')
else:
cur.execute('COMMIT')
if self.user_version == 0: if self.user_version == 0:
self.user_version = 1 self.user_version = 1
# }}} # }}}
# Database layer API {{{ # Database layer API {{{
def custom_table_names(self, num):
    '''
    Return the ``(data table, link table)`` name pair for the custom column
    with the numeric id ``num``.
    '''
    data_table = 'custom_column_%d'%num
    link_table = 'books_custom_column_%d_link'%num
    return data_table, link_table
@property
def custom_tables(self):
    '''
    The set of names of all tables in the database whose name matches
    ``custom_column_*`` or ``books_custom_column_*``.
    '''
    query = ('SELECT name FROM sqlite_master WHERE type="table" AND '
        '(name GLOB "custom_column_*" OR name GLOB "books_custom_column_*")')
    names = set()
    for row in self.conn.get(query):
        # Every row holds a single column: the table name
        names.add(row[0])
    return names
@classmethod @classmethod
def exists_at(cls, path): def exists_at(cls, path):
return path and os.path.exists(os.path.join(path, 'metadata.db')) return path and os.path.exists(os.path.join(path, 'metadata.db'))
@ -396,7 +708,7 @@ class DB(object, SchemaUpgrade):
self.conn.execute(''' self.conn.execute('''
DELETE FROM library_id; DELETE FROM library_id;
INSERT INTO library_id (uuid) VALUES (?); INSERT INTO library_id (uuid) VALUES (?);
''', self._library_id_) ''', (self._library_id_,))
return property(doc=doc, fget=fget, fset=fset) return property(doc=doc, fget=fget, fset=fset)
@ -405,39 +717,20 @@ class DB(object, SchemaUpgrade):
return utcfromtimestamp(os.stat(self.dbpath).st_mtime) return utcfromtimestamp(os.stat(self.dbpath).st_mtime)
def read_tables(self): def read_tables(self):
tables = {} '''
for col in ('title', 'sort', 'author_sort', 'series_index', 'comments', Read all data from the db into the python in-memory tables
'timestamp', 'published', 'uuid', 'path', 'cover', '''
'last_modified'):
metadata = self.field_metadata[col].copy()
if metadata['table'] is None:
metadata['table'], metadata['column'] == 'books', ('has_cover'
if col == 'cover' else col)
tables[col] = OneToOneTable(col, metadata)
for col in ('series', 'publisher', 'rating'):
tables[col] = ManyToOneTable(col, self.field_metadata[col].copy())
for col in ('authors', 'tags', 'formats', 'identifiers'):
cls = {
'authors':AuthorsTable,
'formats':FormatsTable,
'identifiers':IdentifiersTable,
}.get(col, ManyToManyTable)
tables[col] = cls(col, self.field_metadata[col].copy())
tables['size'] = SizeTable('size', self.field_metadata['size'].copy())
with self.conn: # Use a single transaction, to ensure nothing modifies with self.conn: # Use a single transaction, to ensure nothing modifies
# the db while we are reading # the db while we are reading
for table in tables.itervalues(): for table in self.tables.itervalues():
try: try:
table.read() table.read(self)
except: except:
prints('Failed to read table:', table.name) prints('Failed to read table:', table.name)
import pprint
pprint.pprint(table.metadata)
raise raise
return tables
# }}} # }}}

11
src/calibre/db/cache.py Normal file
View File

@ -0,0 +1,11 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

331
src/calibre/db/locking.py Normal file
View File

@ -0,0 +1,331 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from threading import Lock, Condition, current_thread
class LockingError(RuntimeError):
    '''
    Signals misuse of an SHLock: releasing a lock the calling thread does not
    hold, or trying to switch between shared and exclusive mode while still
    holding the lock.
    '''
def create_locks():
    '''
    Build and return the tuple ``(read_lock, write_lock)``.

    Both objects wrap one and the same underlying SHLock and are meant to be
    used in ``with`` statements.

    read_lock: may be held by several threads at once and may be acquired
    repeatedly by the same thread.

    write_lock: may be held by only one thread at a time, and only while no
    read locks are held. While it is held no other thread can acquire a read
    lock. It may also be acquired repeatedly by the same thread.

    WARNING: A thread must release *all* locks of one kind before trying to
    acquire a lock of the other kind, i.e. never take the read lock while
    holding the write lock or vice versa. Bad things will happen if you
    violate this rule, the most benign of which is the raising of a
    LockingError (the possibility of deadlocking in this scenario has not
    been eliminated).
    '''
    underlying = SHLock()
    read_lock = RWLockWrapper(underlying)
    write_lock = RWLockWrapper(underlying, is_shared=False)
    return read_lock, write_lock
class SHLock(object):
    '''
    Shareable lock class. Used to implement the Multiple readers-single writer
    paradigm. As best as I can tell, neither writer nor reader starvation
    should be possible.

    The lock can be held either in shared mode (by any number of threads) or
    in exclusive mode (by a single thread). Both modes are re-entrant for the
    thread that already holds the lock in that mode; switching modes while
    holding the lock raises LockingError.

    Based on code from: https://github.com/rfk/threading2
    '''

    def __init__(self):
        # Protects all of the bookkeeping below. It is also the lock that
        # every waiter Condition is built on (see _take_waiter).
        self._lock = Lock()
        # When a shared lock is held, is_shared will give the cumulative
        # number of locks and _shared_owners maps each owning thread to
        # the number of locks it holds.
        self.is_shared = 0
        self._shared_owners = {}
        # When an exclusive lock is held, is_exclusive will give the number
        # of locks held and _exclusive_owner will give the owning thread
        self.is_exclusive = 0
        self._exclusive_owner = None
        # When someone is forced to wait for a lock, they add themselves
        # to one of these queues along with a "waiter" condition that
        # is used to wake them up.
        self._shared_queue = []
        self._exclusive_queue = []
        # This is for recycling waiter objects.
        self._free_waiters = []

    def acquire(self, blocking=True, shared=False):
        '''
        Acquire the lock in shared or exclusive mode.

        :param blocking: if False, do not wait for the lock; return False
            instead when it cannot be acquired immediately
        :param shared: acquire in shared mode if True, exclusive otherwise
        :return: True if the lock was acquired, False if ``blocking`` is
            False and acquiring the lock failed
        :raises LockingError: if the calling thread already holds the lock in
            the other mode
        '''
        with self._lock:
            if shared:
                acquired = self._acquire_shared(blocking)
            else:
                acquired = self._acquire_exclusive(blocking)
            # Sanity check on the bookkeeping: the lock must never be held
            # in both modes at once. This has to run before returning,
            # otherwise it is dead code.
            assert not (self.is_shared and self.is_exclusive)
            return acquired

    def release(self):
        '''
        Release the lock.

        :raises LockingError: if the calling thread does not hold the lock
        '''
        # This decrements the appropriate lock counters, and if the lock
        # becomes free, it looks for a queued thread to hand it off to.
        # By doing the handoff here we ensure fairness.
        me = current_thread()
        with self._lock:
            if self.is_exclusive:
                if self._exclusive_owner is not me:
                    raise LockingError("release() called on unheld lock")
                self.is_exclusive -= 1
                if not self.is_exclusive:
                    self._exclusive_owner = None
                    # If there are waiting shared locks, issue them
                    # all and then wake everyone up.
                    if self._shared_queue:
                        for (thread, waiter) in self._shared_queue:
                            self.is_shared += 1
                            self._shared_owners[thread] = 1
                            waiter.notify()
                        del self._shared_queue[:]
                    # Otherwise, if there are waiting exclusive locks,
                    # the one at the head of the queue gets the lock.
                    elif self._exclusive_queue:
                        (thread, waiter) = self._exclusive_queue.pop(0)
                        self._exclusive_owner = thread
                        self.is_exclusive += 1
                        waiter.notify()
            elif self.is_shared:
                try:
                    self._shared_owners[me] -= 1
                    if self._shared_owners[me] == 0:
                        del self._shared_owners[me]
                except KeyError:
                    # This thread is not one of the shared owners
                    raise LockingError("release() called on unheld lock")
                self.is_shared -= 1
                if not self.is_shared:
                    # If there are waiting exclusive locks,
                    # they get first dibs on the lock.
                    if self._exclusive_queue:
                        (thread, waiter) = self._exclusive_queue.pop(0)
                        self._exclusive_owner = thread
                        self.is_exclusive += 1
                        waiter.notify()
                    else:
                        # Shared waiters only queue up behind an exclusive
                        # holder/waiter, so none can be pending here.
                        assert not self._shared_queue
            else:
                raise LockingError("release() called on unheld lock")

    def _acquire_shared(self, blocking=True):
        '''
        Take the lock in shared mode. Called by acquire() with self._lock
        held.

        :return: True once the lock is held, False if ``blocking`` is False
            and the lock is held or awaited by an exclusive locker
        :raises LockingError: if the calling thread holds the exclusive lock
        '''
        me = current_thread()
        # Easy case: acquiring a lock we already hold.
        if self.is_shared and me in self._shared_owners:
            self.is_shared += 1
            self._shared_owners[me] += 1
            return True
        # If the lock is already spoken for by an exclusive, add us
        # to the shared queue and it will give us the lock eventually.
        if self.is_exclusive or self._exclusive_queue:
            if self._exclusive_owner is me:
                raise LockingError("can't downgrade SHLock object")
            if not blocking:
                return False
            waiter = self._take_waiter()
            try:
                self._shared_queue.append((me, waiter))
                # release() updates the counters on our behalf before
                # notifying, so nothing more needs recording after waking.
                waiter.wait()
                assert not self.is_exclusive
            finally:
                self._return_waiter(waiter)
        else:
            self.is_shared += 1
            self._shared_owners[me] = 1
        return True

    def _acquire_exclusive(self, blocking=True):
        '''
        Take the lock in exclusive mode. Called by acquire() with self._lock
        held.

        :return: True once the lock is held, False if ``blocking`` is False
            and the lock is currently held by anyone
        :raises LockingError: if the calling thread holds a shared lock
        '''
        me = current_thread()
        # Easy case: acquiring a lock we already hold.
        if self._exclusive_owner is me:
            assert self.is_exclusive
            self.is_exclusive += 1
            return True
        # Do not allow upgrade of lock
        if self.is_shared and me in self._shared_owners:
            raise LockingError("can't upgrade SHLock object")
        # If the lock is already spoken for, add us to the exclusive queue.
        # This will eventually give us the lock when it's our turn.
        if self.is_shared or self.is_exclusive:
            if not blocking:
                return False
            waiter = self._take_waiter()
            try:
                self._exclusive_queue.append((me, waiter))
                # release() makes us the owner before notifying.
                waiter.wait()
            finally:
                self._return_waiter(waiter)
        else:
            self._exclusive_owner = me
            self.is_exclusive += 1
        return True

    def _take_waiter(self):
        ''' Return a recycled waiter Condition, or a new one bound to
        self._lock if none is available. '''
        try:
            return self._free_waiters.pop()
        except IndexError:
            return Condition(self._lock)

    def _return_waiter(self, waiter):
        ''' Put a waiter Condition back into the pool for re-use. '''
        self._free_waiters.append(waiter)
class RWLockWrapper(object):
    '''
    Context manager that acquires the wrapped lock in one fixed mode (shared
    or exclusive) on entry and releases it on exit.
    '''

    def __init__(self, shlock, is_shared=True):
        # The mode is fixed at construction time; the wrapped lock object is
        # only stored here, never copied.
        self._is_shared = is_shared
        self._shlock = shlock

    def __enter__(self):
        mode = self._is_shared
        self._shlock.acquire(shared=mode)
        return self

    def __exit__(self, *args):
        # Exceptions raised inside the with block are not suppressed
        lock = self._shlock
        lock.release()
# Tests {{{
# Self-test for SHLock; only runs when this module is executed as a script.
if __name__ == '__main__':
    import time, random, unittest
    from threading import Thread

    class TestSHLock(unittest.TestCase):
        """Testcases for SHLock class."""

        # A thread holding a shared lock must not be able to take the
        # exclusive lock.
        def test_upgrade(self):
            lock = SHLock()
            lock.acquire(shared=True)
            self.assertRaises(LockingError, lock.acquire, shared=False)
            lock.release()

        # A thread holding the exclusive lock must not be able to take a
        # shared lock.
        def test_downgrade(self):
            lock = SHLock()
            lock.acquire(shared=False)
            self.assertRaises(LockingError, lock.acquire, shared=True)
            lock.release()

        # Both modes can be acquired repeatedly by the same thread and the
        # counters return to zero once every acquire has been released.
        def test_recursive(self):
            lock = SHLock()
            lock.acquire(shared=True)
            lock.acquire(shared=True)
            self.assertEqual(lock.is_shared, 2)
            lock.release()
            lock.release()
            self.assertFalse(lock.is_shared)
            lock.acquire(shared=False)
            lock.acquire(shared=False)
            self.assertEqual(lock.is_exclusive, 2)
            lock.release()
            lock.release()
            self.assertFalse(lock.is_exclusive)

        # Releasing a lock this thread does not hold must raise, both when
        # the lock is free and when another thread holds it.
        def test_release(self):
            lock = SHLock()
            self.assertRaises(LockingError, lock.release)

            def get_lock(shared):
                # Hold the lock for a second in the requested mode
                lock.acquire(shared=shared)
                time.sleep(1)
                lock.release()

            # One holder in shared mode, then one in exclusive mode
            threads = [Thread(target=get_lock, args=(x,)) for x in (True,
                False)]
            for t in threads:
                t.daemon = True
                t.start()
                self.assertRaises(LockingError, lock.release)
                # The helper sleeps for 1s, so 2s is enough for it to finish
                t.join(2)
                self.assertFalse(t.is_alive())
            self.assertFalse(lock.is_shared)
            self.assertFalse(lock.is_exclusive)

        # Blocking and non-blocking acquisition while another thread holds
        # the lock in shared and then in exclusive mode.
        def test_acquire(self):
            lock = SHLock()

            def get_lock(shared):
                lock.acquire(shared=shared)
                time.sleep(1)
                lock.release()

            shared = Thread(target=get_lock, args=(True,))
            shared.daemon = True
            shared.start()
            # Give the helper thread time to actually take the lock
            time.sleep(0.1)
            # Shared holders do not exclude other shared lockers...
            self.assertTrue(lock.acquire(shared=True, blocking=False))
            lock.release()
            # ...but they do exclude an exclusive locker
            self.assertFalse(lock.acquire(shared=False, blocking=False))
            # A blocking exclusive acquire only returns after the helper has
            # released, i.e. after the helper thread is done
            lock.acquire(shared=False)
            self.assertFalse(shared.is_alive())
            lock.release()
            self.assertTrue(lock.acquire(shared=False, blocking=False))
            lock.release()

            exclusive = Thread(target=get_lock, args=(False,))
            exclusive.daemon = True
            exclusive.start()
            time.sleep(0.1)
            # An exclusive holder excludes everybody else
            self.assertFalse(lock.acquire(shared=False, blocking=False))
            self.assertFalse(lock.acquire(shared=True, blocking=False))
            lock.acquire(shared=True)
            self.assertFalse(exclusive.is_alive())
            lock.release()
            lock.acquire(shared=False)
            lock.release()
            lock.acquire(shared=True)
            lock.release()
            self.assertFalse(lock.is_shared)
            self.assertFalse(lock.is_exclusive)

        # Stress test: many threads taking the lock re-entrantly in randomly
        # chosen modes must all finish without hanging.
        def test_contention(self):
            lock = SHLock()
            done = []

            def lots_of_acquires():
                for _ in xrange(1000):
                    shared = random.choice([True,False])
                    lock.acquire(shared=shared)
                    lock.acquire(shared=shared)
                    time.sleep(random.random() * 0.0001)
                    lock.release()
                    time.sleep(random.random() * 0.0001)
                    lock.acquire(shared=shared)
                    time.sleep(random.random() * 0.0001)
                    lock.release()
                    lock.release()
                # Only reached when every iteration completed
                done.append(True)

            threads = [Thread(target=lots_of_acquires) for _ in xrange(10)]
            for t in threads:
                t.daemon = True
                t.start()
            for t in threads:
                t.join(20)
            # Any thread still alive after the timeout is taken to be stuck
            live = [t for t in threads if t.is_alive()]
            self.assertListEqual(live, [], 'ShLock hung')
            self.assertEqual(len(done), len(threads), 'SHLock locking failed')
            self.assertFalse(lock.is_shared)
            self.assertFalse(lock.is_exclusive)

    suite = unittest.TestLoader().loadTestsFromTestCase(TestSHLock)
    unittest.TextTestRunner(verbosity=2).run(suite)
# }}}

View File

@ -0,0 +1,618 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from calibre import prints
from calibre.utils.date import isoformat, DEFAULT_DATE
class SchemaUpgrade(object):
    '''
    Upgrades a library database to the newest schema known to this class.

    The schema version is stored in the database as ``pragma user_version``.
    For a database at version N, the method ``upgrade_version_N`` (when one
    exists) performs the migration to version N+1.
    '''

    def __init__(self, conn, library_path, field_metadata):
        '''
        Apply every applicable ``upgrade_version_N`` step, all inside a
        single exclusive transaction.

        :param conn: the database connection. The steps call its
            ``execute()``, ``executemany()`` and ``get()`` methods.
        :param library_path: path to the library folder; used by
            upgrade_version_14 to look for cover files
        :param field_metadata: mapping of field metadata; its values are
            inspected by upgrade_version_10 and upgrade_version_11

        If any step raises, the transaction is rolled back and the exception
        is re-raised.
        '''
        conn.execute('BEGIN EXCLUSIVE TRANSACTION')
        self.conn = conn
        self.library_path = library_path
        self.field_metadata = field_metadata
        # Upgrade database
        try:
            while True:
                # uv is the version the database is currently at
                uv = self.conn.execute('pragma user_version').next()[0]
                meth = getattr(self, 'upgrade_version_%d'%uv, None)
                if meth is None:
                    # No migration defined for this version, we are done
                    break
                else:
                    prints('Upgrading database to version %d...'%(uv+1))
                    meth()
                    self.conn.execute('pragma user_version=%d'%(uv+1))
        except:
            self.conn.execute('ROLLBACK')
            raise
        else:
            self.conn.execute('COMMIT')
        finally:
            # Drop the references that were only needed while upgrading
            self.conn = self.field_metadata = None

    def upgrade_version_1(self):
        '''
        Normalize indices.
        '''
        self.conn.execute('''\
DROP INDEX IF EXISTS authors_idx;
CREATE INDEX authors_idx ON books (author_sort COLLATE NOCASE, sort COLLATE NOCASE);
DROP INDEX IF EXISTS series_idx;
CREATE INDEX series_idx ON series (name COLLATE NOCASE);
DROP INDEX IF EXISTS series_sort_idx;
CREATE INDEX series_sort_idx ON books (series_index, id);
''')

    def upgrade_version_2(self):
        ''' Fix Foreign key constraints for deleting from link tables. '''
        # Template run once per table below. Besides replacing the trigger it
        # deletes the rows of the table that no book links to.
        script = '''\
DROP TRIGGER IF EXISTS fkc_delete_books_%(ltable)s_link;
CREATE TRIGGER fkc_delete_on_%(table)s
BEFORE DELETE ON %(table)s
BEGIN
SELECT CASE
WHEN (SELECT COUNT(id) FROM books_%(ltable)s_link WHERE %(ltable_col)s=OLD.id) > 0
THEN RAISE(ABORT, 'Foreign key violation: %(table)s is still referenced')
END;
END;
DELETE FROM %(table)s WHERE (SELECT COUNT(id) FROM books_%(ltable)s_link WHERE %(ltable_col)s=%(table)s.id) < 1;
'''
        self.conn.execute(script%dict(ltable='authors', table='authors', ltable_col='author'))
        self.conn.execute(script%dict(ltable='publishers', table='publishers', ltable_col='publisher'))
        self.conn.execute(script%dict(ltable='tags', table='tags', ltable_col='tag'))
        self.conn.execute(script%dict(ltable='series', table='series', ltable_col='series'))

    def upgrade_version_3(self):
        ' Add path to result cache '
        self.conn.execute('''
DROP VIEW IF EXISTS meta;
CREATE VIEW meta AS
SELECT id, title,
(SELECT concat(name) FROM authors WHERE authors.id IN (SELECT author from books_authors_link WHERE book=books.id)) authors,
(SELECT name FROM publishers WHERE publishers.id IN (SELECT publisher from books_publishers_link WHERE book=books.id)) publisher,
(SELECT rating FROM ratings WHERE ratings.id IN (SELECT rating from books_ratings_link WHERE book=books.id)) rating,
timestamp,
(SELECT MAX(uncompressed_size) FROM data WHERE book=books.id) size,
(SELECT concat(name) FROM tags WHERE tags.id IN (SELECT tag from books_tags_link WHERE book=books.id)) tags,
(SELECT text FROM comments WHERE book=books.id) comments,
(SELECT name FROM series WHERE series.id IN (SELECT series FROM books_series_link WHERE book=books.id)) series,
series_index,
sort,
author_sort,
(SELECT concat(format) FROM data WHERE data.book=books.id) formats,
isbn,
path
FROM books;
''')

    def upgrade_version_4(self):
        'Rationalize books table'
        # The books table is rebuilt via a temporary backup table. The new
        # pubdate column is filled from timestamp, and the meta view is
        # recreated with the new lccn, pubdate and flags columns.
        self.conn.execute('''
CREATE TEMPORARY TABLE
books_backup(id,title,sort,timestamp,series_index,author_sort,isbn,path);
INSERT INTO books_backup SELECT id,title,sort,timestamp,series_index,author_sort,isbn,path FROM books;
DROP TABLE books;
CREATE TABLE books ( id INTEGER PRIMARY KEY AUTOINCREMENT,
title TEXT NOT NULL DEFAULT 'Unknown' COLLATE NOCASE,
sort TEXT COLLATE NOCASE,
timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
pubdate TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
series_index REAL NOT NULL DEFAULT 1.0,
author_sort TEXT COLLATE NOCASE,
isbn TEXT DEFAULT "" COLLATE NOCASE,
lccn TEXT DEFAULT "" COLLATE NOCASE,
path TEXT NOT NULL DEFAULT "",
flags INTEGER NOT NULL DEFAULT 1
);
INSERT INTO
books (id,title,sort,timestamp,pubdate,series_index,author_sort,isbn,path)
SELECT id,title,sort,timestamp,timestamp,series_index,author_sort,isbn,path FROM books_backup;
DROP TABLE books_backup;
DROP VIEW IF EXISTS meta;
CREATE VIEW meta AS
SELECT id, title,
(SELECT concat(name) FROM authors WHERE authors.id IN (SELECT author from books_authors_link WHERE book=books.id)) authors,
(SELECT name FROM publishers WHERE publishers.id IN (SELECT publisher from books_publishers_link WHERE book=books.id)) publisher,
(SELECT rating FROM ratings WHERE ratings.id IN (SELECT rating from books_ratings_link WHERE book=books.id)) rating,
timestamp,
(SELECT MAX(uncompressed_size) FROM data WHERE book=books.id) size,
(SELECT concat(name) FROM tags WHERE tags.id IN (SELECT tag from books_tags_link WHERE book=books.id)) tags,
(SELECT text FROM comments WHERE book=books.id) comments,
(SELECT name FROM series WHERE series.id IN (SELECT series FROM books_series_link WHERE book=books.id)) series,
series_index,
sort,
author_sort,
(SELECT concat(format) FROM data WHERE data.book=books.id) formats,
isbn,
path,
lccn,
pubdate,
flags
FROM books;
''')

    def upgrade_version_5(self):
        'Update indexes/triggers for new books table'
        self.conn.execute('''
CREATE INDEX authors_idx ON books (author_sort COLLATE NOCASE);
CREATE INDEX books_idx ON books (sort COLLATE NOCASE);
CREATE TRIGGER books_delete_trg
AFTER DELETE ON books
BEGIN
DELETE FROM books_authors_link WHERE book=OLD.id;
DELETE FROM books_publishers_link WHERE book=OLD.id;
DELETE FROM books_ratings_link WHERE book=OLD.id;
DELETE FROM books_series_link WHERE book=OLD.id;
DELETE FROM books_tags_link WHERE book=OLD.id;
DELETE FROM data WHERE book=OLD.id;
DELETE FROM comments WHERE book=OLD.id;
DELETE FROM conversion_options WHERE book=OLD.id;
END;
CREATE TRIGGER books_insert_trg
AFTER INSERT ON books
BEGIN
UPDATE books SET sort=title_sort(NEW.title) WHERE id=NEW.id;
END;
CREATE TRIGGER books_update_trg
AFTER UPDATE ON books
BEGIN
UPDATE books SET sort=title_sort(NEW.title) WHERE id=NEW.id;
END;
UPDATE books SET sort=title_sort(title) WHERE sort IS NULL;
'''
        )

    def upgrade_version_6(self):
        'Show authors in order'
        # Same meta view as before, except that authors are now aggregated
        # with sortconcat() keyed on the link table's id
        self.conn.execute('''
DROP VIEW IF EXISTS meta;
CREATE VIEW meta AS
SELECT id, title,
(SELECT sortconcat(bal.id, name) FROM books_authors_link AS bal JOIN authors ON(author = authors.id) WHERE book = books.id) authors,
(SELECT name FROM publishers WHERE publishers.id IN (SELECT publisher from books_publishers_link WHERE book=books.id)) publisher,
(SELECT rating FROM ratings WHERE ratings.id IN (SELECT rating from books_ratings_link WHERE book=books.id)) rating,
timestamp,
(SELECT MAX(uncompressed_size) FROM data WHERE book=books.id) size,
(SELECT concat(name) FROM tags WHERE tags.id IN (SELECT tag from books_tags_link WHERE book=books.id)) tags,
(SELECT text FROM comments WHERE book=books.id) comments,
(SELECT name FROM series WHERE series.id IN (SELECT series FROM books_series_link WHERE book=books.id)) series,
series_index,
sort,
author_sort,
(SELECT concat(format) FROM data WHERE data.book=books.id) formats,
isbn,
path,
lccn,
pubdate,
flags
FROM books;
''')

    def upgrade_version_7(self):
        'Add uuid column'
        # The insert/update triggers are dropped before the bulk uuid UPDATE
        # and recreated afterwards; the new insert trigger also assigns a
        # uuid. The meta view gains the uuid column.
        self.conn.execute('''
ALTER TABLE books ADD COLUMN uuid TEXT;
DROP TRIGGER IF EXISTS books_insert_trg;
DROP TRIGGER IF EXISTS books_update_trg;
UPDATE books SET uuid=uuid4();
CREATE TRIGGER books_insert_trg AFTER INSERT ON books
BEGIN
UPDATE books SET sort=title_sort(NEW.title),uuid=uuid4() WHERE id=NEW.id;
END;
CREATE TRIGGER books_update_trg AFTER UPDATE ON books
BEGIN
UPDATE books SET sort=title_sort(NEW.title) WHERE id=NEW.id;
END;
DROP VIEW IF EXISTS meta;
CREATE VIEW meta AS
SELECT id, title,
(SELECT sortconcat(bal.id, name) FROM books_authors_link AS bal JOIN authors ON(author = authors.id) WHERE book = books.id) authors,
(SELECT name FROM publishers WHERE publishers.id IN (SELECT publisher from books_publishers_link WHERE book=books.id)) publisher,
(SELECT rating FROM ratings WHERE ratings.id IN (SELECT rating from books_ratings_link WHERE book=books.id)) rating,
timestamp,
(SELECT MAX(uncompressed_size) FROM data WHERE book=books.id) size,
(SELECT concat(name) FROM tags WHERE tags.id IN (SELECT tag from books_tags_link WHERE book=books.id)) tags,
(SELECT text FROM comments WHERE book=books.id) comments,
(SELECT name FROM series WHERE series.id IN (SELECT series FROM books_series_link WHERE book=books.id)) series,
series_index,
sort,
author_sort,
(SELECT concat(format) FROM data WHERE data.book=books.id) formats,
isbn,
path,
lccn,
pubdate,
flags,
uuid
FROM books;
''')

    def upgrade_version_8(self):
        'Add Tag Browser views'
        def create_tag_browser_view(table_name, column_name):
            # (Re)create the view tag_browser_<table_name> giving id, name
            # and the number of link rows for every row of the table
            self.conn.execute('''
DROP VIEW IF EXISTS tag_browser_{tn};
CREATE VIEW tag_browser_{tn} AS SELECT
id,
name,
(SELECT COUNT(id) FROM books_{tn}_link WHERE {cn}={tn}.id) count
FROM {tn};
'''.format(tn=table_name, cn=column_name))

        for tn in ('authors', 'tags', 'publishers', 'series'):
            # The link column is the table name minus its trailing 's',
            # except for series where both are the same
            cn = tn[:-1]
            if tn == 'series':
                cn = tn
            create_tag_browser_view(tn, cn)

    def upgrade_version_9(self):
        'Add custom columns'
        self.conn.execute('''
CREATE TABLE custom_columns (
id INTEGER PRIMARY KEY AUTOINCREMENT,
label TEXT NOT NULL,
name TEXT NOT NULL,
datatype TEXT NOT NULL,
mark_for_delete BOOL DEFAULT 0 NOT NULL,
editable BOOL DEFAULT 1 NOT NULL,
display TEXT DEFAULT "{}" NOT NULL,
is_multiple BOOL DEFAULT 0 NOT NULL,
normalized BOOL NOT NULL,
UNIQUE(label)
);
CREATE INDEX IF NOT EXISTS custom_columns_idx ON custom_columns (label);
CREATE INDEX IF NOT EXISTS formats_idx ON data (format);
''')

    def upgrade_version_10(self):
        'Add restricted Tag Browser views'
        def create_tag_browser_view(table_name, column_name, view_column_name):
            # Creates both the plain view and a tag_browser_filtered_ view
            # whose count only includes books accepted by books_list_filter()
            script = ('''
DROP VIEW IF EXISTS tag_browser_{tn};
CREATE VIEW tag_browser_{tn} AS SELECT
id,
{vcn},
(SELECT COUNT(id) FROM books_{tn}_link WHERE {cn}={tn}.id) count
FROM {tn};
DROP VIEW IF EXISTS tag_browser_filtered_{tn};
CREATE VIEW tag_browser_filtered_{tn} AS SELECT
id,
{vcn},
(SELECT COUNT(books_{tn}_link.id) FROM books_{tn}_link WHERE
{cn}={tn}.id AND books_list_filter(book)) count
FROM {tn};
'''.format(tn=table_name, cn=column_name, vcn=view_column_name))
            self.conn.execute(script)

        for field in self.field_metadata.itervalues():
            if field['is_category'] and not field['is_custom'] and 'link_column' in field:
                # Only create views for fields whose link table actually
                # exists in this database
                table = self.conn.get(
                    'SELECT name FROM sqlite_master WHERE type="table" AND name=?',
                    ('books_%s_link'%field['table'],), all=False)
                if table is not None:
                    create_tag_browser_view(field['table'], field['link_column'], field['column'])

    def upgrade_version_11(self):
        'Add average rating to tag browser views'
        def create_std_tag_browser_view(table_name, column_name,
                view_column_name, sort_column_name):
            # Views for the builtin category tables: adds avg_rating
            # (ratings of 0 are excluded from the average) and a sort column
            script = ('''
DROP VIEW IF EXISTS tag_browser_{tn};
CREATE VIEW tag_browser_{tn} AS SELECT
id,
{vcn},
(SELECT COUNT(id) FROM books_{tn}_link WHERE {cn}={tn}.id) count,
(SELECT AVG(ratings.rating)
FROM books_{tn}_link AS tl, books_ratings_link AS bl, ratings
WHERE tl.{cn}={tn}.id AND bl.book=tl.book AND
ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
{scn} AS sort
FROM {tn};
DROP VIEW IF EXISTS tag_browser_filtered_{tn};
CREATE VIEW tag_browser_filtered_{tn} AS SELECT
id,
{vcn},
(SELECT COUNT(books_{tn}_link.id) FROM books_{tn}_link WHERE
{cn}={tn}.id AND books_list_filter(book)) count,
(SELECT AVG(ratings.rating)
FROM books_{tn}_link AS tl, books_ratings_link AS bl, ratings
WHERE tl.{cn}={tn}.id AND bl.book=tl.book AND
ratings.id = bl.rating AND ratings.rating <> 0 AND
books_list_filter(bl.book)) avg_rating,
{scn} AS sort
FROM {tn};
'''.format(tn=table_name, cn=column_name,
                vcn=view_column_name, scn= sort_column_name))
            self.conn.execute(script)

        def create_cust_tag_browser_view(table_name, link_table_name):
            # Same pair of views for a custom column table, whose item and
            # link columns are both called value
            script = '''
DROP VIEW IF EXISTS tag_browser_{table};
CREATE VIEW tag_browser_{table} AS SELECT
id,
value,
(SELECT COUNT(id) FROM {lt} WHERE value={table}.id) count,
(SELECT AVG(r.rating)
FROM {lt},
books_ratings_link AS bl,
ratings AS r
WHERE {lt}.value={table}.id AND bl.book={lt}.book AND
r.id = bl.rating AND r.rating <> 0) avg_rating,
value AS sort
FROM {table};
DROP VIEW IF EXISTS tag_browser_filtered_{table};
CREATE VIEW tag_browser_filtered_{table} AS SELECT
id,
value,
(SELECT COUNT({lt}.id) FROM {lt} WHERE value={table}.id AND
books_list_filter(book)) count,
(SELECT AVG(r.rating)
FROM {lt},
books_ratings_link AS bl,
ratings AS r
WHERE {lt}.value={table}.id AND bl.book={lt}.book AND
r.id = bl.rating AND r.rating <> 0 AND
books_list_filter(bl.book)) avg_rating,
value AS sort
FROM {table};
'''.format(lt=link_table_name, table=table_name)
            self.conn.execute(script)

        for field in self.field_metadata.itervalues():
            if field['is_category'] and not field['is_custom'] and 'link_column' in field:
                table = self.conn.get(
                    'SELECT name FROM sqlite_master WHERE type="table" AND name=?',
                    ('books_%s_link'%field['table'],), all=False)
                if table is not None:
                    create_std_tag_browser_view(field['table'], field['link_column'],
                            field['column'], field['category_sort'])

        # Custom columns are found by looking at the table names: a table
        # custom_column_* that has a matching books_custom_column_*_link table
        db_tables = self.conn.get('''SELECT name FROM sqlite_master
WHERE type='table'
ORDER BY name''')
        tables = []
        for (table,) in db_tables:
            tables.append(table)
        for table in tables:
            link_table = 'books_%s_link'%table
            if table.startswith('custom_column_') and link_table in tables:
                create_cust_tag_browser_view(table, link_table)

        self.conn.execute('UPDATE authors SET sort=author_to_author_sort(name)')

    def upgrade_version_12(self):
        'DB based preference store'
        # NOTE(review): the columns are declared 'NON NULL' rather than
        # 'NOT NULL'; presumably this means no NOT NULL constraint is
        # actually created -- confirm before relying on it
        script = '''
DROP TABLE IF EXISTS preferences;
CREATE TABLE preferences(id INTEGER PRIMARY KEY,
key TEXT NON NULL,
val TEXT NON NULL,
UNIQUE(key));
'''
        self.conn.execute(script)

    def upgrade_version_13(self):
        'Dirtied table for OPF metadata backups'
        # Every existing book is inserted into the new table
        script = '''
DROP TABLE IF EXISTS metadata_dirtied;
CREATE TABLE metadata_dirtied(id INTEGER PRIMARY KEY,
book INTEGER NOT NULL,
UNIQUE(book));
INSERT INTO metadata_dirtied (book) SELECT id FROM books;
'''
        self.conn.execute(script)

    def upgrade_version_14(self):
        'Cache has_cover'
        self.conn.execute('ALTER TABLE books ADD COLUMN has_cover BOOL DEFAULT 0')
        data = self.conn.get('SELECT id,path FROM books', all=True)
        def has_cover(path):
            # True if a cover.jpg exists in the book's folder. Book paths are
            # stored with '/' separators and are relative to the library.
            if path:
                path = os.path.join(self.library_path, path.replace('/', os.sep),
                    'cover.jpg')
                return os.path.exists(path)
            return False

        ids = [(x[0],) for x in data if has_cover(x[1])]
        self.conn.executemany('UPDATE books SET has_cover=1 WHERE id=?', ids)

    def upgrade_version_15(self):
        'Remove commas from tags'
        # Three passes with different replacements. OR IGNORE skips a row
        # whose update fails a constraint, leaving its comma for the next
        # pass -- presumably to cope with renamed tags colliding with
        # existing names; verify against the tags table definition.
        self.conn.execute("UPDATE OR IGNORE tags SET name=REPLACE(name, ',', ';')")
        self.conn.execute("UPDATE OR IGNORE tags SET name=REPLACE(name, ',', ';;')")
        self.conn.execute("UPDATE OR IGNORE tags SET name=REPLACE(name, ',', '')")

    def upgrade_version_16(self):
        'Recreate books_update_trg so sort is only recomputed when the title changes'
        self.conn.execute('''
DROP TRIGGER IF EXISTS books_update_trg;
CREATE TRIGGER books_update_trg
AFTER UPDATE ON books
BEGIN
UPDATE books SET sort=title_sort(NEW.title)
WHERE id=NEW.id AND OLD.title <> NEW.title;
END;
''')

    def upgrade_version_17(self):
        'custom book data table (for plugins)'
        # books_delete_trg is recreated so that deleting a book also removes
        # its rows from the new table.
        # NOTE(review): 'NON NULL' spelling again, see upgrade_version_12
        script = '''
DROP TABLE IF EXISTS books_plugin_data;
CREATE TABLE books_plugin_data(id INTEGER PRIMARY KEY,
book INTEGER NON NULL,
name TEXT NON NULL,
val TEXT NON NULL,
UNIQUE(book,name));
DROP TRIGGER IF EXISTS books_delete_trg;
CREATE TRIGGER books_delete_trg
AFTER DELETE ON books
BEGIN
DELETE FROM books_authors_link WHERE book=OLD.id;
DELETE FROM books_publishers_link WHERE book=OLD.id;
DELETE FROM books_ratings_link WHERE book=OLD.id;
DELETE FROM books_series_link WHERE book=OLD.id;
DELETE FROM books_tags_link WHERE book=OLD.id;
DELETE FROM data WHERE book=OLD.id;
DELETE FROM comments WHERE book=OLD.id;
DELETE FROM conversion_options WHERE book=OLD.id;
DELETE FROM books_plugin_data WHERE book=OLD.id;
END;
'''
        self.conn.execute(script)

    def upgrade_version_18(self):
        '''
        Add a library UUID.
        Add an identifiers table.
        Add a languages table.
        Add a last_modified column.

        NOTE: You cannot downgrade after this update, if you do
        any changes you make to book isbns will be lost.
        '''
        # Existing isbn values are copied into the identifiers table (the
        # type column defaults to "isbn"). books_delete_trg is recreated to
        # also clean up the languages link and identifiers tables.
        # NOTE(review): several columns use the 'NON NULL' spelling, see
        # upgrade_version_12
        script = '''
DROP TABLE IF EXISTS library_id;
CREATE TABLE library_id ( id INTEGER PRIMARY KEY,
uuid TEXT NOT NULL,
UNIQUE(uuid)
);
DROP TABLE IF EXISTS identifiers;
CREATE TABLE identifiers ( id INTEGER PRIMARY KEY,
book INTEGER NON NULL,
type TEXT NON NULL DEFAULT "isbn" COLLATE NOCASE,
val TEXT NON NULL COLLATE NOCASE,
UNIQUE(book, type)
);
DROP TABLE IF EXISTS languages;
CREATE TABLE languages ( id INTEGER PRIMARY KEY,
lang_code TEXT NON NULL COLLATE NOCASE,
UNIQUE(lang_code)
);
DROP TABLE IF EXISTS books_languages_link;
CREATE TABLE books_languages_link ( id INTEGER PRIMARY KEY,
book INTEGER NOT NULL,
lang_code INTEGER NOT NULL,
item_order INTEGER NOT NULL DEFAULT 0,
UNIQUE(book, lang_code)
);
DROP TRIGGER IF EXISTS fkc_delete_on_languages;
CREATE TRIGGER fkc_delete_on_languages
BEFORE DELETE ON languages
BEGIN
SELECT CASE
WHEN (SELECT COUNT(id) FROM books_languages_link WHERE lang_code=OLD.id) > 0
THEN RAISE(ABORT, 'Foreign key violation: language is still referenced')
END;
END;
DROP TRIGGER IF EXISTS fkc_delete_on_languages_link;
CREATE TRIGGER fkc_delete_on_languages_link
BEFORE INSERT ON books_languages_link
BEGIN
SELECT CASE
WHEN (SELECT id from books WHERE id=NEW.book) IS NULL
THEN RAISE(ABORT, 'Foreign key violation: book not in books')
WHEN (SELECT id from languages WHERE id=NEW.lang_code) IS NULL
THEN RAISE(ABORT, 'Foreign key violation: lang_code not in languages')
END;
END;
DROP TRIGGER IF EXISTS fkc_update_books_languages_link_a;
CREATE TRIGGER fkc_update_books_languages_link_a
BEFORE UPDATE OF book ON books_languages_link
BEGIN
SELECT CASE
WHEN (SELECT id from books WHERE id=NEW.book) IS NULL
THEN RAISE(ABORT, 'Foreign key violation: book not in books')
END;
END;
DROP TRIGGER IF EXISTS fkc_update_books_languages_link_b;
CREATE TRIGGER fkc_update_books_languages_link_b
BEFORE UPDATE OF lang_code ON books_languages_link
BEGIN
SELECT CASE
WHEN (SELECT id from languages WHERE id=NEW.lang_code) IS NULL
THEN RAISE(ABORT, 'Foreign key violation: lang_code not in languages')
END;
END;
DROP INDEX IF EXISTS books_languages_link_aidx;
CREATE INDEX books_languages_link_aidx ON books_languages_link (lang_code);
DROP INDEX IF EXISTS books_languages_link_bidx;
CREATE INDEX books_languages_link_bidx ON books_languages_link (book);
DROP INDEX IF EXISTS languages_idx;
CREATE INDEX languages_idx ON languages (lang_code COLLATE NOCASE);
DROP TRIGGER IF EXISTS books_delete_trg;
CREATE TRIGGER books_delete_trg
AFTER DELETE ON books
BEGIN
DELETE FROM books_authors_link WHERE book=OLD.id;
DELETE FROM books_publishers_link WHERE book=OLD.id;
DELETE FROM books_ratings_link WHERE book=OLD.id;
DELETE FROM books_series_link WHERE book=OLD.id;
DELETE FROM books_tags_link WHERE book=OLD.id;
DELETE FROM books_languages_link WHERE book=OLD.id;
DELETE FROM data WHERE book=OLD.id;
DELETE FROM comments WHERE book=OLD.id;
DELETE FROM conversion_options WHERE book=OLD.id;
DELETE FROM books_plugin_data WHERE book=OLD.id;
DELETE FROM identifiers WHERE book=OLD.id;
END;
INSERT INTO identifiers (book, val) SELECT id,isbn FROM books WHERE isbn;
ALTER TABLE books ADD COLUMN last_modified TIMESTAMP NOT NULL DEFAULT "%s";
'''%isoformat(DEFAULT_DATE, sep=' ')
        # Sqlite does not support non constant default values in alter
        # statements
        self.conn.execute(script)

    def upgrade_version_19(self):
        'Write the recipes stored in the feeds table out as custom recipe files'
        recipes = self.conn.get('SELECT id,title,script FROM feeds')
        if recipes:
            from calibre.web.feeds.recipes import (custom_recipes,
                    custom_recipe_filename)
            # Recipe files are written next to the custom_recipes store
            bdir = os.path.dirname(custom_recipes.file_path)
            for id_, title, script in recipes:
                # Recomputed on every pass since an entry is added to
                # custom_recipes below
                existing = frozenset(map(int, custom_recipes.iterkeys()))
                if id_ in existing:
                    # Avoid clobbering an existing custom recipe
                    id_ = max(existing) + 1000
                id_ = str(id_)
                fname = custom_recipe_filename(id_, title)
                custom_recipes[id_] = (title, fname)
                # The file is opened in binary mode, so encode text first
                if isinstance(script, unicode):
                    script = script.encode('utf-8')
                with open(os.path.join(bdir, fname), 'wb') as f:
                    f.write(script)

    def upgrade_version_20(self):
        '''
        Add a link column to the authors table.
        '''
        script = '''
ALTER TABLE authors ADD COLUMN link TEXT NOT NULL DEFAULT "";
'''
        self.conn.execute(script)

View File

@ -32,11 +32,11 @@ def _c_convert_timestamp(val):
class Table(object): class Table(object):
def __init__(self, name, metadata): def __init__(self, name, metadata, link_table=None):
self.name, self.metadata = name, metadata self.name, self.metadata = name, metadata
# self.adapt() maps values from the db to python objects # self.unserialize() maps values from the db to python objects
self.adapt = \ self.unserialize = \
{ {
'datetime': _c_convert_timestamp, 'datetime': _c_convert_timestamp,
'bool': bool 'bool': bool
@ -44,16 +44,25 @@ class Table(object):
metadata['datatype'], lambda x: x) metadata['datatype'], lambda x: x)
if name == 'authors': if name == 'authors':
# Legacy # Legacy
self.adapt = lambda x: x.replace('|', ',') if x else None self.unserialize = lambda x: x.replace('|', ',') if x else None
self.link_table = (link_table if link_table else
'books_%s_link'%self.metadata['table'])
class OneToOneTable(Table): class OneToOneTable(Table):
'''
Represents data that is unique per book (it may not actually be unique) but
each item is assigned to a book in a one-to-one mapping. For example: uuid,
timestamp, size, etc.
'''
def read(self, db): def read(self, db):
self.book_col_map = {} self.book_col_map = {}
idcol = 'id' if self.metadata['table'] == 'books' else 'book' idcol = 'id' if self.metadata['table'] == 'books' else 'book'
for row in db.conn.execute('SELECT {0}, {1} FROM {2}'.format(idcol, for row in db.conn.execute('SELECT {0}, {1} FROM {2}'.format(idcol,
self.metadata['column'], self.metadata['table'])): self.metadata['column'], self.metadata['table'])):
self.book_col_map[row[0]] = self.adapt(row[1]) self.book_col_map[row[0]] = self.unserialize(row[1])
class SizeTable(OneToOneTable): class SizeTable(OneToOneTable):
@ -62,10 +71,17 @@ class SizeTable(OneToOneTable):
for row in db.conn.execute( for row in db.conn.execute(
'SELECT books.id, (SELECT MAX(uncompressed_size) FROM data ' 'SELECT books.id, (SELECT MAX(uncompressed_size) FROM data '
'WHERE data.book=books.id) FROM books'): 'WHERE data.book=books.id) FROM books'):
self.book_col_map[row[0]] = self.adapt(row[1]) self.book_col_map[row[0]] = self.unserialize(row[1])
class ManyToOneTable(Table): class ManyToOneTable(Table):
'''
Represents data where one data item can map to many books, for example:
series or publisher.
Each book however has only one value for data of this type.
'''
def read(self, db): def read(self, db):
self.id_map = {} self.id_map = {}
self.extra_map = {} self.extra_map = {}
@ -76,28 +92,34 @@ class ManyToOneTable(Table):
def read_id_maps(self, db): def read_id_maps(self, db):
for row in db.conn.execute('SELECT id, {0} FROM {1}'.format( for row in db.conn.execute('SELECT id, {0} FROM {1}'.format(
self.metadata['name'], self.metadata['table'])): self.metadata['column'], self.metadata['table'])):
if row[1]: if row[1]:
self.id_map[row[0]] = self.adapt(row[1]) self.id_map[row[0]] = self.unserialize(row[1])
def read_maps(self, db): def read_maps(self, db):
for row in db.conn.execute( for row in db.conn.execute(
'SELECT book, {0} FROM books_{1}_link'.format( 'SELECT book, {0} FROM {1}'.format(
self.metadata['link_column'], self.metadata['table'])): self.metadata['link_column'], self.link_table)):
if row[1] not in self.col_book_map: if row[1] not in self.col_book_map:
self.col_book_map[row[1]] = [] self.col_book_map[row[1]] = []
self.col_book_map.append(row[0]) self.col_book_map[row[1]].append(row[0])
self.book_col_map[row[0]] = row[1] self.book_col_map[row[0]] = row[1]
class ManyToManyTable(ManyToOneTable): class ManyToManyTable(ManyToOneTable):
'''
Represents data that has a many-to-many mapping with books. i.e. each book
can have more than one value and each value can be mapped to more than one
book. For example: tags or authors.
'''
def read_maps(self, db): def read_maps(self, db):
for row in db.conn.execute( for row in db.conn.execute(
'SELECT book, {0} FROM books_{1}_link'.format( 'SELECT book, {0} FROM {1}'.format(
self.metadata['link_column'], self.metadata['table'])): self.metadata['link_column'], self.link_table)):
if row[1] not in self.col_book_map: if row[1] not in self.col_book_map:
self.col_book_map[row[1]] = [] self.col_book_map[row[1]] = []
self.col_book_map.append(row[0]) self.col_book_map[row[1]].append(row[0])
if row[0] not in self.book_col_map: if row[0] not in self.book_col_map:
self.book_col_map[row[0]] = [] self.book_col_map[row[0]] = []
self.book_col_map[row[0]].append(row[1]) self.book_col_map[row[0]].append(row[1])
@ -105,11 +127,13 @@ class ManyToManyTable(ManyToOneTable):
class AuthorsTable(ManyToManyTable): class AuthorsTable(ManyToManyTable):
def read_id_maps(self, db): def read_id_maps(self, db):
self.alink_map = {}
for row in db.conn.execute( for row in db.conn.execute(
'SELECT id, name, sort FROM authors'): 'SELECT id, name, sort, link FROM authors'):
self.id_map[row[0]] = row[1] self.id_map[row[0]] = row[1]
self.extra_map[row[0]] = (row[2] if row[2] else self.extra_map[row[0]] = (row[2] if row[2] else
author_to_author_sort(row[1])) author_to_author_sort(row[1]))
self.alink_map[row[0]] = row[3]
class FormatsTable(ManyToManyTable): class FormatsTable(ManyToManyTable):
@ -121,7 +145,7 @@ class FormatsTable(ManyToManyTable):
if row[1] is not None: if row[1] is not None:
if row[1] not in self.col_book_map: if row[1] not in self.col_book_map:
self.col_book_map[row[1]] = [] self.col_book_map[row[1]] = []
self.col_book_map.append(row[0]) self.col_book_map[row[1]].append(row[0])
if row[0] not in self.book_col_map: if row[0] not in self.book_col_map:
self.book_col_map[row[0]] = [] self.book_col_map[row[0]] = []
self.book_col_map[row[0]].append((row[1], row[2])) self.book_col_map[row[0]].append((row[1], row[2]))
@ -136,7 +160,7 @@ class IdentifiersTable(ManyToManyTable):
if row[1] is not None and row[2] is not None: if row[1] is not None and row[2] is not None:
if row[1] not in self.col_book_map: if row[1] not in self.col_book_map:
self.col_book_map[row[1]] = [] self.col_book_map[row[1]] = []
self.col_book_map.append(row[0]) self.col_book_map[row[1]].append(row[0])
if row[0] not in self.book_col_map: if row[0] not in self.book_col_map:
self.book_col_map[row[0]] = [] self.book_col_map[row[0]] = []
self.book_col_map[row[0]].append((row[1], row[2])) self.book_col_map[row[0]].append((row[1], row[2]))

View File

@ -19,16 +19,17 @@ class ANDROID(USBMS):
VENDOR_ID = { VENDOR_ID = {
# HTC # HTC
0x0bb4 : { 0x0c02 : [0x100, 0x0227, 0x0226, 0x222], 0x0bb4 : { 0xc02 : [0x100, 0x0227, 0x0226, 0x222],
0x0c01 : [0x100, 0x0227, 0x0226], 0xc01 : [0x100, 0x0227, 0x0226],
0x0ff9 : [0x0100, 0x0227, 0x0226], 0xff9 : [0x0100, 0x0227, 0x0226],
0x0c87 : [0x0100, 0x0227, 0x0226], 0xc87 : [0x0100, 0x0227, 0x0226],
0xc92 : [0x100], 0xc91 : [0x0100, 0x0227, 0x0226],
0xc97 : [0x226], 0xc92 : [0x100, 0x0227, 0x0226, 0x222],
0xc99 : [0x0100], 0xc97 : [0x100, 0x0227, 0x0226, 0x222],
0xca2 : [0x226], 0xc99 : [0x100, 0x0227, 0x0226, 0x222],
0xca3 : [0x100], 0xca2 : [0x100, 0x0227, 0x0226, 0x222],
0xca4 : [0x226], 0xca3 : [0x100, 0x0227, 0x0226, 0x222],
0xca4 : [0x100, 0x0227, 0x0226, 0x222],
}, },
# Eken # Eken
@ -100,6 +101,9 @@ class ANDROID(USBMS):
# ZTE # ZTE
0x19d2 : { 0x1353 : [0x226] }, 0x19d2 : { 0x1353 : [0x226] },
# Advent
0x0955 : { 0x7100 : [0x9999] }, # This is the same as the Notion Ink Adam
} }
EBOOK_DIR_MAIN = ['eBooks/import', 'wordplayer/calibretransfer', 'Books'] EBOOK_DIR_MAIN = ['eBooks/import', 'wordplayer/calibretransfer', 'Books']
EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to ' EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '

View File

@ -5,7 +5,7 @@ __copyright__ = '2010, Gregory Riker'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import cStringIO, ctypes, datetime, os, re, sys, tempfile, time import cStringIO, ctypes, datetime, os, re, shutil, sys, tempfile, time
from calibre.constants import __appname__, __version__, DEBUG from calibre.constants import __appname__, __version__, DEBUG
from calibre import fit_image, confirm_config_name from calibre import fit_image, confirm_config_name
from calibre.constants import isosx, iswindows from calibre.constants import isosx, iswindows
@ -119,11 +119,17 @@ class DriverBase(DeviceConfig, DevicePlugin):
'iBooks Category'), 'iBooks Category'),
_('Cache covers from iTunes/iBooks') + _('Cache covers from iTunes/iBooks') +
':::' + ':::' +
_('Enable to cache and display covers from iTunes/iBooks') _('Enable to cache and display covers from iTunes/iBooks'),
_(u'"Copy files to iTunes Media folder %s" is enabled in iTunes Preferences|Advanced')%u'\u2026' +
':::' +
_("<p>This setting should match your iTunes <i>Preferences</i>|<i>Advanced</i> setting.</p>"
"<p>Disabling will store copies of books transferred to iTunes in your calibre configuration directory.</p>"
"<p>Enabling indicates that iTunes is configured to store copies in your iTunes Media folder.</p>")
] ]
EXTRA_CUSTOMIZATION_DEFAULT = [ EXTRA_CUSTOMIZATION_DEFAULT = [
True, True,
True, True,
False,
] ]
@ -193,6 +199,7 @@ class ITUNES(DriverBase):
# EXTRA_CUSTOMIZATION_MESSAGE indexes # EXTRA_CUSTOMIZATION_MESSAGE indexes
USE_SERIES_AS_CATEGORY = 0 USE_SERIES_AS_CATEGORY = 0
CACHE_COVERS = 1 CACHE_COVERS = 1
USE_ITUNES_STORAGE = 2
OPEN_FEEDBACK_MESSAGE = _( OPEN_FEEDBACK_MESSAGE = _(
'Apple device detected, launching iTunes, please wait ...') 'Apple device detected, launching iTunes, please wait ...')
@ -281,6 +288,7 @@ class ITUNES(DriverBase):
description_prefix = "added by calibre" description_prefix = "added by calibre"
ejected = False ejected = False
iTunes= None iTunes= None
iTunes_local_storage = None
library_orphans = None library_orphans = None
log = Log() log = Log()
manual_sync_mode = False manual_sync_mode = False
@ -825,7 +833,7 @@ class ITUNES(DriverBase):
# Confirm/create thumbs archive # Confirm/create thumbs archive
if not os.path.exists(self.cache_dir): if not os.path.exists(self.cache_dir):
if DEBUG: if DEBUG:
self.log.info(" creating thumb cache '%s'" % self.cache_dir) self.log.info(" creating thumb cache at '%s'" % self.cache_dir)
os.makedirs(self.cache_dir) os.makedirs(self.cache_dir)
if not os.path.exists(self.archive_path): if not os.path.exists(self.archive_path):
@ -837,6 +845,17 @@ class ITUNES(DriverBase):
if DEBUG: if DEBUG:
self.log.info(" existing thumb cache at '%s'" % self.archive_path) self.log.info(" existing thumb cache at '%s'" % self.archive_path)
# If iTunes is not set to copy files to its Media folder (USE_ITUNES_STORAGE disabled), create/confirm a local iTunes storage folder
if not self.settings().extra_customization[self.USE_ITUNES_STORAGE]:
self.iTunes_local_storage = os.path.join(config_dir,'iTunes storage')
if not os.path.exists(self.iTunes_local_storage):
if DEBUG:
self.log(" creating iTunes_local_storage at '%s'" % self.iTunes_local_storage)
os.mkdir(self.iTunes_local_storage)
else:
if DEBUG:
self.log(" existing iTunes_local_storage at '%s'" % self.iTunes_local_storage)
def remove_books_from_metadata(self, paths, booklists): def remove_books_from_metadata(self, paths, booklists):
''' '''
Remove books from the metadata list. This function must not communicate Remove books from the metadata list. This function must not communicate
@ -1281,50 +1300,27 @@ class ITUNES(DriverBase):
if DEBUG: if DEBUG:
self.log.info(" ITUNES._add_new_copy()") self.log.info(" ITUNES._add_new_copy()")
def _save_last_known_iTunes_storage(lb_added):
if isosx:
fp = lb_added.location().path
index = fp.rfind('/Books') + len('/Books')
last_known_iTunes_storage = fp[:index]
elif iswindows:
fp = lb_added.Location
index = fp.rfind('\Books') + len('\Books')
last_known_iTunes_storage = fp[:index]
dynamic['last_known_iTunes_storage'] = last_known_iTunes_storage
self.log.warning(" last_known_iTunes_storage: %s" % last_known_iTunes_storage)
db_added = None db_added = None
lb_added = None lb_added = None
# If using iTunes_local_storage, copy the file, redirect iTunes to use local copy
if not self.settings().extra_customization[self.USE_ITUNES_STORAGE]:
local_copy = os.path.join(self.iTunes_local_storage, str(metadata.uuid) + os.path.splitext(fpath)[1])
shutil.copyfile(fpath,local_copy)
fpath = local_copy
if self.manual_sync_mode: if self.manual_sync_mode:
''' '''
This is the unsupported direct-connect mode. Unsupported direct-connect mode.
In an attempt to avoid resetting the iTunes library Media folder, don't try to
add the book to iTunes if the last_known_iTunes_storage path is inaccessible.
This means that the path has to be set at least once, probably by using
'Connect to iTunes' and doing a transfer.
''' '''
self.log.warning(" unsupported direct connect mode") self.log.warning(" unsupported direct connect mode")
db_added = self._add_device_book(fpath, metadata) db_added = self._add_device_book(fpath, metadata)
last_known_iTunes_storage = dynamic.get('last_known_iTunes_storage', None) lb_added = self._add_library_book(fpath, metadata)
if last_known_iTunes_storage is not None:
if os.path.exists(last_known_iTunes_storage):
if DEBUG:
self.log.warning(" iTunes storage online, adding to library")
lb_added = self._add_library_book(fpath, metadata)
else:
if DEBUG:
self.log.warning(" iTunes storage not online, can't add to library")
if lb_added:
_save_last_known_iTunes_storage(lb_added)
if not lb_added and DEBUG: if not lb_added and DEBUG:
self.log.warn(" failed to add '%s' to iTunes, iTunes Media folder inaccessible" % metadata.title) self.log.warn(" failed to add '%s' to iTunes, iTunes Media folder inaccessible" % metadata.title)
else: else:
lb_added = self._add_library_book(fpath, metadata) lb_added = self._add_library_book(fpath, metadata)
if lb_added: if not lb_added:
_save_last_known_iTunes_storage(lb_added)
else:
raise UserFeedback("iTunes Media folder inaccessible", raise UserFeedback("iTunes Media folder inaccessible",
details="Failed to add '%s' to iTunes" % metadata.title, details="Failed to add '%s' to iTunes" % metadata.title,
level=UserFeedback.WARN) level=UserFeedback.WARN)
@ -1520,7 +1516,7 @@ class ITUNES(DriverBase):
else: else:
self.log.error(" book_playlist not found") self.log.error(" book_playlist not found")
if len(dev_books): if dev_books is not None and len(dev_books):
first_book = dev_books[0] first_book = dev_books[0]
if False: if False:
self.log.info(" determing manual mode by modifying '%s' by %s" % (first_book.name(), first_book.artist())) self.log.info(" determing manual mode by modifying '%s' by %s" % (first_book.name(), first_book.artist()))
@ -1551,7 +1547,7 @@ class ITUNES(DriverBase):
dev_books = pl.Tracks dev_books = pl.Tracks
break break
if dev_books.Count: if dev_books is not None and dev_books.Count:
first_book = dev_books.Item(1) first_book = dev_books.Item(1)
#if DEBUG: #if DEBUG:
#self.log.info(" determing manual mode by modifying '%s' by %s" % (first_book.Name, first_book.Artist)) #self.log.info(" determing manual mode by modifying '%s' by %s" % (first_book.Name, first_book.Artist))
@ -2526,7 +2522,15 @@ class ITUNES(DriverBase):
self.log.info(" processing %s" % fp) self.log.info(" processing %s" % fp)
if fp.startswith(prefs['library_path']): if fp.startswith(prefs['library_path']):
self.log.info(" '%s' stored in calibre database, not removed" % cached_book['title']) self.log.info(" '%s' stored in calibre database, not removed" % cached_book['title'])
elif not self.settings().extra_customization[self.USE_ITUNES_STORAGE] and \
fp.startswith(self.iTunes_local_storage) and \
os.path.exists(fp):
# Delete the copy in iTunes_local_storage
os.remove(fp)
if DEBUG:
self.log(" removing from iTunes_local_storage")
else: else:
# Delete from iTunes Media folder
if os.path.exists(fp): if os.path.exists(fp):
os.remove(fp) os.remove(fp)
if DEBUG: if DEBUG:
@ -2544,12 +2548,6 @@ class ITUNES(DriverBase):
os.rmdir(author_storage_path) os.rmdir(author_storage_path)
if DEBUG: if DEBUG:
self.log.info(" removing empty author directory") self.log.info(" removing empty author directory")
'''
else:
if DEBUG:
self.log.info(" author_storage_path not empty:")
self.log.info(" %s" % '\n'.join(author_files))
'''
else: else:
self.log.info(" '%s' does not exist at storage location" % cached_book['title']) self.log.info(" '%s' does not exist at storage location" % cached_book['title'])
@ -2586,7 +2584,15 @@ class ITUNES(DriverBase):
self.log.info(" processing %s" % fp) self.log.info(" processing %s" % fp)
if fp.startswith(prefs['library_path']): if fp.startswith(prefs['library_path']):
self.log.info(" '%s' stored in calibre database, not removed" % cached_book['title']) self.log.info(" '%s' stored in calibre database, not removed" % cached_book['title'])
elif not self.settings().extra_customization[self.USE_ITUNES_STORAGE] and \
fp.startswith(self.iTunes_local_storage) and \
os.path.exists(fp):
# Delete the copy in iTunes_local_storage
os.remove(fp)
if DEBUG:
self.log(" removing from iTunes_local_storage")
else: else:
# Delete from iTunes Media folder
if os.path.exists(fp): if os.path.exists(fp):
os.remove(fp) os.remove(fp)
if DEBUG: if DEBUG:
@ -3234,6 +3240,17 @@ class ITUNES_ASYNC(ITUNES):
if DEBUG: if DEBUG:
self.log.info(" existing thumb cache at '%s'" % self.archive_path) self.log.info(" existing thumb cache at '%s'" % self.archive_path)
# If iTunes is not set to copy files to its Media folder (USE_ITUNES_STORAGE disabled), create/confirm a local iTunes storage folder
if not self.settings().extra_customization[self.USE_ITUNES_STORAGE]:
self.iTunes_local_storage = os.path.join(config_dir,'iTunes storage')
if not os.path.exists(self.iTunes_local_storage):
if DEBUG:
self.log(" creating iTunes_local_storage at '%s'" % self.iTunes_local_storage)
os.mkdir(self.iTunes_local_storage)
else:
if DEBUG:
self.log(" existing iTunes_local_storage at '%s'" % self.iTunes_local_storage)
def sync_booklists(self, booklists, end_session=True): def sync_booklists(self, booklists, end_session=True):
''' '''
Update metadata on device. Update metadata on device.

View File

@ -20,11 +20,11 @@ class IRIVER_STORY(USBMS):
FORMATS = ['epub', 'fb2', 'pdf', 'djvu', 'txt'] FORMATS = ['epub', 'fb2', 'pdf', 'djvu', 'txt']
VENDOR_ID = [0x1006] VENDOR_ID = [0x1006]
PRODUCT_ID = [0x4023, 0x4024, 0x4025] PRODUCT_ID = [0x4023, 0x4024, 0x4025, 0x4034]
BCD = [0x0323] BCD = [0x0323, 0x0326]
VENDOR_NAME = 'IRIVER' VENDOR_NAME = 'IRIVER'
WINDOWS_MAIN_MEM = ['STORY', 'STORY_EB05', 'STORY_WI-FI'] WINDOWS_MAIN_MEM = ['STORY', 'STORY_EB05', 'STORY_WI-FI', 'STORY_EB07']
WINDOWS_CARD_A_MEM = ['STORY', 'STORY_SD'] WINDOWS_CARD_A_MEM = ['STORY', 'STORY_SD']
#OSX_MAIN_MEM = 'Kindle Internal Storage Media' #OSX_MAIN_MEM = 'Kindle Internal Storage Media'

View File

@ -203,14 +203,25 @@ class KOBO(USBMS):
result = cursor.fetchone() result = cursor.fetchone()
self.dbversion = result[0] self.dbversion = result[0]
debug_print("Database Version: ", self.dbversion)
if self.dbversion >= 14: if self.dbversion >= 14:
query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex from content where BookID is Null' 'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex from content where BookID is Null and ( ___ExpirationStatus <> "3" or ___ExpirationStatus is Null)'
elif self.dbversion < 14 and self.dbversion >= 8:
query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
'ImageID, ReadStatus, ___ExpirationStatus, "-1" as FavouritesIndex from content where BookID is Null and ( ___ExpirationStatus <> "3" or ___ExpirationStatus is Null)'
else: else:
query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
'ImageID, ReadStatus, ___ExpirationStatus, "-1" as FavouritesIndex from content where BookID is Null' 'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as FavouritesIndex from content where BookID is Null'
cursor.execute (query) try:
cursor.execute (query)
except Exception as e:
if '___ExpirationStatus' not in str(e):
raise
query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as FavouritesIndex from content where BookID is Null'
cursor.execute(query)
changed = False changed = False
for i, row in enumerate(cursor): for i, row in enumerate(cursor):

View File

@ -64,14 +64,24 @@ int do_mount(const char *dev, const char *mp) {
snprintf(options, 1000, "rw,noexec,nosuid,sync,nodev"); snprintf(options, 1000, "rw,noexec,nosuid,sync,nodev");
snprintf(uids, 100, "%d", getuid()); snprintf(uids, 100, "%d", getuid());
snprintf(gids, 100, "%d", getgid()); snprintf(gids, 100, "%d", getgid());
#else
#ifdef __FreeBSD__
snprintf(options, 1000, "rw,noexec,nosuid,sync,-u=%d,-g=%d",getuid(),getgid());
#else #else
snprintf(options, 1000, "rw,noexec,nosuid,sync,nodev,quiet,shortname=mixed,uid=%d,gid=%d,umask=077,fmask=0177,dmask=0077,utf8,iocharset=iso8859-1", getuid(), getgid()); snprintf(options, 1000, "rw,noexec,nosuid,sync,nodev,quiet,shortname=mixed,uid=%d,gid=%d,umask=077,fmask=0177,dmask=0077,utf8,iocharset=iso8859-1", getuid(), getgid());
#endif #endif
#endif
ensure_root(); ensure_root();
#ifdef __NetBSD__ #ifdef __NetBSD__
execlp("mount_msdos", "mount_msdos", "-u", uids, "-g", gids, "-o", options, dev, mp, NULL); execlp("mount_msdos", "mount_msdos", "-u", uids, "-g", gids, "-o", options, dev, mp, NULL);
#else
#ifdef __FreeBSD__
execlp("mount", "mount", "-t", "msdosfs", "-o", options, dev, mp, NULL);
#else #else
execlp("mount", "mount", "-t", "auto", "-o", options, dev, mp, NULL); execlp("mount", "mount", "-t", "auto", "-o", options, dev, mp, NULL);
#endif
#endif #endif
errsv = errno; errsv = errno;
fprintf(stderr, "Failed to mount with error: %s\n", strerror(errsv)); fprintf(stderr, "Failed to mount with error: %s\n", strerror(errsv));
@ -91,8 +101,12 @@ int call_eject(const char *dev, const char *mp) {
ensure_root(); ensure_root();
#ifdef __NetBSD__ #ifdef __NetBSD__
execlp("eject", "eject", dev, NULL); execlp("eject", "eject", dev, NULL);
#else
#ifdef __FreeBSD__
execlp("umount", "umount", dev, NULL);
#else #else
execlp("eject", "eject", "-s", dev, NULL); execlp("eject", "eject", "-s", dev, NULL);
#endif
#endif #endif
/* execlp failed */ /* execlp failed */
errsv = errno; errsv = errno;
@ -121,7 +135,11 @@ int call_umount(const char *dev, const char *mp) {
if (pid == 0) { /* Child process */ if (pid == 0) { /* Child process */
ensure_root(); ensure_root();
#ifdef __FreeBSD__
execlp("umount", "umount", mp, NULL);
#else
execlp("umount", "umount", "-l", mp, NULL); execlp("umount", "umount", "-l", mp, NULL);
#endif
/* execlp failed */ /* execlp failed */
errsv = errno; errsv = errno;
fprintf(stderr, "Failed to umount with error: %s\n", strerror(errsv)); fprintf(stderr, "Failed to umount with error: %s\n", strerror(errsv));

View File

@ -329,3 +329,25 @@ class NEXTBOOK(USBMS):
f.write(metadata.thumbnail[-1]) f.write(metadata.thumbnail[-1])
''' '''
class MOOVYBOOK(USBMS):

    # Device driver for the Moovybook reader.

    # Plugin metadata
    name           = 'Moovybook device interface'
    gui_name       = 'Moovybook'
    description    = _('Communicate with the Moovybook Reader')
    author         = 'Kovid Goyal'
    supported_platforms = ['windows', 'osx', 'linux']

    # Ordered list of supported formats
    FORMATS     = ['epub', 'txt', 'pdf']

    # USB identifiers used to recognise the device
    VENDOR_ID   = [0x1cae]
    PRODUCT_ID  = [0x9b08]
    BCD         = [0x02]

    EBOOK_DIR_MAIN = ''
    SUPPORTS_SUB_DIRS = True

    def get_main_ebook_dir(self, for_upload=False):
        # Uploads go into 'Books'; for everything else fall back to
        # EBOOK_DIR_MAIN (the empty string).
        if for_upload:
            return 'Books'
        return self.EBOOK_DIR_MAIN

View File

@ -14,7 +14,7 @@ from calibre.constants import preferred_encoding
from calibre import isbytestring, force_unicode from calibre import isbytestring, force_unicode
from calibre.utils.config import prefs, tweaks from calibre.utils.config import prefs, tweaks
from calibre.utils.icu import strcmp from calibre.utils.icu import strcmp
from calibre.utils.formatter import eval_formatter from calibre.utils.formatter import EvalFormatter
class Book(Metadata): class Book(Metadata):
def __init__(self, prefix, lpath, size=None, other=None): def __init__(self, prefix, lpath, size=None, other=None):
@ -116,7 +116,7 @@ class CollectionsBookList(BookList):
field_name = field_meta['name'] field_name = field_meta['name']
else: else:
field_name = '' field_name = ''
cat_name = eval_formatter.safe_format( cat_name = EvalFormatter().safe_format(
fmt=tweaks['sony_collection_name_template'], fmt=tweaks['sony_collection_name_template'],
kwargs={'category':field_name, 'value':field_value}, kwargs={'category':field_name, 'value':field_value},
error_value='GET_CATEGORY', book=None) error_value='GET_CATEGORY', book=None)

View File

@ -17,7 +17,7 @@ from itertools import repeat
from calibre.devices.interface import DevicePlugin from calibre.devices.interface import DevicePlugin
from calibre.devices.errors import DeviceError, FreeSpaceError from calibre.devices.errors import DeviceError, FreeSpaceError
from calibre.devices.usbms.deviceconfig import DeviceConfig from calibre.devices.usbms.deviceconfig import DeviceConfig
from calibre.constants import iswindows, islinux, isosx, plugins from calibre.constants import iswindows, islinux, isosx, isfreebsd, plugins
from calibre.utils.filenames import ascii_filename as sanitize, shorten_components_to from calibre.utils.filenames import ascii_filename as sanitize, shorten_components_to
if isosx: if isosx:
@ -701,7 +701,152 @@ class Device(DeviceConfig, DevicePlugin):
self._card_a_prefix = self._card_b_prefix self._card_a_prefix = self._card_b_prefix
self._card_b_prefix = None self._card_b_prefix = None
# ------------------------------------------------------
#
# open for FreeBSD
# find the device node or nodes that match the S/N we already have from the scanner
# and attempt to mount each one
# 1. get list of disk devices from sysctl
# 2. compare that list with the one from camcontrol
# 3. and see if it has a matching s/n
# 4. find any partitions/slices associated with each node
# 5. attempt to mount, using calibre-mount-helper, each one
# 6. when finished, we have a list of mount points and associated device nodes
#
def open_freebsd(self):
    '''
    Find the disk nodes whose serial number matches the detected device and
    mount them with calibre-mount-helper.

    The first successfully mounted node (or slice/partition) of the first,
    second and third matching disk is recorded as main memory, card A and
    card B respectively. A fourth matching disk is mounted but not recorded.

    :return: True if at least one filesystem was mounted, False otherwise.
    :raises DeviceError: if the detected device reports no serial number or
                         the mount helper cannot be started.
    '''
    # Local imports: only this method needs them.
    import glob, os

    # this gives us access to the S/N, etc. of the reader that the scanner has found
    # and the match routines for some of that data, like s/n, vendor ID, etc.
    d = self.detected_device

    if not d.serial:
        raise DeviceError("Device has no S/N. Can't continue")

    ndevs = 4 # number of possible devices per reader (main, carda, cardb, launcher)

    # get list of disk devices
    p = subprocess.Popen(["sysctl", "kern.disks"], stdout=subprocess.PIPE)
    kdsks = subprocess.Popen(["sed", "s/kern.disks: //"], stdin=p.stdout,
            stdout=subprocess.PIPE).communicate()[0]
    p.stdout.close()

    # The list of CAM devices is the same for every disk, so fetch it once
    camdevs = subprocess.Popen(["camcontrol", "devlist"],
            stdout=subprocess.PIPE).communicate()[0]

    matched = []
    for dvc in kdsks.split():
        # for each one that's also in the list of cam devices ...
        if dvc not in camdevs:
            continue
        # ... see if we can get a S/N from the actual device node
        sn = subprocess.Popen(["camcontrol", "inquiry", dvc, "-S"],
                stdout=subprocess.PIPE).communicate()[0]
        sn = sn[0:-1] # drop the trailing newline
        if sn and d.match_serial(sn):
            # we have a matching s/n, record this device node
            matched.append(dvc)

    # Consider at most ndevs nodes, in sorted order. Iterating over what was
    # actually found (instead of a fixed ndevs slots) avoids a KeyError when
    # the reader exposes fewer than ndevs nodes.
    devs = sorted(matched[:ndevs])

    # now we need to see if any of these have slices/partitions
    mtd = 0
    label = "READER" # could use something more unique, like S/N or productID...
    cmd = ['/usr/local/bin/calibre-mount-helper', 'mount']
    for i, dvc in enumerate(devs):
        # the node itself plus any slices/partitions below it
        nodes = sorted(os.path.basename(x) for x in glob.glob('/dev/' + dvc + '*'))

        # try all the nodes to see what we can mount
        for dev in nodes:
            mp = '/media/' + label + '-' + dev
            try:
                p = subprocess.Popen(cmd + ["/dev/" + dev, mp])
            except OSError:
                raise DeviceError(_('Could not find mount helper: %s.')%cmd[0])
            while p.poll() is None:
                time.sleep(0.1)

            if p.returncode == 0:
                if i == 0:
                    self._main_prefix = mp
                    self._main_dev = "/dev/" + dev
                elif i == 1:
                    self._card_a_prefix = mp
                    self._card_a_dev = "/dev/" + dev
                elif i == 2:
                    self._card_b_prefix = mp
                    self._card_b_dev = "/dev/" + dev
                mtd += 1
                # one mounted filesystem per disk is enough
                break

    return mtd > 0
#
# ------------------------------------------------------
#
# this one is pretty simple:
# just umount each of the previously
# mounted filesystems, using the mount helper
#
def eject_freebsd(self):
    '''
    Run the mount helper's eject command for main memory, card A and card B
    (in that order, skipping any that have no mount point) and then clear
    all three mount point prefixes.

    :raises DeviceError: if the mount helper cannot be started.
    '''
    helper = ['/usr/local/bin/calibre-mount-helper', 'eject']

    # (device node attribute, mount point attribute) for each storage area
    slots = (
        ('_main_dev', '_main_prefix'),
        ('_card_a_dev', '_card_a_prefix'),
        ('_card_b_dev', '_card_b_prefix'),
    )

    for dev_attr, prefix_attr in slots:
        mount_point = getattr(self, prefix_attr)
        if not mount_point:
            continue
        # The device node is only looked up for areas that are mounted
        try:
            proc = subprocess.Popen(helper + [getattr(self, dev_attr), mount_point])
        except OSError:
            raise DeviceError(
                    _('Could not find mount helper: %s.')%helper[0])
        # Wait for the helper to finish before moving on
        while proc.poll() is None:
            time.sleep(0.1)

    self._main_prefix = None
    self._card_a_prefix = None
    self._card_b_prefix = None
# ------------------------------------------------------
def open(self, library_uuid): def open(self, library_uuid):
time.sleep(5) time.sleep(5)
@ -712,6 +857,14 @@ class Device(DeviceConfig, DevicePlugin):
except DeviceError: except DeviceError:
time.sleep(7) time.sleep(7)
self.open_linux() self.open_linux()
if isfreebsd:
self._main_dev = self._card_a_dev = self._card_b_dev = None
try:
self.open_freebsd()
except DeviceError:
subprocess.Popen(["camcontrol", "rescan", "all"])
time.sleep(2)
self.open_freebsd()
if iswindows: if iswindows:
try: try:
self.open_windows() self.open_windows()
@ -800,6 +953,11 @@ class Device(DeviceConfig, DevicePlugin):
self.eject_linux() self.eject_linux()
except: except:
pass pass
if isfreebsd:
try:
self.eject_freebsd()
except:
pass
if iswindows: if iswindows:
try: try:
self.eject_windows() self.eject_windows()

View File

@ -54,7 +54,7 @@ cpalmdoc_decompress(PyObject *self, PyObject *args) {
// Map chars to bytes // Map chars to bytes
for (j = 0; j < input_len; j++) for (j = 0; j < input_len; j++)
input[j] = (_input[j] < 0) ? _input[j]+256 : _input[j]; input[j] = (_input[j] < 0) ? _input[j]+256 : _input[j];
output = (char *)PyMem_Malloc(sizeof(char)*(MAX(BUFFER, 5*input_len))); output = (char *)PyMem_Malloc(sizeof(char)*(MAX(BUFFER, 8*input_len)));
if (output == NULL) return PyErr_NoMemory(); if (output == NULL) return PyErr_NoMemory();
while (i < input_len) { while (i < input_len) {

View File

@ -176,7 +176,7 @@ def add_pipeline_options(parser, plumber):
[ [
'level1_toc', 'level2_toc', 'level3_toc', 'level1_toc', 'level2_toc', 'level3_toc',
'toc_threshold', 'max_toc_links', 'no_chapters_in_toc', 'toc_threshold', 'max_toc_links', 'no_chapters_in_toc',
'use_auto_toc', 'toc_filter', 'use_auto_toc', 'toc_filter', 'duplicate_links_in_toc',
] ]
), ),

View File

@ -265,6 +265,14 @@ OptionRecommendation(name='toc_filter',
) )
), ),
OptionRecommendation(name='duplicate_links_in_toc',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('When creating a TOC from links in the input document, '
'allow duplicate entries, i.e. allow more than one entry '
'with the same text, provided that they point to a '
'different location.')
),
OptionRecommendation(name='chapter', OptionRecommendation(name='chapter',
recommended_value="//*[((name()='h1' or name()='h2') and " recommended_value="//*[((name()='h1' or name()='h2') and "

View File

@ -86,6 +86,8 @@ CALIBRE_METADATA_FIELDS = frozenset([
# a dict of user category names, where the value is a list of item names # a dict of user category names, where the value is a list of item names
# from the book that are in that category # from the book that are in that category
'user_categories', 'user_categories',
# a dict of author to an associated hyperlink
'author_link_map',
] ]
) )

View File

@ -34,6 +34,7 @@ NULL_VALUES = {
'authors' : [_('Unknown')], 'authors' : [_('Unknown')],
'title' : _('Unknown'), 'title' : _('Unknown'),
'user_categories' : {}, 'user_categories' : {},
'author_link_map' : {},
'language' : 'und' 'language' : 'und'
} }
@ -70,6 +71,7 @@ class SafeFormat(TemplateFormatter):
return '' return ''
return v return v
# DEPRECATED. This is not thread safe. Do not use.
composite_formatter = SafeFormat() composite_formatter = SafeFormat()
class Metadata(object): class Metadata(object):
@ -110,6 +112,7 @@ class Metadata(object):
# List of strings or [] # List of strings or []
self.author = list(authors) if authors else []# Needed for backward compatibility self.author = list(authors) if authors else []# Needed for backward compatibility
self.authors = list(authors) if authors else [] self.authors = list(authors) if authors else []
self.formatter = SafeFormat()
def is_null(self, field): def is_null(self, field):
''' '''
@ -146,7 +149,7 @@ class Metadata(object):
return val return val
if val is None: if val is None:
d['#value#'] = 'RECURSIVE_COMPOSITE FIELD (Metadata) ' + field d['#value#'] = 'RECURSIVE_COMPOSITE FIELD (Metadata) ' + field
val = d['#value#'] = composite_formatter.safe_format( val = d['#value#'] = self.formatter.safe_format(
d['display']['composite_template'], d['display']['composite_template'],
self, self,
_('TEMPLATE ERROR'), _('TEMPLATE ERROR'),
@ -423,11 +426,12 @@ class Metadata(object):
''' '''
if not ops: if not ops:
return return
formatter = SafeFormat()
for op in ops: for op in ops:
try: try:
src = op[0] src = op[0]
dest = op[1] dest = op[1]
val = composite_formatter.safe_format\ val = formatter.safe_format\
(src, other, 'PLUGBOARD TEMPLATE ERROR', other) (src, other, 'PLUGBOARD TEMPLATE ERROR', other)
if dest == 'tags': if dest == 'tags':
self.set(dest, [f.strip() for f in val.split(',') if f.strip()]) self.set(dest, [f.strip() for f in val.split(',') if f.strip()])

View File

@ -474,7 +474,7 @@ def serialize_user_metadata(metadata_elem, all_user_metadata, tail='\n'+(' '*8))
metadata_elem.append(meta) metadata_elem.append(meta)
def dump_user_categories(cats): def dump_dict(cats):
if not cats: if not cats:
cats = {} cats = {}
from calibre.ebooks.metadata.book.json_codec import object_to_unicode from calibre.ebooks.metadata.book.json_codec import object_to_unicode
@ -537,8 +537,9 @@ class OPF(object): # {{{
formatter=parse_date, renderer=isoformat) formatter=parse_date, renderer=isoformat)
user_categories = MetadataField('user_categories', is_dc=False, user_categories = MetadataField('user_categories', is_dc=False,
formatter=json.loads, formatter=json.loads,
renderer=dump_user_categories) renderer=dump_dict)
author_link_map = MetadataField('author_link_map', is_dc=False,
formatter=json.loads, renderer=dump_dict)
def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True, def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True,
populate_spine=True): populate_spine=True):
@ -1039,7 +1040,7 @@ class OPF(object): # {{{
for attr in ('title', 'authors', 'author_sort', 'title_sort', for attr in ('title', 'authors', 'author_sort', 'title_sort',
'publisher', 'series', 'series_index', 'rating', 'publisher', 'series', 'series_index', 'rating',
'isbn', 'tags', 'category', 'comments', 'isbn', 'tags', 'category', 'comments',
'pubdate', 'user_categories'): 'pubdate', 'user_categories', 'author_link_map'):
val = getattr(mi, attr, None) val = getattr(mi, attr, None)
if val is not None and val != [] and val != (None, None): if val is not None and val != [] and val != (None, None):
setattr(self, attr, val) setattr(self, attr, val)
@ -1336,6 +1337,8 @@ def metadata_to_opf(mi, as_string=True):
for tag in mi.tags: for tag in mi.tags:
factory(DC('subject'), tag) factory(DC('subject'), tag)
meta = lambda n, c: factory('meta', name='calibre:'+n, content=c) meta = lambda n, c: factory('meta', name='calibre:'+n, content=c)
if getattr(mi, 'author_link_map', None) is not None:
meta('author_link_map', dump_dict(mi.author_link_map))
if mi.series: if mi.series:
meta('series', mi.series) meta('series', mi.series)
if mi.series_index is not None: if mi.series_index is not None:
@ -1349,7 +1352,7 @@ def metadata_to_opf(mi, as_string=True):
if mi.title_sort: if mi.title_sort:
meta('title_sort', mi.title_sort) meta('title_sort', mi.title_sort)
if mi.user_categories: if mi.user_categories:
meta('user_categories', dump_user_categories(mi.user_categories)) meta('user_categories', dump_dict(mi.user_categories))
serialize_user_metadata(metadata, mi.get_all_user_metadata(False)) serialize_user_metadata(metadata, mi.get_all_user_metadata(False))

View File

@ -153,7 +153,8 @@ class Douban(Source):
author = 'Li Fanxi' author = 'Li Fanxi'
version = (2, 0, 0) version = (2, 0, 0)
description = _('Downloads metadata and covers from Douban.com') description = _('Downloads metadata and covers from Douban.com. '
'Useful only for chinese language books.')
capabilities = frozenset(['identify', 'cover']) capabilities = frozenset(['identify', 'cover'])
touched_fields = frozenset(['title', 'authors', 'tags', touched_fields = frozenset(['title', 'authors', 'tags',

View File

@ -19,7 +19,7 @@ from calibre.customize.ui import metadata_plugins, all_metadata_plugins
from calibre.ebooks.metadata.sources.base import create_log, msprefs from calibre.ebooks.metadata.sources.base import create_log, msprefs
from calibre.ebooks.metadata.xisbn import xisbn from calibre.ebooks.metadata.xisbn import xisbn
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import utc_tz from calibre.utils.date import utc_tz, as_utc
from calibre.utils.html2text import html2text from calibre.utils.html2text import html2text
from calibre.utils.icu import lower from calibre.utils.icu import lower
@ -57,11 +57,34 @@ def is_worker_alive(workers):
# Merge results from different sources {{{ # Merge results from different sources {{{
class xISBN(Thread):
    '''
    Daemon worker thread that looks up the pool of ISBNs related to a single
    ISBN, so that the caller can bound the wait with ``join(timeout)``.

    After :meth:`run` has finished:

        * ``isbns`` and ``min_year`` hold the values returned by
          ``xisbn.get_isbn_pool(isbn)``, or their defaults (an empty
          frozenset and None) if the lookup failed.
        * ``exception`` holds the exception raised by the lookup and ``tb``
          the formatted traceback text; both are None on success.
    '''

    def __init__(self, isbn):
        Thread.__init__(self)
        self.isbn = isbn
        # Defaults that remain in place if the query fails or never finishes
        self.isbns = frozenset()
        self.min_year = None
        self.daemon = True
        self.exception = self.tb = None

    def run(self):
        try:
            self.isbns, self.min_year = xisbn.get_isbn_pool(self.isbn)
        except Exception as e:
            import traceback
            self.exception = e
            # format_exc() formats the exception currently being handled.
            # format_exception() requires explicit arguments, so calling it
            # bare raised TypeError here and left self.tb unset.
            self.tb = traceback.format_exc()
class ISBNMerge(object): class ISBNMerge(object):
def __init__(self): def __init__(self, log):
self.pools = {} self.pools = {}
self.isbnless_results = [] self.isbnless_results = []
self.results = []
self.log = log
self.use_xisbn = True
def isbn_in_pool(self, isbn): def isbn_in_pool(self, isbn):
if isbn: if isbn:
@ -82,7 +105,20 @@ class ISBNMerge(object):
if isbn: if isbn:
pool = self.isbn_in_pool(isbn) pool = self.isbn_in_pool(isbn)
if pool is None: if pool is None:
isbns, min_year = xisbn.get_isbn_pool(isbn) isbns = min_year = None
if self.use_xisbn:
xw = xISBN(isbn)
xw.start()
xw.join(10)
if xw.is_alive():
self.log.error('Query to xISBN timed out')
self.use_xisbn = False
else:
if xw.exception:
self.log.error('Query to xISBN failed:')
self.log.debug(xw.tb)
else:
isbns, min_year = xw.isbns, xw.min_year
if not isbns: if not isbns:
isbns = frozenset([isbn]) isbns = frozenset([isbn])
if isbns in self.pools: if isbns in self.pools:
@ -102,15 +138,19 @@ class ISBNMerge(object):
if results: if results:
has_isbn_result = True has_isbn_result = True
break break
self.has_isbn_result = has_isbn_result
isbn_sources = frozenset()
if has_isbn_result: if has_isbn_result:
self.merge_isbn_results() isbn_sources = self.merge_isbn_results()
else:
results = sorted(self.isbnless_results, # Now handle results that have no ISBNs
key=attrgetter('relevance_in_source')) results = sorted(self.isbnless_results,
key=attrgetter('relevance_in_source'))
# Only use results that are from sources that have not also returned a
# result with an ISBN
results = [r for r in results if r.identify_plugin not in isbn_sources]
if results:
# Pick only the most relevant result from each source # Pick only the most relevant result from each source
self.results = []
seen = set() seen = set()
for result in results: for result in results:
if result.identify_plugin not in seen: if result.identify_plugin not in seen:
@ -190,11 +230,15 @@ class ISBNMerge(object):
def merge_isbn_results(self): def merge_isbn_results(self):
self.results = [] self.results = []
sources = set()
for min_year, results in self.pools.itervalues(): for min_year, results in self.pools.itervalues():
if results: if results:
for r in results:
sources.add(r.identify_plugin)
self.results.append(self.merge(results, min_year)) self.results.append(self.merge(results, min_year))
self.results.sort(key=attrgetter('average_source_relevance')) self.results.sort(key=attrgetter('average_source_relevance'))
return sources
def length_merge(self, attr, results, null_value=None, shortest=True): def length_merge(self, attr, results, null_value=None, shortest=True):
values = [getattr(x, attr) for x in results if not x.is_null(attr)] values = [getattr(x, attr) for x in results if not x.is_null(attr)]
@ -254,13 +298,23 @@ class ISBNMerge(object):
# Published date # Published date
if min_year: if min_year:
min_date = datetime(min_year, 1, 2, tzinfo=utc_tz) for r in results:
year = getattr(r.pubdate, 'year', None)
if year == min_year:
ans.pubdate = r.pubdate
break
if getattr(ans.pubdate, 'year', None) == min_year:
min_date = datetime(min_year, ans.pubdate.month, ans.pubdate.day)
else:
min_date = datetime(min_year, 1, 2, tzinfo=utc_tz)
ans.pubdate = min_date ans.pubdate = min_date
else: else:
min_date = datetime(3001, 1, 1, tzinfo=utc_tz) min_date = datetime(3001, 1, 1, tzinfo=utc_tz)
for r in results: for r in results:
if r.pubdate is not None and r.pubdate < min_date: if r.pubdate is not None:
min_date = r.pubdate candidate = as_utc(r.pubdate)
if candidate < min_date:
min_date = candidate
if min_date.year < 3000: if min_date.year < 3000:
ans.pubdate = min_date ans.pubdate = min_date
@ -293,7 +347,7 @@ class ISBNMerge(object):
def merge_identify_results(result_map, log): def merge_identify_results(result_map, log):
isbn_merge = ISBNMerge() isbn_merge = ISBNMerge(log)
for plugin, results in result_map.iteritems(): for plugin, results in result_map.iteritems():
for result in results: for result in results:
isbn_merge.add_result(result) isbn_merge.add_result(result)

View File

@ -957,7 +957,10 @@ def get_metadata(stream):
return get_metadata(stream) return get_metadata(stream)
from calibre.utils.logging import Log from calibre.utils.logging import Log
log = Log() log = Log()
mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')]) try:
mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
except:
mi = MetaInformation(_('Unknown'), [_('Unknown')])
mh = MetadataHeader(stream, log) mh = MetadataHeader(stream, log)
if mh.title and mh.title != _('Unknown'): if mh.title and mh.title != _('Unknown'):
mi.title = mh.title mi.title = mh.title

View File

@ -121,14 +121,16 @@ class DetectStructure(object):
if not self.oeb.toc.has_href(href): if not self.oeb.toc.has_href(href):
text = xml2text(a) text = xml2text(a)
text = text[:100].strip() text = text[:100].strip()
if not self.oeb.toc.has_text(text): if (not self.opts.duplicate_links_in_toc and
num += 1 self.oeb.toc.has_text(text)):
self.oeb.toc.add(text, href, continue
play_order=self.oeb.toc.next_play_order()) num += 1
if self.opts.max_toc_links > 0 and \ self.oeb.toc.add(text, href,
num >= self.opts.max_toc_links: play_order=self.oeb.toc.next_play_order())
self.log('Maximum TOC links reached, stopping.') if self.opts.max_toc_links > 0 and \
return num >= self.opts.max_toc_links:
self.log('Maximum TOC links reached, stopping.')
return

View File

@ -7,12 +7,13 @@ from urllib import unquote
from PyQt4.Qt import (QVariant, QFileInfo, QObject, SIGNAL, QBuffer, Qt, from PyQt4.Qt import (QVariant, QFileInfo, QObject, SIGNAL, QBuffer, Qt,
QByteArray, QTranslator, QCoreApplication, QThread, QByteArray, QTranslator, QCoreApplication, QThread,
QEvent, QTimer, pyqtSignal, QDate, QDesktopServices, QEvent, QTimer, pyqtSignal, QDate, QDesktopServices,
QFileDialog, QFileIconProvider, QFileDialog, QFileIconProvider, QSettings,
QIcon, QApplication, QDialog, QUrl, QFont) QIcon, QApplication, QDialog, QUrl, QFont)
ORG_NAME = 'KovidsBrain' ORG_NAME = 'KovidsBrain'
APP_UID = 'libprs500' APP_UID = 'libprs500'
from calibre.constants import islinux, iswindows, isbsd, isfrozen, isosx from calibre.constants import (islinux, iswindows, isbsd, isfrozen, isosx,
config_dir)
from calibre.utils.config import Config, ConfigProxy, dynamic, JSONConfig from calibre.utils.config import Config, ConfigProxy, dynamic, JSONConfig
from calibre.utils.localization import set_qt_translator from calibre.utils.localization import set_qt_translator
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
@ -82,13 +83,14 @@ gprefs.defaults['tags_browser_partition_method'] = 'first letter'
gprefs.defaults['tags_browser_collapse_at'] = 100 gprefs.defaults['tags_browser_collapse_at'] = 100
gprefs.defaults['edit_metadata_single_layout'] = 'default' gprefs.defaults['edit_metadata_single_layout'] = 'default'
gprefs.defaults['book_display_fields'] = [ gprefs.defaults['book_display_fields'] = [
('title', False), ('authors', False), ('formats', True), ('title', False), ('authors', True), ('formats', True),
('series', True), ('identifiers', True), ('tags', True), ('series', True), ('identifiers', True), ('tags', True),
('path', True), ('publisher', False), ('rating', False), ('path', True), ('publisher', False), ('rating', False),
('author_sort', False), ('sort', False), ('timestamp', False), ('author_sort', False), ('sort', False), ('timestamp', False),
('uuid', False), ('comments', True), ('id', False), ('pubdate', False), ('uuid', False), ('comments', True), ('id', False), ('pubdate', False),
('last_modified', False), ('size', False), ('last_modified', False), ('size', False),
] ]
gprefs.defaults['default_author_link'] = 'http://en.wikipedia.org/w/index.php?search={author}'
# }}} # }}}
@ -192,6 +194,11 @@ def _config(): # {{{
config = _config() config = _config()
# }}} # }}}
QSettings.setPath(QSettings.IniFormat, QSettings.UserScope, config_dir)
QSettings.setPath(QSettings.IniFormat, QSettings.SystemScope,
config_dir)
QSettings.setDefaultFormat(QSettings.IniFormat)
# Turn off DeprecationWarnings in windows GUI # Turn off DeprecationWarnings in windows GUI
if iswindows: if iswindows:
import warnings import warnings

View File

@ -260,7 +260,8 @@ class ChooseLibraryAction(InterfaceAction):
'The files remain on your computer, if you want ' 'The files remain on your computer, if you want '
'to delete them, you will have to do so manually.') % loc, 'to delete them, you will have to do so manually.') % loc,
show=True) show=True)
open_local_file(loc) if os.path.exists(loc):
open_local_file(loc)
def backup_status(self, location): def backup_status(self, location):
dirty_text = 'no' dirty_text = 'no'

View File

@ -38,3 +38,6 @@ class ShowQuickviewAction(InterfaceAction):
Quickview(self.gui, self.gui.library_view, index) Quickview(self.gui, self.gui.library_view, index)
self.current_instance.show() self.current_instance.show()
def library_changed(self, db):
    '''
    Hand the new library to the tracked instance (``self.current_instance``).

    Nothing happens when there is no current instance or when it reports
    itself as closed.

    :param db: database object passed on to the instance's ``set_database``
    '''
    instance = self.current_instance
    if not instance or instance.is_closed:
        return
    instance.set_database(db)

View File

@ -5,7 +5,6 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from PyQt4.Qt import (QPixmap, QSize, QWidget, Qt, pyqtSignal, QUrl, from PyQt4.Qt import (QPixmap, QSize, QWidget, Qt, pyqtSignal, QUrl,
QPropertyAnimation, QEasingCurve, QApplication, QFontInfo, QPropertyAnimation, QEasingCurve, QApplication, QFontInfo,
QSizePolicy, QPainter, QRect, pyqtProperty, QLayout, QPalette, QMenu) QSizePolicy, QPainter, QRect, pyqtProperty, QLayout, QPalette, QMenu)
@ -23,6 +22,7 @@ from calibre.library.comments import comments_to_html
from calibre.gui2 import (config, open_local_file, open_url, pixmap_to_data, from calibre.gui2 import (config, open_local_file, open_url, pixmap_to_data,
gprefs) gprefs)
from calibre.utils.icu import sort_key from calibre.utils.icu import sort_key
from calibre.utils.formatter import EvalFormatter
def render_html(mi, css, vertical, widget, all_fields=False): # {{{ def render_html(mi, css, vertical, widget, all_fields=False): # {{{
table = render_data(mi, all_fields=all_fields, table = render_data(mi, all_fields=all_fields,
@ -98,6 +98,14 @@ def render_data(mi, use_roman_numbers=True, all_fields=False):
val = force_unicode(val) val = force_unicode(val)
ans.append((field, ans.append((field,
u'<td class="comments" colspan="2">%s</td>'%comments_to_html(val))) u'<td class="comments" colspan="2">%s</td>'%comments_to_html(val)))
elif metadata['datatype'] == 'composite' and \
metadata['display'].get('contains_html', False):
val = getattr(mi, field)
if val:
val = force_unicode(val)
ans.append((field,
u'<td class="title">%s</td><td>%s</td>'%
(name, comments_to_html(val))))
elif field == 'path': elif field == 'path':
if mi.path: if mi.path:
path = force_unicode(mi.path, filesystem_encoding) path = force_unicode(mi.path, filesystem_encoding)
@ -121,6 +129,27 @@ def render_data(mi, use_roman_numbers=True, all_fields=False):
if links: if links:
ans.append((field, u'<td class="title">%s</td><td>%s</td>'%( ans.append((field, u'<td class="title">%s</td><td>%s</td>'%(
_('Ids')+':', links))) _('Ids')+':', links)))
elif field == 'authors' and not isdevice:
authors = []
formatter = EvalFormatter()
for aut in mi.authors:
if mi.author_link_map[aut]:
link = mi.author_link_map[aut]
elif gprefs.get('default_author_link'):
vals = {'author': aut.replace(' ', '+')}
try:
vals['author_sort'] = mi.author_sort_map[aut].replace(' ', '+')
except:
vals['author_sort'] = aut.replace(' ', '+')
link = formatter.safe_format(
gprefs.get('default_author_link'), vals, '', vals)
if link:
link = prepare_string_for_xml(link)
authors.append(u'<a href="%s">%s</a>'%(link, aut))
else:
authors.append(aut)
ans.append((field, u'<td class="title">%s</td><td>%s</td>'%(name,
u' & '.join(authors))))
else: else:
val = mi.format_field(field)[-1] val = mi.format_field(field)[-1]
if val is None: if val is None:

View File

@ -22,7 +22,7 @@ class TOCWidget(Widget, Ui_Form):
Widget.__init__(self, parent, Widget.__init__(self, parent,
['level1_toc', 'level2_toc', 'level3_toc', ['level1_toc', 'level2_toc', 'level3_toc',
'toc_threshold', 'max_toc_links', 'no_chapters_in_toc', 'toc_threshold', 'max_toc_links', 'no_chapters_in_toc',
'use_auto_toc', 'toc_filter', 'use_auto_toc', 'toc_filter', 'duplicate_links_in_toc',
] ]
) )
self.db, self.book_id = db, book_id self.db, self.book_id = db, book_id

View File

@ -21,7 +21,7 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="2" column="0"> <item row="3" column="0">
<widget class="QLabel" name="label_10"> <widget class="QLabel" name="label_10">
<property name="text"> <property name="text">
<string>Number of &amp;links to add to Table of Contents</string> <string>Number of &amp;links to add to Table of Contents</string>
@ -31,14 +31,14 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="2" column="1"> <item row="3" column="1">
<widget class="QSpinBox" name="opt_max_toc_links"> <widget class="QSpinBox" name="opt_max_toc_links">
<property name="maximum"> <property name="maximum">
<number>10000</number> <number>10000</number>
</property> </property>
</widget> </widget>
</item> </item>
<item row="3" column="0"> <item row="4" column="0">
<widget class="QLabel" name="label_16"> <widget class="QLabel" name="label_16">
<property name="text"> <property name="text">
<string>Chapter &amp;threshold</string> <string>Chapter &amp;threshold</string>
@ -48,7 +48,7 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="3" column="1"> <item row="4" column="1">
<widget class="QSpinBox" name="opt_toc_threshold"/> <widget class="QSpinBox" name="opt_toc_threshold"/>
</item> </item>
<item row="0" column="0" colspan="2"> <item row="0" column="0" colspan="2">
@ -58,7 +58,7 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="4" column="0"> <item row="5" column="0">
<widget class="QLabel" name="label"> <widget class="QLabel" name="label">
<property name="text"> <property name="text">
<string>TOC &amp;Filter:</string> <string>TOC &amp;Filter:</string>
@ -68,19 +68,19 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="4" column="1"> <item row="5" column="1">
<widget class="QLineEdit" name="opt_toc_filter"/> <widget class="QLineEdit" name="opt_toc_filter"/>
</item> </item>
<item row="5" column="0" colspan="2"> <item row="6" column="0" colspan="2">
<widget class="XPathEdit" name="opt_level1_toc" native="true"/> <widget class="XPathEdit" name="opt_level1_toc" native="true"/>
</item> </item>
<item row="6" column="0" colspan="2"> <item row="7" column="0" colspan="2">
<widget class="XPathEdit" name="opt_level2_toc" native="true"/> <widget class="XPathEdit" name="opt_level2_toc" native="true"/>
</item> </item>
<item row="7" column="0" colspan="2"> <item row="8" column="0" colspan="2">
<widget class="XPathEdit" name="opt_level3_toc" native="true"/> <widget class="XPathEdit" name="opt_level3_toc" native="true"/>
</item> </item>
<item row="8" column="0"> <item row="9" column="0">
<spacer name="verticalSpacer"> <spacer name="verticalSpacer">
<property name="orientation"> <property name="orientation">
<enum>Qt::Vertical</enum> <enum>Qt::Vertical</enum>
@ -93,6 +93,13 @@
</property> </property>
</spacer> </spacer>
</item> </item>
<item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="opt_duplicate_links_in_toc">
<property name="text">
<string>Allow &amp;duplicate links when creating the Table of Contents</string>
</property>
</widget>
</item>
</layout> </layout>
</widget> </widget>
<customwidgets> <customwidgets>

View File

@ -4,10 +4,11 @@ __docformat__ = 'restructuredtext en'
__license__ = 'GPL v3' __license__ = 'GPL v3'
from PyQt4.Qt import (Qt, QDialog, QTableWidgetItem, QAbstractItemView, QIcon, from PyQt4.Qt import (Qt, QDialog, QTableWidgetItem, QAbstractItemView, QIcon,
QDialogButtonBox, QFrame, QLabel, QTimer, QMenu, QApplication) QDialogButtonBox, QFrame, QLabel, QTimer, QMenu, QApplication,
QByteArray)
from calibre.ebooks.metadata import author_to_author_sort from calibre.ebooks.metadata import author_to_author_sort
from calibre.gui2 import error_dialog from calibre.gui2 import error_dialog, gprefs
from calibre.gui2.dialogs.edit_authors_dialog_ui import Ui_EditAuthorsDialog from calibre.gui2.dialogs.edit_authors_dialog_ui import Ui_EditAuthorsDialog
from calibre.utils.icu import sort_key from calibre.utils.icu import sort_key
@ -20,7 +21,7 @@ class tableItem(QTableWidgetItem):
class EditAuthorsDialog(QDialog, Ui_EditAuthorsDialog): class EditAuthorsDialog(QDialog, Ui_EditAuthorsDialog):
def __init__(self, parent, db, id_to_select, select_sort): def __init__(self, parent, db, id_to_select, select_sort, select_link):
QDialog.__init__(self, parent) QDialog.__init__(self, parent)
Ui_EditAuthorsDialog.__init__(self) Ui_EditAuthorsDialog.__init__(self)
self.setupUi(self) self.setupUi(self)
@ -29,11 +30,19 @@ class EditAuthorsDialog(QDialog, Ui_EditAuthorsDialog):
self.setWindowFlags(self.windowFlags()&(~Qt.WindowContextHelpButtonHint)) self.setWindowFlags(self.windowFlags()&(~Qt.WindowContextHelpButtonHint))
self.setWindowIcon(icon) self.setWindowIcon(icon)
try:
self.table_column_widths = \
gprefs.get('manage_authors_table_widths', None)
geom = gprefs.get('manage_authors_dialog_geometry', bytearray(''))
self.restoreGeometry(QByteArray(geom))
except:
pass
self.buttonBox.accepted.connect(self.accepted) self.buttonBox.accepted.connect(self.accepted)
# Set up the column headings # Set up the column headings
self.table.setSelectionMode(QAbstractItemView.SingleSelection) self.table.setSelectionMode(QAbstractItemView.SingleSelection)
self.table.setColumnCount(2) self.table.setColumnCount(3)
self.down_arrow_icon = QIcon(I('arrow-down.png')) self.down_arrow_icon = QIcon(I('arrow-down.png'))
self.up_arrow_icon = QIcon(I('arrow-up.png')) self.up_arrow_icon = QIcon(I('arrow-up.png'))
self.blank_icon = QIcon(I('blank.png')) self.blank_icon = QIcon(I('blank.png'))
@ -43,26 +52,35 @@ class EditAuthorsDialog(QDialog, Ui_EditAuthorsDialog):
self.aus_col = QTableWidgetItem(_('Author sort')) self.aus_col = QTableWidgetItem(_('Author sort'))
self.table.setHorizontalHeaderItem(1, self.aus_col) self.table.setHorizontalHeaderItem(1, self.aus_col)
self.aus_col.setIcon(self.up_arrow_icon) self.aus_col.setIcon(self.up_arrow_icon)
self.aul_col = QTableWidgetItem(_('Link'))
self.table.setHorizontalHeaderItem(2, self.aul_col)
self.aus_col.setIcon(self.blank_icon)
# Add the data # Add the data
self.authors = {} self.authors = {}
auts = db.get_authors_with_ids() auts = db.get_authors_with_ids()
self.table.setRowCount(len(auts)) self.table.setRowCount(len(auts))
select_item = None select_item = None
for row, (id, author, sort) in enumerate(auts): for row, (id, author, sort, link) in enumerate(auts):
author = author.replace('|', ',') author = author.replace('|', ',')
self.authors[id] = (author, sort) self.authors[id] = (author, sort, link)
aut = tableItem(author) aut = tableItem(author)
aut.setData(Qt.UserRole, id) aut.setData(Qt.UserRole, id)
sort = tableItem(sort) sort = tableItem(sort)
link = tableItem(link)
self.table.setItem(row, 0, aut) self.table.setItem(row, 0, aut)
self.table.setItem(row, 1, sort) self.table.setItem(row, 1, sort)
self.table.setItem(row, 2, link)
if id == id_to_select: if id == id_to_select:
if select_sort: if select_sort:
select_item = sort select_item = sort
elif select_link:
select_item = link
else: else:
select_item = aut select_item = aut
self.table.resizeColumnsToContents() self.table.resizeColumnsToContents()
if self.table.columnWidth(2) < 200:
self.table.setColumnWidth(2, 200)
# set up the cellChanged signal only after the table is filled # set up the cellChanged signal only after the table is filled
self.table.cellChanged.connect(self.cell_changed) self.table.cellChanged.connect(self.cell_changed)
@ -115,6 +133,28 @@ class EditAuthorsDialog(QDialog, Ui_EditAuthorsDialog):
self.table.setContextMenuPolicy(Qt.CustomContextMenu) self.table.setContextMenuPolicy(Qt.CustomContextMenu)
self.table.customContextMenuRequested .connect(self.show_context_menu) self.table.customContextMenuRequested .connect(self.show_context_menu)
def save_state(self):
    '''
    Record the current width of every table column and the dialog geometry
    in gprefs, under 'manage_authors_table_widths' and
    'manage_authors_dialog_geometry'. The widths are also kept on
    ``self.table_column_widths``.
    '''
    table = self.table
    self.table_column_widths = [table.columnWidth(col)
            for col in range(table.columnCount())]
    gprefs['manage_authors_table_widths'] = self.table_column_widths
    gprefs['manage_authors_dialog_geometry'] = bytearray(self.saveGeometry())
# Resize handler for the dialog: forwards the event to QDialog, then either
# re-applies the remembered column widths or, when none are remembered,
# spreads the table width evenly over all columns.
def resizeEvent(self, *args):
QDialog.resizeEvent(self, *args)
# table_column_widths is loaded from gprefs in __init__ (None when nothing
# was stored) and refreshed by save_state()
if self.table_column_widths is not None:
for c,w in enumerate(self.table_column_widths):
self.table.setColumnWidth(c, w)
else:
# the vertical scroll bar might not be rendered, so might not yet
# have a width. Assume 25. Not a problem because user-changed column
# widths will be remembered
w = self.table.width() - 25 - self.table.verticalHeader().width()
# NOTE(review): relies on `/` being integer division for ints (Python 2
# without `from __future__ import division`); under true division w
# becomes a float -- confirm setColumnWidth tolerates that before porting
w /= self.table.columnCount()
for c in range(0, self.table.columnCount()):
self.table.setColumnWidth(c, w)
# NOTE(review): indentation is lost in this view; presumably this call sits
# at method level and runs after either branch -- confirm against the file
self.save_state()
def show_context_menu(self, point): def show_context_menu(self, point):
self.context_item = self.table.itemAt(point) self.context_item = self.table.itemAt(point)
case_menu = QMenu(_('Change Case')) case_menu = QMenu(_('Change Case'))
@ -231,14 +271,16 @@ class EditAuthorsDialog(QDialog, Ui_EditAuthorsDialog):
self.auth_col.setIcon(self.blank_icon) self.auth_col.setIcon(self.blank_icon)
def accepted(self): def accepted(self):
self.save_state()
self.result = [] self.result = []
for row in range(0,self.table.rowCount()): for row in range(0,self.table.rowCount()):
id = self.table.item(row, 0).data(Qt.UserRole).toInt()[0] id = self.table.item(row, 0).data(Qt.UserRole).toInt()[0]
aut = unicode(self.table.item(row, 0).text()).strip() aut = unicode(self.table.item(row, 0).text()).strip()
sort = unicode(self.table.item(row, 1).text()).strip() sort = unicode(self.table.item(row, 1).text()).strip()
orig_aut,orig_sort = self.authors[id] link = unicode(self.table.item(row, 2).text()).strip()
if orig_aut != aut or orig_sort != sort: orig_aut,orig_sort,orig_link = self.authors[id]
self.result.append((id, orig_aut, aut, sort)) if orig_aut != aut or orig_sort != sort or orig_link != link:
self.result.append((id, orig_aut, aut, sort, link))
def do_recalc_author_sort(self): def do_recalc_author_sort(self):
self.table.cellChanged.disconnect() self.table.cellChanged.disconnect()
@ -276,6 +318,6 @@ class EditAuthorsDialog(QDialog, Ui_EditAuthorsDialog):
c.setText(author_to_author_sort(aut)) c.setText(author_to_author_sort(aut))
item = c item = c
else: else:
item = self.table.item(row, 1) item = self.table.item(row, col)
self.table.setCurrentItem(item) self.table.setCurrentItem(item)
self.table.scrollToItem(item) self.table.scrollToItem(item)

View File

@ -12,7 +12,7 @@ from PyQt4.Qt import Qt, QDialog, QGridLayout, QVBoxLayout, QFont, QLabel, \
from calibre.gui2.dialogs.metadata_bulk_ui import Ui_MetadataBulkDialog from calibre.gui2.dialogs.metadata_bulk_ui import Ui_MetadataBulkDialog
from calibre.gui2.dialogs.tag_editor import TagEditor from calibre.gui2.dialogs.tag_editor import TagEditor
from calibre.ebooks.metadata import string_to_authors, authors_to_string, title_sort from calibre.ebooks.metadata import string_to_authors, authors_to_string, title_sort
from calibre.ebooks.metadata.book.base import composite_formatter from calibre.ebooks.metadata.book.base import SafeFormat
from calibre.gui2.custom_column_widgets import populate_metadata_page from calibre.gui2.custom_column_widgets import populate_metadata_page
from calibre.gui2 import error_dialog, ResizableDialog, UNDEFINED_QDATE, \ from calibre.gui2 import error_dialog, ResizableDialog, UNDEFINED_QDATE, \
gprefs, question_dialog gprefs, question_dialog
@ -499,7 +499,7 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
def s_r_get_field(self, mi, field): def s_r_get_field(self, mi, field):
if field: if field:
if field == '{template}': if field == '{template}':
v = composite_formatter.safe_format\ v = SafeFormat().safe_format\
(unicode(self.s_r_template.text()), mi, _('S/R TEMPLATE ERROR'), mi) (unicode(self.s_r_template.text()), mi, _('S/R TEMPLATE ERROR'), mi)
return [v] return [v]
fm = self.db.metadata_for_field(field) fm = self.db.metadata_for_field(field)

View File

@ -18,16 +18,29 @@ class TableItem(QTableWidgetItem):
A QTableWidgetItem that sorts on a separate string and uses ICU rules A QTableWidgetItem that sorts on a separate string and uses ICU rules
''' '''
def __init__(self, val, sort): def __init__(self, val, sort, idx=0):
self.sort = sort self.sort = sort
self.sort_idx = idx
QTableWidgetItem.__init__(self, val) QTableWidgetItem.__init__(self, val)
self.setFlags(Qt.ItemIsEnabled|Qt.ItemIsSelectable) self.setFlags(Qt.ItemIsEnabled|Qt.ItemIsSelectable)
def __ge__(self, other): def __ge__(self, other):
return sort_key(self.sort) >= sort_key(other.sort) l = sort_key(self.sort)
r = sort_key(other.sort)
if l > r:
return 1
if l == r:
return self.sort_idx >= other.sort_idx
return 0
def __lt__(self, other): def __lt__(self, other):
return sort_key(self.sort) < sort_key(other.sort) l = sort_key(self.sort)
r = sort_key(other.sort)
if l < r:
return 1
if l == r:
return self.sort_idx < other.sort_idx
return 0
class Quickview(QDialog, Ui_Quickview): class Quickview(QDialog, Ui_Quickview):
@ -60,6 +73,7 @@ class Quickview(QDialog, Ui_Quickview):
self.last_search = None self.last_search = None
self.current_column = None self.current_column = None
self.current_item = None self.current_item = None
self.no_valid_items = False
self.items.setSelectionMode(QAbstractItemView.SingleSelection) self.items.setSelectionMode(QAbstractItemView.SingleSelection)
self.items.currentTextChanged.connect(self.item_selected) self.items.currentTextChanged.connect(self.item_selected)
@ -95,8 +109,19 @@ class Quickview(QDialog, Ui_Quickview):
self.search_button.clicked.connect(self.do_search) self.search_button.clicked.connect(self.do_search)
view.model().new_bookdisplay_data.connect(self.book_was_changed) view.model().new_bookdisplay_data.connect(self.book_was_changed)
def set_database(self, db):
    '''
    Switch the dialog to a different database and discard what is currently
    displayed.

    Signals from the items list and the books table are blocked while the
    two widgets are emptied, then unblocked in the reverse order.

    :param db: the database object to use from now on (stored as self.db)
    '''
    self.db = db
    widgets = (self.items, self.books_table)
    for widget in widgets:
        widget.blockSignals(True)
    self.items.clear()
    self.books_table.setRowCount(0)
    for widget in reversed(widgets):
        widget.blockSignals(False)
# search button # search button
def do_search(self): def do_search(self):
if self.no_valid_items:
return
if self.last_search is not None: if self.last_search is not None:
self.gui.search.set_search_string(self.last_search) self.gui.search.set_search_string(self.last_search)
@ -110,6 +135,8 @@ class Quickview(QDialog, Ui_Quickview):
# clicks on the items listWidget # clicks on the items listWidget
def item_selected(self, txt): def item_selected(self, txt):
if self.no_valid_items:
return
self.fill_in_books_box(unicode(txt)) self.fill_in_books_box(unicode(txt))
# Given a cell in the library view, display the information # Given a cell in the library view, display the information
@ -122,6 +149,7 @@ class Quickview(QDialog, Ui_Quickview):
# Only show items for categories # Only show items for categories
if not self.db.field_metadata[key]['is_category']: if not self.db.field_metadata[key]['is_category']:
if self.current_key is None: if self.current_key is None:
self.indicate_no_items()
return return
key = self.current_key key = self.current_key
self.items_label.setText('{0} ({1})'.format( self.items_label.setText('{0} ({1})'.format(
@ -135,6 +163,7 @@ class Quickview(QDialog, Ui_Quickview):
vals = mi.get(key, None) vals = mi.get(key, None)
if vals: if vals:
self.no_valid_items = False
if not isinstance(vals, list): if not isinstance(vals, list):
vals = [vals] vals = [vals]
vals.sort(key=sort_key) vals.sort(key=sort_key)
@ -148,8 +177,19 @@ class Quickview(QDialog, Ui_Quickview):
self.current_key = key self.current_key = key
self.fill_in_books_box(vals[0]) self.fill_in_books_box(vals[0])
else:
self.indicate_no_items()
self.items.blockSignals(False) self.items.blockSignals(False)
def indicate_no_items(self):
    '''
    Put the dialog into its "nothing to show" state: set the
    no_valid_items flag, replace the contents of the items list with a
    single placeholder entry and show a usage hint in the books label.
    '''
    # The flag is checked by the click/search handlers so that the
    # placeholder entry is never treated as a real item.
    self.no_valid_items = True
    self.items.clear()
    self.items.addItem(QListWidgetItem(_('**No items found**')))
    self.books_label.setText(_('Click in a column in the library view '
                               'to see the information for that book'))
def fill_in_books_box(self, selected_item): def fill_in_books_box(self, selected_item):
self.current_item = selected_item self.current_item = selected_item
# Do a bit of fix-up on the items so that the search works. # Do a bit of fix-up on the items so that the search works.
@ -163,7 +203,8 @@ class Quickview(QDialog, Ui_Quickview):
self.db.data.search_restriction) self.db.data.search_restriction)
self.books_table.setRowCount(len(books)) self.books_table.setRowCount(len(books))
self.books_label.setText(_('Books with selected item: {0}').format(len(books))) self.books_label.setText(_('Books with selected item "{0}": {1}').
format(selected_item, len(books)))
select_item = None select_item = None
self.books_table.setSortingEnabled(False) self.books_table.setSortingEnabled(False)
@ -185,7 +226,7 @@ class Quickview(QDialog, Ui_Quickview):
series = mi.format_field('series')[1] series = mi.format_field('series')[1]
if series is None: if series is None:
series = '' series = ''
a = TableItem(series, series) a = TableItem(series, mi.series, mi.series_index)
a.setToolTip(tt) a.setToolTip(tt)
self.books_table.setItem(row, 2, a) self.books_table.setItem(row, 2, a)
self.books_table.setRowHeight(row, self.books_table_row_height) self.books_table.setRowHeight(row, self.books_table_row_height)
@ -213,6 +254,8 @@ class Quickview(QDialog, Ui_Quickview):
self.save_state() self.save_state()
def book_doubleclicked(self, row, column): def book_doubleclicked(self, row, column):
if self.no_valid_items:
return
book_id = self.books_table.item(row, 0).data(Qt.UserRole).toInt()[0] book_id = self.books_table.item(row, 0).data(Qt.UserRole).toInt()[0]
self.view.select_rows([book_id]) self.view.select_rows([book_id])
modifiers = int(QApplication.keyboardModifiers()) modifiers = int(QApplication.keyboardModifiers())

View File

@ -57,19 +57,6 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="2" column="1">
<spacer>
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>0</width>
<height>0</height>
</size>
</property>
</spacer>
</item>
<item row="3" column="0" colspan="2"> <item row="3" column="0" colspan="2">
<layout class="QHBoxLayout"> <layout class="QHBoxLayout">
<item> <item>

View File

@ -54,7 +54,7 @@ class DBRestore(QDialog):
def reject(self): def reject(self):
self.rejected = True self.rejected = True
self.restorer.progress_callback = lambda x, y: x self.restorer.progress_callback = lambda x, y: x
QDialog.rejecet(self) QDialog.reject(self)
def update(self): def update(self):
if self.restorer.is_alive(): if self.restorer.is_alive():

View File

@ -11,7 +11,7 @@ from PyQt4.Qt import (Qt, QDialog, QDialogButtonBox, QSyntaxHighlighter, QFont,
from calibre.gui2 import error_dialog from calibre.gui2 import error_dialog
from calibre.gui2.dialogs.template_dialog_ui import Ui_TemplateDialog from calibre.gui2.dialogs.template_dialog_ui import Ui_TemplateDialog
from calibre.utils.formatter_functions import formatter_functions from calibre.utils.formatter_functions import formatter_functions
from calibre.ebooks.metadata.book.base import composite_formatter, Metadata from calibre.ebooks.metadata.book.base import SafeFormat, Metadata
from calibre.library.coloring import (displayable_columns) from calibre.library.coloring import (displayable_columns)
@ -270,7 +270,7 @@ class TemplateDialog(QDialog, Ui_TemplateDialog):
self.highlighter.regenerate_paren_positions() self.highlighter.regenerate_paren_positions()
self.text_cursor_changed() self.text_cursor_changed()
self.template_value.setText( self.template_value.setText(
composite_formatter.safe_format(cur_text, self.mi, SafeFormat().safe_format(cur_text, self.mi,
_('EXCEPTION: '), self.mi)) _('EXCEPTION: '), self.mi))
def text_cursor_changed(self): def text_cursor_changed(self):

View File

@ -14,7 +14,7 @@ from PyQt4.Qt import (QAbstractTableModel, Qt, pyqtSignal, QIcon, QImage,
from calibre.gui2 import NONE, UNDEFINED_QDATE from calibre.gui2 import NONE, UNDEFINED_QDATE
from calibre.utils.pyparsing import ParseException from calibre.utils.pyparsing import ParseException
from calibre.ebooks.metadata import fmt_sidx, authors_to_string, string_to_authors from calibre.ebooks.metadata import fmt_sidx, authors_to_string, string_to_authors
from calibre.ebooks.metadata.book.base import composite_formatter from calibre.ebooks.metadata.book.base import SafeFormat
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.config import tweaks, prefs from calibre.utils.config import tweaks, prefs
from calibre.utils.date import dt_factory, qt_to_dt from calibre.utils.date import dt_factory, qt_to_dt
@ -91,6 +91,7 @@ class BooksModel(QAbstractTableModel): # {{{
self.current_highlighted_idx = None self.current_highlighted_idx = None
self.highlight_only = False self.highlight_only = False
self.colors = frozenset([unicode(c) for c in QColor.colorNames()]) self.colors = frozenset([unicode(c) for c in QColor.colorNames()])
self.formatter = SafeFormat()
self.read_config() self.read_config()
def change_alignment(self, colname, alignment): def change_alignment(self, colname, alignment):
@ -711,7 +712,7 @@ class BooksModel(QAbstractTableModel): # {{{
try: try:
if mi is None: if mi is None:
mi = self.db.get_metadata(id_, index_is_id=True) mi = self.db.get_metadata(id_, index_is_id=True)
color = composite_formatter.safe_format(fmt, mi, '', mi) color = self.formatter.safe_format(fmt, mi, '', mi)
if color in self.colors: if color in self.colors:
color = QColor(color) color = QColor(color)
if color.isValid(): if color.isValid():

View File

@ -51,6 +51,9 @@ class BooksView(QTableView): # {{{
def __init__(self, parent, modelcls=BooksModel, use_edit_metadata_dialog=True): def __init__(self, parent, modelcls=BooksModel, use_edit_metadata_dialog=True):
QTableView.__init__(self, parent) QTableView.__init__(self, parent)
if not tweaks['horizontal_scrolling_per_column']:
self.setHorizontalScrollMode(self.ScrollPerPixel)
self.setEditTriggers(self.EditKeyPressed) self.setEditTriggers(self.EditKeyPressed)
if tweaks['doubleclick_on_library_view'] == 'edit_cell': if tweaks['doubleclick_on_library_view'] == 'edit_cell':
self.setEditTriggers(self.DoubleClicked|self.editTriggers()) self.setEditTriggers(self.DoubleClicked|self.editTriggers())
@ -110,6 +113,7 @@ class BooksView(QTableView): # {{{
self.column_header.sectionMoved.connect(self.save_state) self.column_header.sectionMoved.connect(self.save_state)
self.column_header.setContextMenuPolicy(Qt.CustomContextMenu) self.column_header.setContextMenuPolicy(Qt.CustomContextMenu)
self.column_header.customContextMenuRequested.connect(self.show_column_header_context_menu) self.column_header.customContextMenuRequested.connect(self.show_column_header_context_menu)
self.column_header.sectionResized.connect(self.column_resized, Qt.QueuedConnection)
# }}} # }}}
self._model.database_changed.connect(self.database_changed) self._model.database_changed.connect(self.database_changed)
@ -214,6 +218,9 @@ class BooksView(QTableView): # {{{
self.column_header_context_menu.addSeparator() self.column_header_context_menu.addSeparator()
self.column_header_context_menu.addAction(
_('Shrink column if it is too wide to fit'),
partial(self.resize_column_to_fit, column=self.column_map[idx]))
self.column_header_context_menu.addAction( self.column_header_context_menu.addAction(
_('Restore default layout'), _('Restore default layout'),
partial(self.column_header_context_handler, partial(self.column_header_context_handler,
@ -235,13 +242,8 @@ class BooksView(QTableView): # {{{
self.selected_ids = [idc(r) for r in selected_rows] self.selected_ids = [idc(r) for r in selected_rows]
def sorting_done(self, indexc): def sorting_done(self, indexc):
if self.selected_ids: self.select_rows(self.selected_ids, using_ids=True, change_current=True,
indices = [self.model().index(indexc(i), 0) for i in scroll=True)
self.selected_ids]
sm = self.selectionModel()
for idx in indices:
sm.select(idx, sm.Select|sm.Rows)
self.scroll_to_row(indices[0].row())
self.selected_ids = [] self.selected_ids = []
def sort_by_named_field(self, field, order, reset=True): def sort_by_named_field(self, field, order, reset=True):
@ -456,7 +458,9 @@ class BooksView(QTableView): # {{{
traceback.print_exc() traceback.print_exc()
old_state['sort_history'] = sh old_state['sort_history'] = sh
self.column_header.blockSignals(True)
self.apply_state(old_state) self.apply_state(old_state)
self.column_header.blockSignals(False)
# Resize all rows to have the correct height # Resize all rows to have the correct height
if self.model().rowCount(QModelIndex()) > 0: if self.model().rowCount(QModelIndex()) > 0:
@ -465,6 +469,19 @@ class BooksView(QTableView): # {{{
self.was_restored = True self.was_restored = True
def resize_column_to_fit(self, column):
    '''
    Run the resize handler for the named column using its current width,
    so that a column that is already too wide gets shrunk.
    '''
    section = self.column_map.index(column)
    # The width is not changing, so it is reported as both old and new size
    old_size = self.columnWidth(section)
    new_size = self.columnWidth(section)
    self.column_resized(section, old_size, new_size)
def column_resized(self, col, old_size, new_size):
    '''
    Limit the width of column col: if new_size is larger than the width of
    the view minus some reserved space, set the column to that maximum.
    old_size is not used.
    '''
    view_width = self.width()
    # arbitrary: scroll bar + header + some
    reserved = (self.verticalScrollBar().width() +
                self.verticalHeader().width() + 10)
    limit = view_width - reserved
    if new_size <= limit:
        return
    # Block the header's signals while the width is corrected
    header = self.column_header
    header.blockSignals(True)
    self.setColumnWidth(col, limit)
    header.blockSignals(False)
# }}} # }}}
# Initialization/Delegate Setup {{{ # Initialization/Delegate Setup {{{

View File

@ -1092,11 +1092,12 @@ class IdentifiersEdit(QLineEdit): # {{{
for x in parts: for x in parts:
c = x.split(':') c = x.split(':')
if len(c) > 1: if len(c) > 1:
if c[0] == 'isbn': itype = c[0].lower()
if itype == 'isbn':
v = check_isbn(c[1]) v = check_isbn(c[1])
if v is not None: if v is not None:
c[1] = v c[1] = v
ans[c[0]] = c[1] ans[itype] = c[1]
return ans return ans
def fset(self, val): def fset(self, val):
if not val: if not val:
@ -1112,7 +1113,7 @@ class IdentifiersEdit(QLineEdit): # {{{
if v is not None: if v is not None:
val[k] = v val[k] = v
ids = sorted(val.iteritems(), key=keygen) ids = sorted(val.iteritems(), key=keygen)
txt = ', '.join(['%s:%s'%(k, v) for k, v in ids]) txt = ', '.join(['%s:%s'%(k.lower(), v) for k, v in ids])
self.setText(txt.strip()) self.setText(txt.strip())
self.setCursorPosition(0) self.setCursorPosition(0)
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)

View File

@ -20,6 +20,7 @@ from calibre.ebooks.metadata.sources.covers import download_cover
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
from calibre.customize.ui import metadata_plugins from calibre.customize.ui import metadata_plugins
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.date import as_utc
# Start download {{{ # Start download {{{
def show_config(gui, parent): def show_config(gui, parent):
@ -124,10 +125,18 @@ def merge_result(oldmi, newmi):
for plugin in metadata_plugins(['identify']): for plugin in metadata_plugins(['identify']):
fields |= plugin.touched_fields fields |= plugin.touched_fields
def is_equal(x, y):
    # Values that carry a tzinfo attribute are passed through as_utc()
    # before being compared; everything else is compared as is.
    left = as_utc(x) if hasattr(x, 'tzinfo') else x
    right = as_utc(y) if hasattr(y, 'tzinfo') else y
    return left == right
for f in fields: for f in fields:
# Optimize so that set_metadata does not have to do extra work later # Optimize so that set_metadata does not have to do extra work later
if not f.startswith('identifier:'): if not f.startswith('identifier:'):
if (not newmi.is_null(f) and getattr(newmi, f) == getattr(oldmi, f)): if (not newmi.is_null(f) and is_equal(getattr(newmi, f),
getattr(oldmi, f))):
setattr(newmi, f, getattr(dummy, f)) setattr(newmi, f, getattr(dummy, f))
newmi.last_modified = oldmi.last_modified newmi.last_modified = oldmi.last_modified

View File

@ -254,6 +254,10 @@ class ResultsView(QTableView): # {{{
'<h2>%s</h2>'%book.title, '<h2>%s</h2>'%book.title,
'<div><i>%s</i></div>'%authors_to_string(book.authors), '<div><i>%s</i></div>'%authors_to_string(book.authors),
] ]
if not book.is_null('series'):
series = book.format_field('series')
if series[1]:
parts.append('<div>%s: %s</div>'%series)
if not book.is_null('rating'): if not book.is_null('rating'):
parts.append('<div>%s</div>'%('\u2605'*int(book.rating))) parts.append('<div>%s</div>'%('\u2605'*int(book.rating)))
parts.append('</center>') parts.append('</center>')

View File

@ -127,6 +127,8 @@ class CreateCustomColumn(QDialog, Ui_QCreateCustomColumn):
self.composite_sort_by.setCurrentIndex(sb) self.composite_sort_by.setCurrentIndex(sb)
self.composite_make_category.setChecked( self.composite_make_category.setChecked(
c['display'].get('make_category', False)) c['display'].get('make_category', False))
self.composite_make_category.setChecked(
c['display'].get('contains_html', False))
elif ct == 'enumeration': elif ct == 'enumeration':
self.enum_box.setText(','.join(c['display'].get('enum_values', []))) self.enum_box.setText(','.join(c['display'].get('enum_values', [])))
self.enum_colors.setText(','.join(c['display'].get('enum_colors', []))) self.enum_colors.setText(','.join(c['display'].get('enum_colors', [])))
@ -141,6 +143,21 @@ class CreateCustomColumn(QDialog, Ui_QCreateCustomColumn):
all_colors = [unicode(s) for s in list(QColor.colorNames())] all_colors = [unicode(s) for s in list(QColor.colorNames())]
self.enum_colors_label.setToolTip('<p>' + ', '.join(all_colors) + '</p>') self.enum_colors_label.setToolTip('<p>' + ', '.join(all_colors) + '</p>')
self.composite_contains_html.setToolTip('<p>' +
_('If checked, this column will be displayed as HTML in '
'book details and the content server. This can be used to '
'construct links with the template language. For example, '
'the template '
'<pre>&lt;big&gt;&lt;b&gt;{title}&lt;/b&gt;&lt;/big&gt;'
'{series:| [|}{series_index:| [|]]}</pre>'
'will create a field displaying the title in bold large '
'characters, along with the series, for example <br>"<big><b>'
'An Oblique Approach</b></big> [Belisarius [1]]". The template '
'<pre>&lt;a href="http://www.beam-ebooks.de/ebook/{identifiers'
':select(beam)}"&gt;Beam book&lt;/a&gt;</pre> '
'will generate a link to the book on the Beam ebooks site.')
+ '</p>')
self.exec_() self.exec_()
def shortcut_activated(self, url): def shortcut_activated(self, url):
@ -179,7 +196,7 @@ class CreateCustomColumn(QDialog, Ui_QCreateCustomColumn):
getattr(self, 'date_format_'+x).setVisible(col_type == 'datetime') getattr(self, 'date_format_'+x).setVisible(col_type == 'datetime')
getattr(self, 'number_format_'+x).setVisible(col_type in ['int', 'float']) getattr(self, 'number_format_'+x).setVisible(col_type in ['int', 'float'])
for x in ('box', 'default_label', 'label', 'sort_by', 'sort_by_label', for x in ('box', 'default_label', 'label', 'sort_by', 'sort_by_label',
'make_category'): 'make_category', 'contains_html'):
getattr(self, 'composite_'+x).setVisible(col_type in ['composite', '*composite']) getattr(self, 'composite_'+x).setVisible(col_type in ['composite', '*composite'])
for x in ('box', 'default_label', 'label', 'colors', 'colors_label'): for x in ('box', 'default_label', 'label', 'colors', 'colors_label'):
getattr(self, 'enum_'+x).setVisible(col_type == 'enumeration') getattr(self, 'enum_'+x).setVisible(col_type == 'enumeration')
@ -257,6 +274,7 @@ class CreateCustomColumn(QDialog, Ui_QCreateCustomColumn):
'composite_sort': ['text', 'number', 'date', 'bool'] 'composite_sort': ['text', 'number', 'date', 'bool']
[self.composite_sort_by.currentIndex()], [self.composite_sort_by.currentIndex()],
'make_category': self.composite_make_category.isChecked(), 'make_category': self.composite_make_category.isChecked(),
'contains_html': self.composite_contains_html.isChecked(),
} }
elif col_type == 'enumeration': elif col_type == 'enumeration':
if not unicode(self.enum_box.text()).strip(): if not unicode(self.enum_box.text()).strip():

View File

@ -294,6 +294,13 @@ and end with &lt;code&gt;}&lt;/code&gt; You can have text before and after the f
</property> </property>
</widget> </widget>
</item> </item>
<item>
<widget class="QCheckBox" name="composite_contains_html">
<property name="text">
<string>Show as HTML in book details</string>
</property>
</widget>
</item>
<item> <item>
<spacer name="horizontalSpacer_24"> <spacer name="horizontalSpacer_24">
<property name="sizePolicy"> <property name="sizePolicy">

View File

@ -138,6 +138,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
(_('Partitioned'), 'partition')] (_('Partitioned'), 'partition')]
r('tags_browser_partition_method', gprefs, choices=choices) r('tags_browser_partition_method', gprefs, choices=choices)
r('tags_browser_collapse_at', gprefs) r('tags_browser_collapse_at', gprefs)
r('default_author_link', gprefs)
choices = set([k for k in db.field_metadata.all_field_keys() choices = set([k for k in db.field_metadata.all_field_keys()
if db.field_metadata[k]['is_category'] and if db.field_metadata[k]['is_category'] and

View File

@ -192,7 +192,7 @@
<string>Book Details</string> <string>Book Details</string>
</attribute> </attribute>
<layout class="QGridLayout" name="gridLayout_12"> <layout class="QGridLayout" name="gridLayout_12">
<item row="0" column="0" rowspan="2"> <item row="1" column="0" rowspan="2">
<widget class="QGroupBox" name="groupBox"> <widget class="QGroupBox" name="groupBox">
<property name="title"> <property name="title">
<string>Select displayed metadata</string> <string>Select displayed metadata</string>
@ -243,6 +243,31 @@
</layout> </layout>
</widget> </widget>
</item> </item>
<item row="0" column="0">
<layout class="QHBoxLayout">
<item>
<widget class="QLabel" name="label">
<property name="text">
<string>Default author link template:</string>
</property>
<property name="buddy">
<cstring>opt_default_author_link</cstring>
</property>
</widget>
</item>
<item>
<widget class="QLineEdit" name="opt_default_author_link">
<property name="toolTip">
<string>&lt;p&gt;Enter a template to be used to create a link for
an author in the books information dialog. This template will
be used when no link has been provided for the author using
Manage Authors. You can use the values {author} and
{author_sort}, and any template function.</string>
</property>
</widget>
</item>
</layout>
</item>
<item row="0" column="1"> <item row="0" column="1">
<widget class="QCheckBox" name="opt_use_roman_numerals_for_series_number"> <widget class="QCheckBox" name="opt_use_roman_numerals_for_series_number">
<property name="text"> <property name="text">

View File

@ -357,7 +357,6 @@ class Preferences(QMainWindow):
bytearray(self.saveGeometry())) bytearray(self.saveGeometry()))
if self.committed: if self.committed:
self.gui.must_restart_before_config = self.must_restart self.gui.must_restart_before_config = self.must_restart
self.gui.tags_view.set_new_model() # in case columns changed
self.gui.tags_view.recount() self.gui.tags_view.recount()
self.gui.create_device_menu() self.gui.create_device_menu()
self.gui.set_device_menu_items_state(bool(self.gui.device_connected)) self.gui.set_device_menu_items_state(bool(self.gui.device_connected))

View File

@ -17,12 +17,13 @@ from calibre.gui2.preferences.metadata_sources_ui import Ui_Form
from calibre.ebooks.metadata.sources.base import msprefs from calibre.ebooks.metadata.sources.base import msprefs
from calibre.customize.ui import (all_metadata_plugins, is_disabled, from calibre.customize.ui import (all_metadata_plugins, is_disabled,
enable_plugin, disable_plugin, default_disabled_plugins) enable_plugin, disable_plugin, default_disabled_plugins)
from calibre.gui2 import NONE, error_dialog from calibre.gui2 import NONE, error_dialog, question_dialog
class SourcesModel(QAbstractTableModel): # {{{ class SourcesModel(QAbstractTableModel): # {{{
def __init__(self, parent=None): def __init__(self, parent=None):
QAbstractTableModel.__init__(self, parent) QAbstractTableModel.__init__(self, parent)
self.gui_parent = parent
self.plugins = [] self.plugins = []
self.enabled_overrides = {} self.enabled_overrides = {}
@ -87,6 +88,15 @@ class SourcesModel(QAbstractTableModel): # {{{
if col == 0 and role == Qt.CheckStateRole: if col == 0 and role == Qt.CheckStateRole:
val, ok = val.toInt() val, ok = val.toInt()
if ok: if ok:
if val == Qt.Checked and 'Douban' in plugin.name:
if not question_dialog(self.gui_parent,
_('Are you sure?'), '<p>'+
_('This plugin is useful only for <b>Chinese</b>'
' language books. It can return incorrect'
' results for books in English. Are you'
' sure you want to enable it?'),
show_copy_button=False):
return ret
self.enabled_overrides[plugin] = val self.enabled_overrides[plugin] = val
ret = True ret = True
if col == 1 and role == Qt.EditRole: if col == 1 and role == Qt.EditRole:

View File

@ -31,7 +31,7 @@ class SaveTemplate(QWidget, Ui_Form):
(var, FORMAT_ARG_DESCS[var])) (var, FORMAT_ARG_DESCS[var]))
rows.append(u'<tr><td>%s&nbsp;</td><td>&nbsp;</td><td>%s</td></tr>'%( rows.append(u'<tr><td>%s&nbsp;</td><td>&nbsp;</td><td>%s</td></tr>'%(
_('Any custom field'), _('Any custom field'),
_('The lookup name of any custom field. These names begin with "#")'))) _('The lookup name of any custom field (these names begin with "#").')))
table = u'<table>%s</table>'%(u'\n'.join(rows)) table = u'<table>%s</table>'%(u'\n'.join(rows))
self.template_variables.setText(table) self.template_variables.setText(table)

View File

@ -173,7 +173,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
def refresh_gui(self, gui): def refresh_gui(self, gui):
gui.set_highlight_only_button_icon() gui.set_highlight_only_button_icon()
if self.muc_changed: if self.muc_changed:
gui.tags_view.set_new_model() gui.tags_view.recount()
gui.search.search_as_you_type(config['search_as_you_type']) gui.search.search_as_you_type(config['search_as_you_type'])
gui.search.do_search() gui.search.do_search()

View File

@ -126,7 +126,7 @@ class Matches(QAbstractItemModel):
elif role == Qt.ToolTipRole: elif role == Qt.ToolTipRole:
if col == 0: if col == 0:
if is_disabled(result): if is_disabled(result):
return QVariant('<p>' + _('This store is currently diabled and cannot be used in other parts of calibre.') + '</p>') return QVariant('<p>' + _('This store is currently disabled and cannot be used in other parts of calibre.') + '</p>')
else: else:
return QVariant('<p>' + _('This store is currently enabled and can be used in other parts of calibre.') + '</p>') return QVariant('<p>' + _('This store is currently enabled and can be used in other parts of calibre.') + '</p>')
elif col == 1: elif col == 1:

View File

@ -1,78 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import urllib
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser, url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class EpubBudStore(BasicStoreConfig, StorePlugin):
    '''
    Store plugin for epubbud.com. Results come from the site's Atom search
    feed; every result is reported as a free, DRM-free EPUB.
    '''

    def open(self, parent=None, detail_item=None, external=False):
        '''
        Show the store, or the page for detail_item when one is given,
        either in the external browser or in the embedded store dialog.
        '''
        url = 'http://epubbud.com/'

        if external or self.config.get('open_external', False):
            open_url(QUrl(url_slash_cleaner(detail_item if detail_item else url)))
        else:
            d = WebStoreDialog(self.gui, url, parent, detail_item)
            d.setWindowTitle(self.name)
            d.set_tags(self.config.get('tags', ''))
            d.exec_()

    def search(self, query, max_results=10, timeout=60):
        '''
        OPDS based search.

        We really should get the catalog from the store's OPDS feed
        and look for the application/opensearchdescription+xml entry.
        Then get the opensearch description to get the search url and
        format. However, we are going to be lazy and hard code it.

        Yields at most max_results SearchResult objects.
        '''
        url = 'http://www.epubbud.com/search.php?format=atom&q=' + urllib.quote_plus(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            # Use html instead of etree as html allows us
            # to ignore the namespace easily.
            doc = html.fromstring(f.read())
            for data in doc.xpath('//entry'):
                if counter <= 0:
                    break

                # Entries without an id cannot be opened later, skip them
                entry_id = ''.join(data.xpath('.//id/text()'))
                if not entry_id:
                    continue

                cover_url = ''.join(data.xpath('.//link[@rel="http://opds-spec.org/thumbnail"]/@href'))

                title = u''.join(data.xpath('.//title/text()'))
                author = u''.join(data.xpath('.//author/name/text()'))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = '$0.00'
                s.detail_item = entry_id.strip()
                s.drm = SearchResult.DRM_UNLOCKED
                s.formats = 'EPUB'

                yield s

View File

@ -1,106 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import urllib2
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser, url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class FeedbooksStore(BasicStoreConfig, StorePlugin):
    '''
    Store plugin for Feedbooks. Searching and the embedded store dialog use
    the mobile site (m.feedbooks.com); opening in the external browser uses
    feedbooks.com.
    '''

    def open(self, parent=None, detail_item=None, external=False):
        '''
        Show the store, or the page for detail_item when one is given,
        either in the external browser or in the embedded store dialog.
        '''
        url = 'http://m.feedbooks.com/'
        ext_url = 'http://feedbooks.com/'

        if external or self.config.get('open_external', False):
            if detail_item:
                ext_url = ext_url + detail_item
            open_url(QUrl(url_slash_cleaner(ext_url)))
        else:
            detail_url = None
            if detail_item:
                detail_url = url + detail_item
            d = WebStoreDialog(self.gui, url, parent, detail_url)
            d.setWindowTitle(self.name)
            d.set_tags(self.config.get('tags', ''))
            d.exec_()

    def search(self, query, max_results=10, timeout=60):
        '''
        Search the mobile site and yield at most max_results SearchResult
        objects. Entries for which no usable buy/download link can be
        found are skipped.
        '''
        url = 'http://m.feedbooks.com/search?query=' + urllib2.quote(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//ul[@class="m-list"]//li'):
                if counter <= 0:
                    break
                # Re-parse the entry on its own so that the absolute ('//')
                # XPath expressions below only see this one entry.
                data = html.fromstring(html.tostring(data))

                detail_item = ''
                buy_links = data.xpath('//a[@class="buy"]')
                if buy_links:
                    # A link without an href leaves detail_item empty so
                    # the entry is skipped instead of crashing on None.
                    href = buy_links[0].get('href') or ''
                    if href:
                        detail_item = '/item/' + href.split('/')[-2]
                else:
                    download_links = data.xpath('//a[@class="download"]')
                    if download_links:
                        href = download_links[0].get('href') or ''
                        if href:
                            name = href.split('/')[-1]
                            detail_item = '/book/' + name.split('.')[0]
                if not detail_item:
                    continue

                title = ''.join(data.xpath('//h5//a/text()'))
                author = ''.join(data.xpath('//h6//a/text()'))
                price = ''.join(data.xpath('//a[@class="buy"]/text()'))
                formats = 'EPUB'
                if not price:
                    # No buy link text: treated as a free book
                    price = '$0.00'
                    formats = 'EPUB, MOBI, PDF'
                cover_url = ''
                cover_url_img = data.xpath('//img')
                if cover_url_img:
                    # Drop the query string from the image URL. The original
                    # code computed this value but threw the result away.
                    cover_url = (cover_url_img[0].get('src') or '').split('?')[0]

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.replace(' ', '').strip()
                s.detail_item = detail_item.strip()
                s.formats = formats

                yield s

    def get_details(self, search_result, timeout):
        '''
        Load the detail page for search_result and set its drm attribute
        depending on whether the long description mentions DRM/Protection.
        Always returns True.
        '''
        url = 'http://m.feedbooks.com/'

        br = browser()
        with closing(br.open(url_slash_cleaner(url + search_result.detail_item), timeout=timeout)) as nf:
            idata = html.fromstring(nf.read())
            if idata.xpath('boolean(//div[contains(@class, "m-description-long")]//p[contains(., "DRM") or contains(b, "Protection")])'):
                search_result.drm = SearchResult.DRM_LOCKED
            else:
                search_result.drm = SearchResult.DRM_UNLOCKED
        return True

View File

@ -1,92 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import urllib
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser, url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class GutenbergStore(BasicStoreConfig, StorePlugin):
    '''
    Store plugin for Project Gutenberg. Book pages are shown from the
    mobile site (m.gutenberg.org); searching goes through a site-restricted
    Google query.
    '''

    def open(self, parent=None, detail_item=None, external=False):
        '''
        Show the store, or the page for detail_item when one is given,
        either in the external browser or in the embedded store dialog.
        '''
        url = 'http://m.gutenberg.org/'
        ext_url = 'http://gutenberg.org/'

        if external or self.config.get('open_external', False):
            if detail_item:
                ext_url = ext_url + detail_item
            open_url(QUrl(url_slash_cleaner(ext_url)))
        else:
            detail_url = None
            if detail_item:
                detail_url = url + detail_item
            d = WebStoreDialog(self.gui, url, parent, detail_url)
            d.setWindowTitle(self.name)
            d.set_tags(self.config.get('tags', ''))
            d.exec_()

    def search(self, query, max_results=10, timeout=60):
        '''
        Yield at most max_results SearchResult objects for query. Only
        hits whose target URL contains /ebooks/ are reported.
        '''
        # Gutenberg's website does not allow searching both author and title.
        # Using a google search so we can search on both fields at once.
        url = 'http://www.google.com/xhtml?q=site:gutenberg.org+' + urllib.quote_plus(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="edewpi"]//div[@class="r ld"]'):
                if counter <= 0:
                    break

                url = ''
                url_a = data.xpath('div[@class="jd"]/a')
                if url_a:
                    url_a = url_a[0]
                    # Fall back to '' so that a link without an href is
                    # skipped below instead of raising on None.
                    url = url_a.get('href') or ''
                if url:
                    # The target is taken from the u= parameter of the link
                    url = url.split('u=')[-1].split('&')[0]
                if '/ebooks/' not in url:
                    continue
                book_id = url.split('/')[-1]

                url_a = html.fromstring(html.tostring(url_a))
                heading = ''.join(url_a.xpath('//text()'))
                # Do not bind the separator to _, which would shadow the
                # translation function inside this method.
                title, _sep, author = heading.rpartition('by ')
                author = author.split('-')[0]
                price = '$0.00'

                counter -= 1

                s = SearchResult()
                s.cover_url = ''
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.strip()
                s.detail_item = '/ebooks/' + book_id.strip()
                s.drm = SearchResult.DRM_UNLOCKED

                yield s

    def get_details(self, search_result, timeout):
        '''
        Load the mobile page for search_result and fill in its formats
        from the link titles found there. Always returns True.
        '''
        url = 'http://m.gutenberg.org/'

        br = browser()
        with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf:
            idata = html.fromstring(nf.read())
            search_result.formats = ', '.join(idata.xpath('//a[@type!="application/atom+xml"]//span[@class="title"]/text()'))

        return True

View File

@ -1,95 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import re
import urllib
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser, url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class ManyBooksStore(BasicStoreConfig, StorePlugin):
    '''
    Store plugin for manybooks.net. Searching goes through a
    site-restricted Google query rather than the site's own search.
    '''

    def open(self, parent=None, detail_item=None, external=False):
        '''
        Open the store, or one book's page when ``detail_item`` is given.

        :param parent: Passed through to WebStoreDialog.
        :param detail_item: Path on the site, appended to the base address.
        :param external: When true (or when the ``open_external`` config
            value is set) the address is handed to ``open_url`` instead of
            being shown in a WebStoreDialog.
        '''
        url = 'http://manybooks.net/'

        detail_url = None
        if detail_item:
            detail_url = url + detail_item

        if external or self.config.get('open_external', False):
            open_url(QUrl(url_slash_cleaner(detail_url if detail_url else url)))
        else:
            d = WebStoreDialog(self.gui, url, parent, detail_url)
            d.setWindowTitle(self.name)
            d.set_tags(self.config.get('tags', ''))
            d.exec_()

    def search(self, query, max_results=10, timeout=60):
        '''
        Search manybooks.net for ``query`` and yield a SearchResult per hit.

        :param query: Search text; it is URL-quoted and appended to a
            site-restricted Google query.
        :param max_results: Maximum number of results to yield.
        :param timeout: Timeout handed to the browser for the request.
        '''
        # ManyBooks website separates results for title and author.
        # It also doesn't do a clear job of referencing authors and
        # secondary titles. Google is also faster.
        # Using a google search so we can search on both fields at once.
        search_url = 'http://www.google.com/xhtml?q=site:manybooks.net+' + urllib.quote_plus(query)

        br = browser()

        counter = max_results
        with closing(br.open(search_url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="edewpi"]//div[@class="r ld"]'):
                if counter <= 0:
                    break

                anchors = data.xpath('div[@class="jd"]/a')
                if not anchors:
                    # No link in this result, so there is nothing to point at.
                    continue
                anchor = anchors[0]

                # An anchor without an href attribute yields None; normalise
                # it to '' so the substring test below cannot raise TypeError.
                href = anchor.get('href') or ''
                # Keep what follows the last 'u=' and drop the final two
                # characters.
                # NOTE(review): the reason for trimming exactly two
                # characters is not visible here -- confirm against the
                # result markup.
                book_url = href.split('u=')[-1][:-2]
                if '/titles/' not in book_url:
                    continue
                book_id = book_url.split('/')[-1].strip()

                # Re-parse the anchor on its own so the absolute ``//text()``
                # query below only sees text belonging to this result.
                anchor = html.fromstring(html.tostring(anchor))
                heading = ''.join(anchor.xpath('//text()'))
                # Everything after the last 'by ' is taken as the author,
                # cut off at the first '-'.
                title, _sep, author = heading.rpartition('by ')
                author = author.split('-')[0]

                # The thumbnail address is assembled from the leading
                # non-digit part of the id (minus 'etext') and the part of
                # the id before its first '.'.
                cover_url = ''
                mo = re.match(r'^\D+', book_id)
                if mo:
                    cover_name = mo.group().replace('etext', '')
                    cover_id = book_id.split('.')[0]
                    cover_url = ('http://www.manybooks.net/images/' + book_id[0] +
                                 '/' + cover_name + '/' + cover_id + '-thumb.jpg')

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = '$0.00'
                s.detail_item = '/titles/' + book_id
                s.drm = SearchResult.DRM_UNLOCKED
                # Was assigned to the misspelled attribute ``formts``, which
                # left ``formats`` unset on every result.
                s.formats = 'EPUB, PDB (eReader, PalmDoc, zTXT, Plucker, iSilo), FB2, ZIP, AZW, MOBI, PRC, LIT, PKG, PDF, TXT, RB, RTF, LRF, TCR, JAR'

                yield s

View File

@ -1,84 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import urllib2
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser, url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class OpenLibraryStore(BasicStoreConfig, StorePlugin):
    '''
    Store plugin for openlibrary.org, limited to books whose search
    entry carries a "Read" action.
    '''

    def open(self, parent=None, detail_item=None, external=False):
        '''
        Open the store, or one book's page when ``detail_item`` is given.

        :param parent: Passed through to WebStoreDialog.
        :param detail_item: Path on the site, appended to the base address.
        :param external: When true (or when the ``open_external`` config
            value is set) the address is handed to ``open_url`` instead of
            being shown in a WebStoreDialog.
        '''
        base_url = 'http://openlibrary.org/'
        target = (base_url + detail_item) if detail_item else None

        if external or self.config.get('open_external', False):
            open_url(QUrl(url_slash_cleaner(target if target else base_url)))
            return

        dialog = WebStoreDialog(self.gui, base_url, parent, target)
        dialog.setWindowTitle(self.name)
        dialog.set_tags(self.config.get('tags', ''))
        dialog.exec_()

    def search(self, query, max_results=10, timeout=60):
        '''
        Search openlibrary.org for ``query`` and yield a SearchResult per hit.

        :param query: Search text, URL-quoted into the search address.
        :param max_results: Maximum number of results to yield.
        :param timeout: Timeout handed to the browser for the request.
        '''
        search_url = 'http://openlibrary.org/search?q=' + urllib2.quote(query) + '&has_fulltext=true'

        br = browser()

        remaining = max_results
        with closing(br.open(search_url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for entry in doc.xpath('//div[@id="searchResults"]/ul[@id="siteSearch"]/li'):
                if remaining <= 0:
                    break

                # Don't include books that don't have downloadable files.
                readable = entry.xpath('boolean(./span[@class="actions"]//span[@class="label" and contains(text(), "Read")])')
                if not readable:
                    continue

                book_path = ''.join(entry.xpath('./span[@class="bookcover"]/a/@href'))
                if not book_path:
                    continue

                cover = ''.join(entry.xpath('./span[@class="bookcover"]/a/img/@src'))
                book_title = ''.join(entry.xpath('.//h3[@class="booktitle"]/a[@class="results"]/text()'))
                book_author = ''.join(entry.xpath('.//span[@class="bookauthor"]/a/text()'))

                remaining -= 1

                s = SearchResult()
                s.cover_url = cover
                s.title = book_title.strip()
                s.author = book_author.strip()
                s.price = '$0.00'
                s.detail_item = book_path.strip()
                s.drm = SearchResult.DRM_UNLOCKED

                yield s

    def get_details(self, search_result, timeout):
        '''
        Fetch the book's page and store the distinct download link texts,
        comma separated, in ``search_result.formats``.

        :param search_result: Result whose ``detail_item`` locates the page
            and whose ``formats`` attribute is filled in.
        :param timeout: Timeout handed to the browser for the request.
        :return: Always True.
        '''
        page_url = url_slash_cleaner('http://openlibrary.org/' + search_result.detail_item)

        br = browser()
        with closing(br.open(page_url, timeout=timeout)) as nf:
            tree = html.fromstring(nf.read())
            # A set removes repeated link texts; its order is arbitrary.
            unique_formats = set(tree.xpath('//a[contains(@title, "Download")]/text()'))
            search_result.formats = ', '.join(unique_formats)
        return True

View File

@ -0,0 +1,100 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import mimetypes
from contextlib import closing
from lxml import etree
from PyQt4.Qt import QUrl
from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
from calibre.utils.opensearch.description import Description
from calibre.utils.opensearch.query import Query
class OpenSearchOPDSStore(StorePlugin):
    '''
    Store plugin base that searches through an OpenSearch description and
    reads the results as a feed of ``entry`` elements.
    '''

    # Address of the OpenSearch description document. Empty here.
    open_search_url = ''
    # Address of the store's web site. Empty here.
    web_url = ''

    def open(self, parent=None, detail_item=None, external=False):
        '''
        Open ``detail_item`` if given, otherwise the store's ``web_url``.

        Does nothing when there is neither a ``web_url`` nor a
        ``detail_item`` to show.

        :param parent: Passed through to WebStoreDialog.
        :param detail_item: Full address of a single book.
        :param external: When true (or when the ``open_external`` config
            value is set) the address is handed to ``open_url`` instead of
            being shown in a WebStoreDialog.
        '''
        # ``web_url`` always exists as a class attribute, so the old
        # hasattr() guard could never fire; test for a usable value.
        web_url = getattr(self, 'web_url', '')
        if not (web_url or detail_item):
            return

        if external or self.config.get('open_external', False):
            open_url(QUrl(detail_item if detail_item else web_url))
        else:
            d = WebStoreDialog(self.gui, web_url, parent, detail_item)
            d.setWindowTitle(self.name)
            d.set_tags(self.config.get('tags', ''))
            d.exec_()

    def search(self, query, max_results=10, timeout=60):
        '''
        Run ``query`` against the store's OpenSearch endpoint and yield a
        SearchResult for each ``entry`` in the response.

        Yields nothing when ``open_search_url`` is empty or the description
        offers no usable URL template.

        :param query: Search terms placed into the OpenSearch query.
        :param max_results: Requested count and maximum number of results
            yielded.
        :param timeout: Timeout handed to the browser for the request.
        '''
        # ``open_search_url`` always exists as a class attribute, so the old
        # hasattr() guard could never fire; test for a usable value.
        open_search_url = getattr(self, 'open_search_url', '')
        if not open_search_url:
            return

        description = Description(open_search_url)
        url_template = description.get_best_template()
        if not url_template:
            return
        oquery = Query(url_template)

        # set up initial values
        oquery.searchTerms = query
        oquery.count = max_results
        url = oquery.url()

        counter = max_results
        br = browser()
        with closing(br.open(url, timeout=timeout)) as f:
            doc = etree.fromstring(f.read())
            # local-name() is used throughout so element namespaces are
            # ignored.
            for data in doc.xpath('//*[local-name() = "entry"]'):
                if counter <= 0:
                    break

                counter -= 1

                s = SearchResult()

                # Default to the entry id; a "buy" link below overrides it.
                s.detail_item = ''.join(data.xpath('./*[local-name() = "id"]/text()')).strip()

                for link in data.xpath('./*[local-name() = "link"]'):
                    rel = link.get('rel')
                    href = link.get('href')
                    mime_type = link.get('type')

                    # Links missing any of the three attributes are ignored.
                    if not (rel and href and mime_type):
                        continue

                    if rel in ('http://opds-spec.org/thumbnail', 'http://opds-spec.org/image/thumbnail'):
                        s.cover_url = href
                    elif rel == u'http://opds-spec.org/acquisition/buy':
                        s.detail_item = href
                    elif rel == u'http://opds-spec.org/acquisition':
                        # guess_extension() returns None for unknown types;
                        # such links are skipped.
                        ext = mimetypes.guess_extension(mime_type)
                        if ext:
                            # Drop the leading '.' and use the upper-cased
                            # extension as the format name.
                            ext = ext[1:].upper().strip()
                            s.downloads[ext] = href
                s.formats = ', '.join(s.downloads.keys()).strip()

                s.title = ' '.join(data.xpath('./*[local-name() = "title"]//text()')).strip()
                s.author = ', '.join(data.xpath('./*[local-name() = "author"]//*[local-name() = "name"]//text()')).strip()

                price_e = data.xpath('.//*[local-name() = "price"][1]')
                if price_e:
                    price_e = price_e[0]
                    currency_code = price_e.get('currencycode', '')
                    price = ''.join(price_e.xpath('.//text()')).strip()
                    s.price = (currency_code + ' ' + price).strip()

                yield s

View File

@ -1,84 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import urllib
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser, url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class PragmaticBookshelfStore(BasicStoreConfig, StorePlugin):
    '''
    Store plugin for pragprog.com, searched through its catalog search
    address.
    '''

    def open(self, parent=None, detail_item=None, external=False):
        '''
        Open ``detail_item`` if given, otherwise the store front page.

        :param parent: Passed through to WebStoreDialog.
        :param detail_item: Full address of a single book.
        :param external: When true (or when the ``open_external`` config
            value is set) the address is handed to ``open_url`` instead of
            being shown in a WebStoreDialog.
        '''
        home = 'http://pragprog.com/'

        if external or self.config.get('open_external', False):
            target = detail_item if detail_item else home
            open_url(QUrl(url_slash_cleaner(target)))
            return

        dialog = WebStoreDialog(self.gui, home, parent, detail_item)
        dialog.setWindowTitle(self.name)
        dialog.set_tags(self.config.get('tags', ''))
        dialog.exec_()

    def search(self, query, max_results=10, timeout=60):
        '''
        OPDS based search.

        We really should get the catalog from http://pragprog.com/catalog.opds
        and look for the application/opensearchdescription+xml entry.
        Then get the opensearch description to get the search url and
        format. However, we are going to be lazy and hard code it.

        Entries without a "buy" link or without any price text are skipped.
        '''
        search_url = 'http://pragprog.com/catalog/search?q=' + urllib.quote_plus(query)

        br = browser()

        remaining = max_results
        with closing(br.open(search_url, timeout=timeout)) as f:
            # Use html instead of etree as html allows us
            # to ignore the namespace easily.
            doc = html.fromstring(f.read())
            for entry in doc.xpath('//entry'):
                if remaining <= 0:
                    break

                buy_link = ''.join(entry.xpath('.//link[@rel="http://opds-spec.org/acquisition/buy"]/@href'))
                if not buy_link:
                    continue

                currency = ''.join(entry.xpath('.//price/@currencycode')).strip()
                amount = ''.join(entry.xpath('.//price/text()')).strip()
                price = (currency + ' ' + amount).strip()
                if not price:
                    continue

                cover = ''.join(entry.xpath('.//link[@rel="http://opds-spec.org/cover"]/@href'))
                book_title = ''.join(entry.xpath('.//title/text()'))
                book_author = ''.join(entry.xpath('.//author//text()'))

                remaining -= 1

                s = SearchResult()
                s.cover_url = cover
                s.title = book_title.strip()
                s.author = book_author.strip()
                s.price = price
                s.detail_item = buy_link.strip()
                s.drm = SearchResult.DRM_UNLOCKED
                s.formats = 'EPUB, PDF, MOBI'

                yield s

View File

@ -45,6 +45,7 @@ class AdvSearchBuilderDialog(QDialog, Ui_Dialog):
self.author_box.setText('') self.author_box.setText('')
self.price_box.setText('') self.price_box.setText('')
self.format_box.setText('') self.format_box.setText('')
self.download_combo.setCurrentIndex(0)
self.affiliate_combo.setCurrentIndex(0) self.affiliate_combo.setCurrentIndex(0)
def tokens(self, raw): def tokens(self, raw):
@ -119,6 +120,9 @@ class AdvSearchBuilderDialog(QDialog, Ui_Dialog):
format = unicode(self.format_box.text()).strip() format = unicode(self.format_box.text()).strip()
if format: if format:
ans.append('format:"' + self.mc + format + '"') ans.append('format:"' + self.mc + format + '"')
download = unicode(self.download_combo.currentText()).strip()
if download:
ans.append('download:' + download)
affiliate = unicode(self.affiliate_combo.currentText()).strip() affiliate = unicode(self.affiliate_combo.currentText()).strip()
if affiliate: if affiliate:
ans.append('affiliate:' + affiliate) ans.append('affiliate:' + affiliate)

View File

@ -226,7 +226,7 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="7" column="0" colspan="2"> <item row="8" column="0" colspan="2">
<layout class="QHBoxLayout" name="horizontalLayout_6"> <layout class="QHBoxLayout" name="horizontalLayout_6">
<item> <item>
<widget class="QPushButton" name="clear_button"> <widget class="QPushButton" name="clear_button">
@ -244,7 +244,7 @@
</item> </item>
</layout> </layout>
</item> </item>
<item row="6" column="1"> <item row="7" column="1">
<spacer name="verticalSpacer"> <spacer name="verticalSpacer">
<property name="orientation"> <property name="orientation">
<enum>Qt::Vertical</enum> <enum>Qt::Vertical</enum>
@ -283,14 +283,14 @@
<item row="3" column="1"> <item row="3" column="1">
<widget class="EnLineEdit" name="price_box"/> <widget class="EnLineEdit" name="price_box"/>
</item> </item>
<item row="5" column="0"> <item row="6" column="0">
<widget class="QLabel" name="label_9"> <widget class="QLabel" name="label_9">
<property name="text"> <property name="text">
<string>Affiliate:</string> <string>Affiliate:</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="5" column="1"> <item row="6" column="1">
<widget class="QComboBox" name="affiliate_combo"> <widget class="QComboBox" name="affiliate_combo">
<item> <item>
<property name="text"> <property name="text">
@ -309,6 +309,32 @@
</item> </item>
</widget> </widget>
</item> </item>
<item row="5" column="0">
<widget class="QLabel" name="label_12">
<property name="text">
<string>Download:</string>
</property>
</widget>
</item>
<item row="5" column="1">
<widget class="QComboBox" name="download_combo">
<item>
<property name="text">
<string/>
</property>
</item>
<item>
<property name="text">
<string>true</string>
</property>
</item>
<item>
<property name="text">
<string>false</string>
</property>
</item>
</widget>
</item>
</layout> </layout>
</widget> </widget>
</widget> </widget>

View File

@ -22,6 +22,7 @@ from calibre.utils.icu import sort_key
from calibre.utils.search_query_parser import SearchQueryParser from calibre.utils.search_query_parser import SearchQueryParser
def comparable_price(text): def comparable_price(text):
text = re.sub(r'[^0-9.,]', '', text)
if len(text) < 3 or text[-3] not in ('.', ','): if len(text) < 3 or text[-3] not in ('.', ','):
text += '00' text += '00'
text = re.sub(r'\D', '', text) text = re.sub(r'\D', '', text)
@ -33,7 +34,7 @@ class Matches(QAbstractItemModel):
total_changed = pyqtSignal(int) total_changed = pyqtSignal(int)
HEADERS = [_('Cover'), _('Title'), _('Price'), _('DRM'), _('Store'), ''] HEADERS = [_('Cover'), _('Title'), _('Price'), _('DRM'), _('Store'), _('Download'), _('Affiliate')]
HTML_COLS = (1, 4) HTML_COLS = (1, 4)
def __init__(self, cover_thread_count=2, detail_thread_count=4): def __init__(self, cover_thread_count=2, detail_thread_count=4):
@ -47,6 +48,8 @@ class Matches(QAbstractItemModel):
Qt.SmoothTransformation) Qt.SmoothTransformation)
self.DONATE_ICON = QPixmap(I('donate.png')).scaledToHeight(16, self.DONATE_ICON = QPixmap(I('donate.png')).scaledToHeight(16,
Qt.SmoothTransformation) Qt.SmoothTransformation)
self.DOWNLOAD_ICON = QPixmap(I('arrow-down.png')).scaledToHeight(16,
Qt.SmoothTransformation)
# All matches. Used to determine the order to display # All matches. Used to determine the order to display
# self.matches because the SearchFilter returns # self.matches because the SearchFilter returns
@ -181,9 +184,11 @@ class Matches(QAbstractItemModel):
elif result.drm == SearchResult.DRM_UNKNOWN: elif result.drm == SearchResult.DRM_UNKNOWN:
return QVariant(self.DRM_UNKNOWN_ICON) return QVariant(self.DRM_UNKNOWN_ICON)
if col == 5: if col == 5:
if result.downloads:
return QVariant(self.DOWNLOAD_ICON)
if col == 6:
if result.affiliate: if result.affiliate:
return QVariant(self.DONATE_ICON) return QVariant(self.DONATE_ICON)
return NONE
elif role == Qt.ToolTipRole: elif role == Qt.ToolTipRole:
if col == 1: if col == 1:
return QVariant('<p>%s</p>' % result.title) return QVariant('<p>%s</p>' % result.title)
@ -199,6 +204,9 @@ class Matches(QAbstractItemModel):
elif col == 4: elif col == 4:
return QVariant('<p>%s</p>' % result.formats) return QVariant('<p>%s</p>' % result.formats)
elif col == 5: elif col == 5:
if result.downloads:
return QVariant('<p>' + _('The following formats can be downloaded directly: %s.') % ', '.join(result.downloads.keys()) + '</p>')
elif col == 6:
if result.affiliate: if result.affiliate:
return QVariant('<p>' + _('Buying from this store supports the calibre developer: %s.') % result.plugin_author + '</p>') return QVariant('<p>' + _('Buying from this store supports the calibre developer: %s.') % result.plugin_author + '</p>')
elif role == Qt.SizeHintRole: elif role == Qt.SizeHintRole:
@ -221,6 +229,11 @@ class Matches(QAbstractItemModel):
elif col == 4: elif col == 4:
text = result.store_name text = result.store_name
elif col == 5: elif col == 5:
if result.downloads:
text = 'a'
else:
text = 'b'
elif col == 6:
if result.affiliate: if result.affiliate:
text = 'a' text = 'a'
else: else:
@ -257,6 +270,8 @@ class SearchFilter(SearchQueryParser):
'author', 'author',
'authors', 'authors',
'cover', 'cover',
'download',
'downloads',
'drm', 'drm',
'format', 'format',
'formats', 'formats',
@ -279,9 +294,12 @@ class SearchFilter(SearchQueryParser):
return self.srs return self.srs
def get_matches(self, location, query): def get_matches(self, location, query):
query = query.strip()
location = location.lower().strip() location = location.lower().strip()
if location == 'authors': if location == 'authors':
location = 'author' location = 'author'
elif location == 'downloads':
location = 'download'
elif location == 'formats': elif location == 'formats':
location = 'format' location = 'format'
@ -308,12 +326,13 @@ class SearchFilter(SearchQueryParser):
'author': lambda x: x.author.lower(), 'author': lambda x: x.author.lower(),
'cover': attrgetter('cover_url'), 'cover': attrgetter('cover_url'),
'drm': attrgetter('drm'), 'drm': attrgetter('drm'),
'download': attrgetter('downloads'),
'format': attrgetter('formats'), 'format': attrgetter('formats'),
'price': lambda x: comparable_price(x.price), 'price': lambda x: comparable_price(x.price),
'store': lambda x: x.store_name.lower(), 'store': lambda x: x.store_name.lower(),
'title': lambda x: x.title.lower(), 'title': lambda x: x.title.lower(),
} }
for x in ('author', 'format'): for x in ('author', 'download', 'format'):
q[x+'s'] = q[x] q[x+'s'] = q[x]
for sr in self.srs: for sr in self.srs:
for locvalue in locations: for locvalue in locations:
@ -347,7 +366,7 @@ class SearchFilter(SearchQueryParser):
matches.add(sr) matches.add(sr)
continue continue
# this is bool or treated as bool, so can't match below. # this is bool or treated as bool, so can't match below.
if locvalue in ('affiliate', 'drm'): if locvalue in ('affiliate', 'drm', 'download', 'downloads'):
continue continue
try: try:
### Can't separate authors because comma is used for name sep and author sep ### Can't separate authors because comma is used for name sep and author sep

View File

@ -6,13 +6,18 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>' __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from PyQt4.Qt import (QTreeView) from functools import partial
from PyQt4.Qt import (pyqtSignal, QMenu, QTreeView)
from calibre.gui2.metadata.single_download import RichTextDelegate from calibre.gui2.metadata.single_download import RichTextDelegate
from calibre.gui2.store.search.models import Matches from calibre.gui2.store.search.models import Matches
class ResultsView(QTreeView): class ResultsView(QTreeView):
download_requested = pyqtSignal(object)
open_requested = pyqtSignal(object)
def __init__(self, *args): def __init__(self, *args):
QTreeView.__init__(self,*args) QTreeView.__init__(self,*args)
@ -24,3 +29,18 @@ class ResultsView(QTreeView):
for i in self._model.HTML_COLS: for i in self._model.HTML_COLS:
self.setItemDelegateForColumn(i, self.rt_delegate) self.setItemDelegateForColumn(i, self.rt_delegate)
def contextMenuEvent(self, event):
    '''
    Show a context menu for the result under the cursor.

    The menu offers "Download..." (disabled when the result has no
    downloads) and "Goto in store...". Choosing an entry emits
    ``download_requested`` or ``open_requested`` with the result.
    Nothing is shown when the event position is not over a valid item.
    '''
    clicked = self.indexAt(event.pos())
    if not clicked.isValid():
        return

    result = self.model().get_result(clicked)

    on_download = partial(self.download_requested.emit, result)
    on_open = partial(self.open_requested.emit, result)

    menu = QMenu()
    download_action = menu.addAction(_('Download...'), on_download)
    # Only offer the download when the result carries download links.
    download_action.setEnabled(bool(result.downloads))
    menu.addSeparator()
    menu.addAction(_('Goto in store...'), on_open)
    menu.exec_(event.globalPos())

View File

@ -14,6 +14,7 @@ from PyQt4.Qt import (Qt, QDialog, QDialogButtonBox, QTimer, QCheckBox, QLabel,
QComboBox) QComboBox)
from calibre.gui2 import JSONConfig, info_dialog from calibre.gui2 import JSONConfig, info_dialog
from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
from calibre.gui2.progress_indicator import ProgressIndicator from calibre.gui2.progress_indicator import ProgressIndicator
from calibre.gui2.store.config.chooser.chooser_widget import StoreChooserWidget from calibre.gui2.store.config.chooser.chooser_widget import StoreChooserWidget
from calibre.gui2.store.config.search.search_widget import StoreConfigWidget from calibre.gui2.store.config.search.search_widget import StoreConfigWidget
@ -21,6 +22,7 @@ from calibre.gui2.store.search.adv_search_builder import AdvSearchBuilderDialog
from calibre.gui2.store.search.download_thread import SearchThreadPool, \ from calibre.gui2.store.search.download_thread import SearchThreadPool, \
CacheUpdateThreadPool CacheUpdateThreadPool
from calibre.gui2.store.search.search_ui import Ui_Dialog from calibre.gui2.store.search.search_ui import Ui_Dialog
from calibre.utils.filenames import ascii_filename
class SearchDialog(QDialog, Ui_Dialog): class SearchDialog(QDialog, Ui_Dialog):
@ -72,7 +74,9 @@ class SearchDialog(QDialog, Ui_Dialog):
self.search.clicked.connect(self.do_search) self.search.clicked.connect(self.do_search)
self.checker.timeout.connect(self.get_results) self.checker.timeout.connect(self.get_results)
self.progress_checker.timeout.connect(self.check_progress) self.progress_checker.timeout.connect(self.check_progress)
self.results_view.activated.connect(self.open_store) self.results_view.activated.connect(self.result_item_activated)
self.results_view.download_requested.connect(self.download_book)
self.results_view.open_requested.connect(self.open_store)
self.results_view.model().total_changed.connect(self.update_book_total) self.results_view.model().total_changed.connect(self.update_book_total)
self.select_all_stores.clicked.connect(self.stores_select_all) self.select_all_stores.clicked.connect(self.stores_select_all)
self.select_invert_stores.clicked.connect(self.stores_select_invert) self.select_invert_stores.clicked.connect(self.stores_select_invert)
@ -129,11 +133,15 @@ class SearchDialog(QDialog, Ui_Dialog):
# Title / Author # Title / Author
self.results_view.setColumnWidth(1,int(total*.40)) self.results_view.setColumnWidth(1,int(total*.40))
# Price # Price
self.results_view.setColumnWidth(2,int(total*.20)) self.results_view.setColumnWidth(2,int(total*.12))
# DRM # DRM
self.results_view.setColumnWidth(3, int(total*.15)) self.results_view.setColumnWidth(3, int(total*.15))
# Store / Formats # Store / Formats
self.results_view.setColumnWidth(4, int(total*.25)) self.results_view.setColumnWidth(4, int(total*.25))
# Download
self.results_view.setColumnWidth(5, 20)
# Affiliate
self.results_view.setColumnWidth(6, 20)
def do_search(self): def do_search(self):
# Stop all running threads. # Stop all running threads.
@ -183,7 +191,7 @@ class SearchDialog(QDialog, Ui_Dialog):
query = re.sub(r'%s:"(?P<a>[^\s"]+)"' % loc, '\g<a>', query) query = re.sub(r'%s:"(?P<a>[^\s"]+)"' % loc, '\g<a>', query)
query = query.replace('%s:' % loc, '') query = query.replace('%s:' % loc, '')
# Remove the prefix and search text. # Remove the prefix and search text.
for loc in ('cover', 'drm', 'format', 'formats', 'price', 'store'): for loc in ('cover', 'download', 'downloads', 'drm', 'format', 'formats', 'price', 'store'):
query = re.sub(r'%s:"[^"]"' % loc, '', query) query = re.sub(r'%s:"[^"]"' % loc, '', query)
query = re.sub(r'%s:[^\s]*' % loc, '', query) query = re.sub(r'%s:[^\s]*' % loc, '', query)
# Remove logic. # Remove logic.
@ -330,8 +338,23 @@ class SearchDialog(QDialog, Ui_Dialog):
def update_book_total(self, total): def update_book_total(self, total):
self.total.setText('%s' % total) self.total.setText('%s' % total)
def open_store(self, index): def result_item_activated(self, index):
result = self.results_view.model().get_result(index) result = self.results_view.model().get_result(index)
if result.downloads:
self.download_book(result)
else:
self.open_store(result)
def download_book(self, result):
    '''
    Ask which of the result's downloadable formats to fetch and, if the
    dialog is accepted, hand the matching link to ``self.gui.download_ebook``.

    :param result: Search result whose ``downloads`` maps format names to
        links and whose ``title`` is used to build the file name.
    '''
    chooser = ChooseFormatDialog(self, _('Choose format to download to your library.'), result.downloads.keys())
    if chooser.exec_() != chooser.Accepted:
        return

    chosen = chooser.format()
    # File name is "<title>.<ext>" passed through ascii_filename().
    filename = ascii_filename(result.title + '.' + chosen.lower())
    self.gui.download_ebook(result.downloads[chosen], filename=filename)
def open_store(self, result):
self.gui.istores[result.store_name].open(self, result.detail_item, self.open_external.isChecked()) self.gui.istores[result.store_name].open(self, result.detail_item, self.open_external.isChecked())
def check_progress(self): def check_progress(self):

Some files were not shown because too many files have changed in this diff Show More