Merge from trunk

This commit is contained in:
Charles Haley 2011-09-23 21:14:49 +02:00
commit 7b5f14462a
139 changed files with 31346 additions and 24610 deletions

View File

@ -19,6 +19,76 @@
# new recipes:
# - title:
- version: 0.8.20
date: 2011-09-23
new features:
- title: "MOBI Output: Map a larger set of font names to sans-serif/monospace font in the MOBI file"
- title: "Get Books: Allow searching on the DRM column in the results."
tickets: [852514]
- title: "Manage tags/series/etc dialog: Add a was column to show the old value when changing values."
tickets: [846538]
- title: "Template language: Add new functions to manipulate language codes"
tickets: [832084]
bug fixes:
- title: "MOBI Output: Don't set cdetype when option to enable sharing instead of syncing is specified. This fixes the option."
- title: "Conversion pipeline: Fix crash caused by empty <style> elements."
tickets: [775277]
- title: "Get Books: Fix Woblink store"
- title: "MOBI Input: Correctly handle MOBI files that have been passed through a DRM removal tool that leaves the DRM fields in the header."
tickets: [855732]
- title: "Fix typo preventing the updating of metadata in MOBI files served by the content server"
- title: "Get Books: Handle non ASCII filenames for downloaded books"
tickets: [855109]
- title: "When generating the title sort string and stripping a leading article, strip leading punctuation that remains after removing the article"
tickets: [855070]
- title: "Fix downloading metadata in the Edit metadata dialog could result in off by one day published dates, in timezones behind GMT"
tickets: [855143]
- title: "Fix handling of title_sort and custom columns when creating a BiBTeX catalog."
tickets: [853249]
- title: "TXT Markdown Input: Change handling of _ to work mid word."
- title: "Fix Check library reporting unknown files as both missing and unknown"
tickets: [846926]
- title: "Search/Replace: Permit .* to match empty tag like columns."
tickets: [840517]
improved recipes:
- Cicero (DE)
- Taz.de
- Ming Pao - HK
- Macleans Magazine
- IDG.se
- PC World (eng)
- LA Times
new recipes:
- title: Ekantipur (Nepal)
author: fab4.ilam
- title: Various Polish news sources
author: fenuks
- title: Taipei Times and China Post
author: Krittika Goyal
- title: Berliner Zeitung
author: ape
- version: 0.8.19
date: 2011-09-16

View File

@ -1,15 +1,52 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Bash_org_pl(BasicNewsRecipe):
title = u'Bash.org.pl'
__author__ = 'fenuks'
description = 'Bash.org.pl - funny quotations from IRC discussions'
category = 'funny quotations, humour'
language = 'pl'
oldest_article = 15
cover_url = u'http://userlogos.org/files/logos/dzikiosiol/none_0.png'
max_articles_per_feed = 100
max_articles_per_feed = 50
no_stylesheets= True
keep_only_tags= [dict(name='div', attrs={'class':'quote post-content post-body'})]
feeds = [(u'Cytaty', u'http://bash.org.pl/rss')]
keep_only_tags= [dict(name='a', attrs={'class':'qid click'}),
dict(name='div', attrs={'class':'quote post-content post-body'})]
def latest_articles(self):
articles = []
soup=self.index_to_soup(u'http://bash.org.pl/latest/')
#date=soup.find('div', attrs={'class':'right'}).string
tags=soup.findAll('a', attrs={'class':'qid click'})
for a in tags:
title=a.string
url='http://bash.org.pl' +a['href']
articles.append({'title' : title,
'url' : url,
'date' : '',
'description' : ''
})
return articles
def random_articles(self):
articles = []
for i in range(self.max_articles_per_feed):
soup=self.index_to_soup(u'http://bash.org.pl/random/')
#date=soup.find('div', attrs={'class':'right'}).string
url=soup.find('a', attrs={'class':'qid click'})
title=url.string
url='http://bash.org.pl' +url['href']
articles.append({'title' : title,
'url' : url,
'date' : '',
'description' : ''
})
return articles
def parse_index(self):
feeds = []
feeds.append((u"Najnowsze", self.latest_articles()))
feeds.append((u"Losowe", self.random_articles()))
return feeds

View File

@ -0,0 +1,70 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Benchmark_pl(BasicNewsRecipe):
    # Recipe for benchmark.pl, a Polish IT/hardware news site.
    title = u'Benchmark.pl'
    __author__ = 'fenuks'
    description = u'benchmark.pl -IT site'
    cover_url = 'http://www.ieaddons.pl/benchmark/logo_benchmark_new.gif'
    category = 'IT'
    language = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets=True
    # Cut everything from the trailing "Więcej o ..." (read-more) block to </body>.
    preprocess_regexps = [(re.compile(ur'\bWięcej o .*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>')]
    keep_only_tags=[dict(name='div', attrs={'class':['m_zwykly', 'gallery']})]
    remove_tags_after=dict(name='div', attrs={'class':'body'})
    remove_tags=[dict(name='div', attrs={'class':['kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery']})]
    INDEX= 'http://www.benchmark.pl'
    feeds = [(u'Aktualności', u'http://www.benchmark.pl/rss/aktualnosci-pliki.xml'),
             (u'Testy i recenzje', u'http://www.benchmark.pl/rss/testy-recenzje-minirecenzje.xml')]

    def append_page(self, soup, appendtag):
        """Follow the 'next' pager links of a multi-page article and append
        each page's body div to *appendtag*, producing a single document."""
        nexturl = soup.find('span', attrs={'class':'next'})
        while nexturl is not None:
            nexturl= self.INDEX + nexturl.parent['href']
            soup2 = self.index_to_soup(nexturl)
            nexturl=soup2.find('span', attrs={'class':'next'})
            pagetext = soup2.find(name='div', attrs={'class':'body'})
            # Drop the pager widget before appending the next page's text.
            # NOTE(review): .extract() raises AttributeError if 'k_ster' is
            # absent while a 'next' link exists -- confirm the site always
            # renders the pager in that case.
            appendtag.find('div', attrs={'class':'k_ster'}).extract()
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
        # Remove any pager left over on the final assembled page.
        if appendtag.find('div', attrs={'class':'k_ster'}) is not None:
            appendtag.find('div', attrs={'class':'k_ster'}).extract()

    def image_article(self, soup, appendtag):
        """Stitch a picture-gallery article together: turn each page's CSS
        background preview into a real <img> and append subsequent pages."""
        nexturl=soup.find('div', attrs={'class':'preview'})
        if nexturl is not None:
            nexturl=nexturl.find('a', attrs={'class':'move_next'})
            # The image URL lives in an inline style attribute; [16:] skips the
            # leading prefix (presumably "background:url('" -- confirm) and the
            # find("')") below trims the trailing quote/paren.
            image=appendtag.find('div', attrs={'class':'preview'}).div['style'][16:]
            image=self.INDEX + image[:image.find("')")]
            appendtag.find(attrs={'class':'preview'}).name='img'
            appendtag.find(attrs={'class':'preview'})['src']=image
            appendtag.find('a', attrs={'class':'move_next'}).extract()
            while nexturl is not None:
                nexturl= self.INDEX + nexturl['href']
                soup2 = self.index_to_soup(nexturl)
                nexturl=soup2.find('a', attrs={'class':'move_next'})
                image=soup2.find('div', attrs={'class':'preview'}).div['style'][16:]
                image=self.INDEX + image[:image.find("')")]
                soup2.find(attrs={'class':'preview'}).name='img'
                soup2.find(attrs={'class':'preview'})['src']=image
                pagetext=soup2.find('div', attrs={'class':'gallery'})
                # Strip per-page chrome (title, thumbnails, rating panel).
                pagetext.find('div', attrs={'class':'title'}).extract()
                pagetext.find('div', attrs={'class':'thumb'}).extract()
                pagetext.find('div', attrs={'class':'panelOcenaObserwowane'}).extract()
                if nexturl is not None:
                    pagetext.find('a', attrs={'class':'move_next'}).extract()
                pagetext.find('a', attrs={'class':'move_back'}).extract()
                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)

    def preprocess_html(self, soup):
        """Dispatch per article type: gallery pages get image stitching,
        regular articles get plain multi-page concatenation."""
        if soup.find('div', attrs={'class':'preview'}) is not None:
            self.image_article(soup, soup.body)
        else:
            self.append_page(soup, soup.body)
        return soup

View File

@ -0,0 +1,61 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
import re
class SportsIllustratedRecipe(BasicNewsRecipe) :
    # Despite the class name (left over from a template), this recipe fetches
    # the German newspaper Berliner Zeitung from berlinonline.de.
    __author__ = 'ape'
    __copyright__ = 'ape'
    __license__ = 'GPL v3'
    language = 'de'
    description = 'Berliner Zeitung'
    version = 2
    title = u'Berliner Zeitung'
    timefmt = ' [%d.%m.%Y]'
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    publication_type = 'newspaper'
    keep_only_tags = [dict(name='div', attrs={'class':'teaser t_split t_artikel'})]
    INDEX = 'http://www.berlinonline.de/berliner-zeitung/'

    def parse_index(self):
        """Scrape section ("Ressort") links from the front page, then collect
        article teasers from each section page.

        Returns a list of [section_title, articles] pairs in the order the
        sections appear on the index page.
        """
        base = 'http://www.berlinonline.de'
        answer = []
        articles = {}
        more = 1  # running counter used to title teasers without a headline

        soup = self.index_to_soup(self.INDEX)
        # Get list of links to ressorts from index page
        ressort_list = soup.findAll('ul', attrs={'class': re.compile('ressortlist')})
        for ressort in ressort_list[0].findAll('a'):
            feed_title = ressort.string
            print 'Analyzing', feed_title
            if not articles.has_key(feed_title):
                articles[feed_title] = []
                answer.append(feed_title)
            # Load ressort page.
            feed = self.index_to_soup('http://www.berlinonline.de' + ressort['href'])
            # find mainbar div which contains the list of all articles
            for article_container in feed.findAll('div', attrs={'class': re.compile('mainbar')}):
                # iterate over all articles
                for article_teaser in article_container.findAll('div', attrs={'class': re.compile('teaser')}):
                    # extract title of article
                    if article_teaser.h3 != None:
                        article = {'title' : article_teaser.h3.a.string, 'date' : u'', 'url' : base + article_teaser.h3.a['href'], 'description' : u''}
                        articles[feed_title].append(article)
                    else:
                        # Skip teasers for missing photos
                        if article_teaser.div.p.contents[0].find('Foto:') > -1:
                            continue
                        # Headline-less teaser: synthesize a numbered title.
                        article = {'title': 'Weitere Artikel ' + str(more), 'date': u'', 'url': base + article_teaser.div.p.a['href'], 'description': u''}
                        articles[feed_title].append(article)
                        more += 1
        # Keep only sections that actually yielded articles, in page order.
        answer = [[key, articles[key]] for key in answer if articles.has_key(key)]
        return answer

    def get_masthead_url(self):
        """Return the newspaper's logo for use as the masthead image."""
        return 'http://www.berlinonline.de/.img/berliner-zeitung/blz_logo.gif'

40
recipes/cgm_pl.recipe Normal file
View File

@ -0,0 +1,40 @@
from calibre.web.feeds.news import BasicNewsRecipe
class CGM(BasicNewsRecipe):
    """Recipe for CGM (Codzienna Gazeta Muzyczna), a Polish daily music site."""
    title = u'CGM'
    oldest_article = 7
    __author__ = 'fenuks'
    description = u'Codzienna Gazeta Muzyczna'
    cover_url = 'http://www.krafcy.com/foto/tinymce/Image/cgm%281%29.jpg'
    category = 'music'
    language = 'pl'
    use_embedded_content = False
    max_articles_per_feed = 100
    # Fix: was misspelled 'no_stylesheers', an attribute BasicNewsRecipe never
    # reads, so site stylesheets were being kept despite the author's intent.
    no_stylesheets = True
    extra_css = 'div {color:black;} strong {color:black;} span {color:black;} p {color:black;}'
    remove_tags_before = dict(id='mainContent')
    remove_tags_after = dict(name='div', attrs={'class':'fbContainer'})
    remove_tags = [dict(name='div', attrs={'class':'fbContainer'}),
                   dict(name='p', attrs={'class':['tagCloud', 'galleryAuthor']}),
                   dict(id=['movieShare', 'container'])]
    feeds = [(u'Informacje', u'http://www.cgm.pl/rss.xml'),
             (u'Polecamy', u'http://www.cgm.pl/rss,4,news.xml'),
             (u'Recenzje', u'http://www.cgm.pl/rss,1,news.xml')]

    # Known ad-banner image path fragments; any <img> whose src contains one
    # of these is removed from the article.
    AD_IMAGE_FRAGMENTS = ('/_vault/_article_photos/5841.jpg',
                          '_vault/_article_photos/5807.jpg',
                          'article_photos/5841.jpg',
                          'article_photos/5825.jpg',
                          '_article_photos/5920.jpg',
                          '_article_photos/5919.jpg',
                          '_article_photos/5918.jpg',
                          '_article_photos/5914.jpg',
                          '_article_photos/5911.jpg',
                          '_article_photos/5923.jpg',
                          '_article_photos/5921.jpg')

    def preprocess_html(self, soup):
        """Strip known ad images and convert the Flash gallery embed into a
        plain <img> pointing at the gallery photo."""
        for img in soup.findAll('img'):
            # Replaces the original unreadable 11-term `or` chain; same set of
            # substring checks against the image src.
            if any(frag in img['src'] for frag in self.AD_IMAGE_FRAGMENTS):
                img.extract()
        gallery = soup.find('div', attrs={'class':'galleryFlash'})
        if gallery:
            embed = gallery.find('embed')
            if embed:
                # [35:] drops the Flash player prefix of the src, leaving the
                # photo filename, which is remapped to the static photo URL.
                photo = embed['src'][35:]
                photo = 'http://www.cgm.pl/_vault/_gallery/_photo/' + photo
                for param in gallery.findAll(name='param'):
                    param.extract()
                gallery.contents[1].name = 'img'
                gallery.contents[1]['src'] = photo
        return soup

29
recipes/china_post.recipe Normal file
View File

@ -0,0 +1,29 @@
from calibre.web.feeds.news import BasicNewsRecipe
class CP(BasicNewsRecipe):
    """Fetch The China Post (chinapost.com.tw) via its public RSS feeds.

    Relies on calibre's auto_cleanup heuristics rather than hand-written
    keep/remove tag rules.
    """

    title = u'China Post'
    __author__ = 'Krittika Goyal'
    language = 'en_CN'

    oldest_article = 1  # days
    max_articles_per_feed = 25
    use_embedded_content = False
    no_stylesheets = True
    auto_cleanup = True

    # (section name, RSS URL) pairs, one per site section.
    feeds = [
        ('Top Stories', 'http://www.chinapost.com.tw/rss/front.xml'),
        ('Taiwan', 'http://www.chinapost.com.tw/rss/taiwan.xml'),
        ('China', 'http://www.chinapost.com.tw/rss/china.xml'),
        ('Business', 'http://www.chinapost.com.tw/rss/business.xml'),
        ('World', 'http://www.chinapost.com.tw/rss/international.xml'),
        ('Sports', 'http://www.chinapost.com.tw/rss/sports.xml'),
    ]

View File

@ -1,38 +1,52 @@
from calibre.web.feeds.news import BasicNewsRecipe
class BasicUserRecipe1316245412(BasicNewsRecipe):
#from calibre.utils.magick import Image, PixelWand
title = u'Cicero Online'
description = u'Magazin f\xfcr politische Kultur'
description = u'Magazin f\xfcr politische Kultur (RSS Version)'
publisher = 'Ringier Publishing GmbH'
category = 'news, politics, Germany'
language = 'de'
encoding = 'UTF-8'
__author__ = 'Armin Geller' # 2011-09-17
__author__ = 'Armin Geller' # Upd. 2011-09-23
oldest_article = 7
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
auto_cleanup = False
# remove_javascript = True
remove_tags = [
dict(name='div', attrs={'id':["header", "navigation", "skip-link", "header-print", "header-print-url", "meta-toolbar", "footer"]}),
dict(name='div', attrs={'class':["region region-sidebar-first column sidebar", "breadcrumb", "breadcrumb-title", "meta", "comment-wrapper",
"field field-name-field-show-teaser-right field-type-list-boolean field-label-above"]}),
dict(name='div', attrs={'title':["Dossier Auswahl"]}),
dict(name='h2', attrs={'class':["title comment-form"]}),
dict(name='form', attrs={'class':["comment-form user-info-from-cookie"]}),
]
dict(name='div', attrs={'id':["header", "navigation", "skip-link", "header-print", "header-print-url", "meta-toolbar", "footer"]}),
dict(name='div', attrs={'class':["region region-sidebar-first column sidebar", "breadcrumb",
"breadcrumb-title", "meta", "comment-wrapper",
"field field-name-field-show-teaser-right field-type-list-boolean field-label-above",
"page-header",
"view view-alle-karikaturen view-id-alle_karikaturen view-display-id-default view-dom-id-1",
"pagination",
"view view-letzte-videos view-id-letzte_videos view-display-id-default view-dom-id-1",
"view view-letzte-videos view-id-letzte_videos view-display-id-default view-dom-id-2", # 2011-09-23
"view view-alle-karikaturen view-id-alle_karikaturen view-display-id-default view-dom-id-2", # 2011-09-23
]}),
dict(name='div', attrs={'title':["Dossier Auswahl"]}),
dict(name='h2', attrs={'class':["title comment-form"]}),
dict(name='form', attrs={'class':["comment-form user-info-from-cookie"]}),
dict(name='table', attrs={'class':["mcx-social-horizontal", "page-header"]}),
]
feeds = [
(u'Das gesamte Portfolio', u'http://www.cicero.de/rss.xml'),
(u'Berliner Republik', u'http://www.cicero.de/berliner-republik.xml'),
(u'Weltb\xfchne', u'http://www.cicero.de/weltbuehne.xml'),
(u'Kapital', u'http://www.cicero.de/kapital.xml'),
(u'Salon', u'http://www.cicero.de/salon.xml'),
(u'Blogs', u'http://www.cicero.de/blogs.xml'), #seems not to be in use at the moment
]
(u'Das gesamte Portfolio', u'http://www.cicero.de/rss.xml'),
(u'Berliner Republik', u'http://www.cicero.de/berliner-republik.xml'),
(u'Weltb\xfchne', u'http://www.cicero.de/weltbuehne.xml'),
(u'Kapital', u'http://www.cicero.de/kapital.xml'),
(u'Salon', u'http://www.cicero.de/salon.xml'),
(u'Blogs', u'http://www.cicero.de/blogs.xml'), #seems not to be in use at the moment
]
def print_version(self, url):
return url + '?print'
return url + '?print'
# def get_cover_url(self):
# return 'http://www.cicero.de/sites/all/themes/cicero/logo.png' # need to find a good logo on their home page!

View File

@ -1,5 +1,5 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Dobreprogramy_pl(BasicNewsRecipe):
title = 'Dobreprogramy.pl'
@ -15,6 +15,7 @@ class Dobreprogramy_pl(BasicNewsRecipe):
extra_css = '.title {font-size:22px;}'
oldest_article = 8
max_articles_per_feed = 100
preprocess_regexps = [(re.compile(ur'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>'), lambda match: '') ]
remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]
keep_only_tags = [dict(name='div', attrs={'class':['mainBar', 'newsContent', 'postTitle title', 'postInfo', 'contentText', 'content']})]
feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'),

17
recipes/dzieje_pl.recipe Normal file
View File

@ -0,0 +1,17 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Dzieje(BasicNewsRecipe):
    """Recipe for dzieje.pl, a portal about the history of Poland."""

    title = u'dzieje.pl'
    __author__ = 'fenuks'
    description = 'Dzieje - history of Poland'
    category = 'history'
    language = 'pl'
    cover_url = 'http://www.dzieje.pl/sites/default/files/dzieje_logo.png'

    oldest_article = 8
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True

    # Keep everything from the article headline onwards...
    remove_tags_before = dict(name='h1', attrs={'class':'title'})
    # ...and drop the trailing 'dogory' ("to top") block and what follows it.
    remove_tags_after = dict(id='dogory')
    remove_tags = [dict(id='dogory')]

    feeds = [(u'Dzieje', u'http://dzieje.pl/rss.xml')]

19
recipes/ekantipur.recipe Normal file
View File

@ -0,0 +1,19 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1314326622(BasicNewsRecipe):
    """Recipe for Ekantipur, the English edition of Nepal's Kantipur daily."""

    title = u'Ekantipur'
    __author__ = 'fab4.ilam'
    language = 'en_NP'
    oldest_article = 7
    max_articles_per_feed = 25
    masthead_url = 'http://kantipur.com.np/images/ekantipur_01.jpg'
    remove_empty_feeds = True

    # Trim each page to the article body and strip sidebars / comment chrome.
    remove_tags_before = dict(id='main-content')
    remove_tags_after = dict(id='view-comments')
    remove_tags = [
        dict(attrs={'class':['ratings', 'news-tool', 'comment', 'post-ur-comment','asideBox','commentsbox','related-sidebar-row related-news']}),
        dict(id=['sidebar','news-detail-img', 'footer-wrapper']),
        dict(name=['script']),
    ]

    # One feed per site section; the trailing number is the section id.
    feeds = [
        (u'Top Stories', u'http://www.ekantipur.com/en/rss/top-stories/'),
        (u'National', u'http://www.ekantipur.com/en/rss/national/1'),
        (u'Capital', u'http://www.ekantipur.com/en/rss/capital/7'),
        (u'Business', u'http://www.ekantipur.com/en/rss/business/3'),
        (u'World', u'http://www.ekantipur.com/en/rss/world/5'),
        (u'Sports', u'http://www.ekantipur.com/en/rss/sports/4'),
        (u'Mixed Bag', u'http://www.ekantipur.com/en/rss/mixed-bag/14'),
        (u'Health & Living', u'http://www.ekantipur.com/en/rss/health-and-living/19'),
        (u'Entertainment', u'http://www.ekantipur.com/en/rss/entertainment/6'),
    ]

View File

@ -16,13 +16,13 @@ class Fleshbot(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
use_embedded_content = True
language = 'en'
masthead_url = 'http://cache.fleshbot.com/assets/base/img/thumbs140x140/fleshbot.com.png'
masthead_url = 'http://cache.gawkerassets.com/assets/kotaku.com/img/logo.png'
extra_css = '''
body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif}
img{margin-bottom: 1em}
h1{font-family :Arial,Helvetica,sans-serif; font-size:x-large}
h1{font-family :Arial,Helvetica,sans-serif; font-size:large}
'''
conversion_options = {
'comment' : description
@ -31,13 +31,12 @@ class Fleshbot(BasicNewsRecipe):
, 'language' : language
}
remove_attributes = ['width','height']
keep_only_tags = [dict(attrs={'class':'content permalink'})]
remove_tags_before = dict(name='h1')
remove_tags = [dict(attrs={'class':'contactinfo'})]
remove_tags_after = dict(attrs={'class':'contactinfo'})
feeds = [(u'Articles', u'http://feeds.gawker.com/fleshbot/vip?format=xml')]
remove_tags = [
{'class': 'feedflare'},
]
feeds = [(u'Articles', u'http://feeds.gawker.com/fleshbot/full')]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,13 @@
from calibre.web.feeds.news import BasicNewsRecipe
class GreenLinux(BasicNewsRecipe):
    """Recipe for GreenLinux.pl, a Polish Linux/IT blog (FeedBurner feed)."""

    title = u'GreenLinux.pl'
    __author__ = 'fenuks'
    category = 'IT'
    language = 'pl'
    cover_url = 'http://lh5.ggpht.com/_xd_6Y9kXhEc/S8tjyqlfhfI/AAAAAAAAAYU/zFNTp07ZQko/top.png'

    oldest_article = 15
    max_articles_per_feed = 100
    auto_cleanup = True  # let calibre's heuristics extract the article body

    feeds = [(u'Newsy', u'http://feeds.feedburner.com/greenlinux')]

View File

@ -0,0 +1,13 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Historia_org_pl(BasicNewsRecipe):
    """Recipe for Historia.org.pl, a Polish history site (Joomla RSS feed)."""

    title = u'Historia.org.pl'
    __author__ = 'fenuks'
    description = u'history site'
    category = 'history'
    language = 'pl'
    cover_url = 'http://lh3.googleusercontent.com/_QeRQus12wGg/TOvHsZ2GN7I/AAAAAAAAD_o/LY1JZDnq7ro/logo5.jpg'

    oldest_article = 8
    max_articles_per_feed = 100

    feeds = [(u'Artykuły', u'http://www.historia.org.pl/index.php?format=feed&type=rss')]

Binary file not shown.

After

Width:  |  Height:  |  Size: 658 B

BIN
recipes/icons/cgm_pl.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 837 B

BIN
recipes/icons/dzieje_pl.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 642 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 648 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 806 B

BIN
recipes/icons/lomza.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.0 KiB

View File

@ -4,19 +4,19 @@ from calibre.web.feeds.news import BasicNewsRecipe
class IDGse(BasicNewsRecipe):
title = 'IDG'
description = 'IDG.se'
language = 'se'
__author__ = 'zapt0'
language = 'sv'
description = 'IDG.se'
oldest_article = 1
max_articles_per_feed = 40
max_articles_per_feed = 256
no_stylesheets = True
encoding = 'ISO-8859-1'
remove_javascript = True
feeds = [(u'Senaste nytt',u'http://feeds.idg.se/idg/vzzs')]
feeds = [(u'Dagens IDG-nyheter',u'http://feeds.idg.se/idg/ETkj?format=xml')]
def print_version(self,url):
return url + '?articleRenderMode=print&m=print'
return url + '?articleRenderMode=print&m=print'
def get_cover_url(this):
return 'http://idgmedia.idg.se/polopoly_fs/2.3275!images/idgmedia_logo_75.jpg'
@ -30,4 +30,3 @@ class IDGse(BasicNewsRecipe):
dict(name='div', attrs={'id':['preamble_ad']}),
dict(name='ul', attrs={'class':['share']})
]

28
recipes/ksiazka_pl.recipe Normal file
View File

@ -0,0 +1,28 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Ksiazka_net_pl(BasicNewsRecipe):
    # Recipe for ksiazka.net.pl, a Polish book-news portal.
    title = u'ksiazka.net.pl'
    __author__ = 'fenuks'
    description = u'Ksiazka.net.pl - book vortal'
    cover_url = 'http://www.ksiazka.net.pl/fileadmin/templates/ksiazka.net.pl/images/1PortalKsiegarski-logo.jpg'
    category = 'books'
    language = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets= True
    #extra_css = 'img {float: right;}'
    # Replace the inline "I like it, I'm buying:" promo text with a line break.
    preprocess_regexps = [(re.compile(ur'Podoba mi się, kupuję:'), lambda match: '<br />')]
    remove_tags_before= dict(name='div', attrs={'class':'m-body'})
    remove_tags_after= dict(name='div', attrs={'class':'m-body-link'})
    remove_tags=[dict(attrs={'class':['mk_library-icon', 'm-body-link', 'tagi']})]
    feeds = [(u'Wiadomości', u'http://www.ksiazka.net.pl/?id=wiadomosci&type=100'),
             (u'Książki', u'http://www.ksiazka.net.pl/?id=ksiazki&type=100'),
             (u'Rynek', u'http://www.ksiazka.net.pl/?id=rynek&type=100')]

    def image_url_processor(self, baseurl, url):
        """Rewrite broken image URLs in articles to absolute site URLs.

        The site sometimes emits local 'file://' URLs or bare relative
        paths; both are remapped onto http://www.ksiazka.net.pl/.
        """
        if (('file://' in url) and ('www.ksiazka.net.pl/' not in url)):
            # [8:] drops 8 leading chars -- assumes a 'file:///' triple-slash
            # form; TODO confirm against real feed content.
            return 'http://www.ksiazka.net.pl/' + url[8:]
        elif 'http://' not in url:
            return 'http://www.ksiazka.net.pl/' + url
        else:
            return url

14
recipes/lomza.recipe Normal file
View File

@ -0,0 +1,14 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Lomza(BasicNewsRecipe):
    """Recipe for 4Lomza (www.4lomza.pl), a Polish regional news site."""
    title = u'4Lomza'
    __author__ = 'fenuks'
    description = u'4Łomża - regional site'
    cover_url = 'http://www.4lomza.pl/i/logo4lomza_m.jpg'
    language = 'pl'
    oldest_article = 15
    # Fix: was misspelled 'no_styleseets', an attribute BasicNewsRecipe never
    # reads, so site stylesheets were not actually being stripped.
    no_stylesheets = True
    max_articles_per_feed = 100
    # Drop banners and the print / send-to-a-friend widgets.
    remove_tags = [dict(name='div', attrs={'class':['bxbanner', 'drukuj', 'wyslijznajomemu']})]
    keep_only_tags = [dict(name='div', attrs={'class':'wiadomosc'})]
    feeds = [(u'Łomża', u'http://feeds.feedburner.com/4lomza.pl')]

View File

@ -95,7 +95,7 @@ class WeeklyLWN(BasicNewsRecipe):
break
article = dict(
title=tag_title.string,
title=self.tag_to_string(tag_title),
url= 'http://lwn.net' + tag_url['href'].split('#')[0] + '?format=printable',
description='', content='', date='')
articles[section].append(article)

View File

@ -4,25 +4,17 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1308306308(BasicNewsRecipe):
title = u'Macleans Magazine'
language = 'en_CA'
__author__ = 'sexymax15'
oldest_article = 30
max_articles_per_feed = 12
__author__ = 'Medius'
oldest_article = 7
cover_url = 'http://www.rogersmagazines.com/rms_covers/md/CLE_md.jpg'
use_embedded_content = False
remove_empty_feeds = True
no_stylesheets = True
remove_javascript = True
remove_tags = [dict(name ='img'),dict (id='header'),{'class':'postmetadata'}]
remove_tags_after = {'class':'postmetadata'}
remove_tags = [dict(id='header'),{'class':'comment'}]
remove_tags_after = {'class':'pagination'}
feeds = [(u'Blog Central', u'http://www2.macleans.ca/category/blog-central/feed/'),
(u'Canada', u'http://www2.macleans.ca/category/canada/feed/'),
(u'World', u'http://www2.macleans.ca/category/world-from-the-magazine/feed/'),
(u'Business', u'http://www2.macleans.ca/category/business/feed/'),
(u'Arts & Culture', u'http://www2.macleans.ca/category/arts-culture/feed/'),
(u'Opinion', u'http://www2.macleans.ca/category/opinion/feed/'),
(u'Health', u'http://www2.macleans.ca/category/health-from-the-magazine/feed/'),
(u'Environment', u'http://www2.macleans.ca/category/environment-from-the-magazine/feed/')]
def print_version(self, url):
return url + 'print/'
feeds = [(u'Canada', u'http://www2.macleans.ca/category/canada/feed/'),
(u'World', u'http://www2.macleans.ca/category/news-politics/world/feed/'), (u'Business', u'http://www2.macleans.ca/category/business/feed/'), (u'Arts & Culture', u'http://www2.macleans.ca/category/arts/feed/'), (u'Opinion', u'http://www2.macleans.ca/category/opinion/feed/'), (u'Health', u'http://www2.macleans.ca/category/life/health/feed/'), (u'Sports', u'http://www2.macleans.ca/category/life/sports/feed/'), (u'Environment', u'http://www2.macleans.ca/category/life/environment/feed/'), (u'Technology', u'http://www2.macleans.ca/category/life/technology/feed/'), (u'Travel', u'http://www2.macleans.ca/category/life/travel/feed/'), (u'Blog Central', u'http://www2.macleans.ca/category/blog-central/feed/')]

View File

@ -12,10 +12,14 @@ __UseChineseTitle__ = False
__KeepImages__ = True
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
__UseLife__ = True
# (HK only) if __UseLife__ is true, turn this on if you want to include the column section
__InclCols__ = False
'''
Change Log:
2011/09/21: fetching "column" section is made optional. Default is False
2011/09/18: parse "column" section stuff from source text file directly.
2011/09/07: disable "column" section as it is no longer offered free.
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
provide options to remove all images in the file
@ -52,16 +56,19 @@ class MPRecipe(BasicNewsRecipe):
title = 'Ming Pao - Hong Kong'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
category = 'Chinese, News, Hong Kong'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
keep_only_tags = [dict(name='h1'),
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
dict(attrs={'class':['heading']}), # for heading from txt
dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}),
dict(attrs={'class':['content']}), # for content from txt
dict(attrs={'class':['photo']}),
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}), # images for source from life.mingpao.com
dict(attrs={'class':['images']}) # for images from txt
]
if __KeepImages__:
remove_tags = [dict(name='style'),
@ -232,12 +239,19 @@ class MPRecipe(BasicNewsRecipe):
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
#(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
]:
articles = self.parse_section2(url, keystr)
if articles:
feeds.append((title, articles))
if __InclCols__ == True:
# parse column section articles directly from .txt files
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
]:
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
@ -358,6 +372,24 @@ class MPRecipe(BasicNewsRecipe):
current_articles.reverse()
return current_articles
# parse from text file of life.mingpao.com
def parse_section2_txt(self, url, keystr):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
a.reverse()
current_articles = []
included_urls = []
for i in a:
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
url = url.replace('cfm/dailynews3.cfm?File=', 'ftp/Life3/') # use printed version of the article
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
return current_articles
# parse from www.mingpaovan.com
def parse_section3(self, url, baseUrl):
self.get_fetchdate()
@ -440,6 +472,39 @@ class MPRecipe(BasicNewsRecipe):
current_articles.reverse()
return current_articles
# preprocess those .txt based files
def preprocess_raw_html(self, raw_html, url):
if url.rfind('ftp') == -1:
return raw_html
else:
splitter = re.compile(r'\n') # Match non-digits
new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
next_is_img_txt = False
title_started = False
met_article_start_char = False
for item in splitter.split(raw_html):
if item.startswith(u'\u3010'):
met_article_start_char = True
new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
else:
if next_is_img_txt == False:
if item.startswith('='):
next_is_img_txt = True
new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
else:
if met_article_start_char == False:
if title_started == False:
new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
title_started = True
else:
new_raw_html = new_raw_html + item + '\n'
else:
new_raw_html = new_raw_html + item + '<p>\n'
else:
next_is_img_txt = False
new_raw_html = new_raw_html + item + '\n'
return new_raw_html + '</div></body></html>'
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']

View File

@ -51,14 +51,13 @@ class pcWorld(BasicNewsRecipe):
keep_only_tags = [
dict(name='div', attrs={'class':'article'})
]
remove_tags = [
dict(name='div', attrs={'class':['toolBar','mac_tags','toolBar btmTools','recommend longRecommend','recommend shortRecommend','textAds']}),
dict(name='div', attrs={'id':['sidebar','comments','mac_tags']}),
dict(name='ul', attrs={'class':'tools'}),
dict(name='li', attrs={'class':'sub'})
dict(name='ul', attrs={'class':['tools', 'tools clearfix']}),
dict(name='li', attrs={'class':'sub'}),
dict(name='p', attrs={'id':'userDesire'})
]
feeds = [
(u'PCWorld Headlines', u'http://feeds.pcworld.com/pcworld/latestnews'),
(u'How-To', u'http://feeds.pcworld.com/pcworld/update/howto'),

12
recipes/tablety_pl.recipe Normal file
View File

@ -0,0 +1,12 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Tablety_pl(BasicNewsRecipe):
    """Recipe for Tablety.pl, a Polish site covering tablet news."""

    title = u'Tablety.pl'
    __author__ = 'fenuks'
    description = u'tablety.pl - latest tablet news'
    category = 'IT'
    language = 'pl'
    cover_url = 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png'

    oldest_article = 8
    max_articles_per_feed = 100

    feeds = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')]

30
recipes/taipei.recipe Normal file
View File

@ -0,0 +1,30 @@
from calibre.web.feeds.news import BasicNewsRecipe
class TN(BasicNewsRecipe):
    """Recipe for the Taipei Times, built from the paper's public RSS feeds."""

    title = u'Taipei Times'
    # NOTE(review): 'en_CN' looks odd for a Taiwanese publication — plain 'en'
    # (or a Taiwan locale) may be intended; confirm against calibre's
    # supported language codes before changing.
    language = 'en_CN'
    __author__ = 'Krittika Goyal'
    oldest_article = 1 #days
    max_articles_per_feed = 25
    # Fetch full articles from the site rather than trusting feed bodies.
    use_embedded_content = False
    no_stylesheets = True
    # Let calibre's heuristic cleanup strip boilerplate, but always keep the
    # main article image matched by this XPath.
    auto_cleanup = True
    auto_cleanup_keep = '//*[@class="main_ipic"]'

    # (section name, RSS URL) pairs, one per site section.
    feeds = [
        ('Editorials',
            'http://www.taipeitimes.com/xml/editorials.rss'),
        ('Taiwan',
            'http://www.taipeitimes.com/xml/taiwan.rss'),
        ('Features',
            'http://www.taipeitimes.com/xml/feat.rss'),
        ('Business',
            'http://www.taipeitimes.com/xml/biz.rss'),
        ('World',
            'http://www.taipeitimes.com/xml/world.rss'),
        ('Sports',
            'http://www.taipeitimes.com/xml/sport.rss'),
    ]

View File

@ -19,6 +19,6 @@ class TazRSSRecipe(BasicNewsRecipe):
feeds = [(u'TAZ main feed', u'http://www.taz.de/rss.xml')]
keep_only_tags = [dict(name='div', attrs={'class': 'sect sect_article'})]
remove_tags_after = dict(name='div', attrs={'class': 'rack'})
remove_tags = [dict(name=['div'], attrs={'class': 'rack'}),
dict(name=['div'], attrs={'class': 'artikelwerbung'}),
dict(name=['ul'], attrs={'class': 'toolbar'}),]
remove_tags = [
dict(name=['div'], attrs={'class': 'artikelwerbung'}),
dict(name=['ul'], attrs={'class': 'toolbar'}),]

View File

@ -1,5 +1,6 @@
#!/usr/bin/env python
__author__ = 'Darko Spasovski'
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Spasovski <darko.spasovski at gmail.com>'
'''
@ -9,10 +10,11 @@ utrinski.com.mk
import re
import datetime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre import browser
class UtrinskiVesnik(BasicNewsRecipe):
__author__ = 'Darko Spasovski'
INDEX = 'http://www.utrinski.com.mk/'
title = 'Utrinski Vesnik'
description = 'Daily Macedonian newspaper'
@ -21,7 +23,6 @@ class UtrinskiVesnik(BasicNewsRecipe):
remove_javascript = True
publication_type = 'newspaper'
category = 'news, Macedonia'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
@ -47,25 +48,29 @@ class UtrinskiVesnik(BasicNewsRecipe):
}
def parse_index(self):
soup = self.index_to_soup(self.INDEX)
feeds = []
for section in soup.findAll('a', attrs={'class':'WB_UTRINSKIVESNIK_TOCTitleBig'}):
# open main page
soup = self.index_to_soup(self.INDEX)
# find all anchors with class attribute equal to 'WB_UTRINSKIVESNIK_MainMenu'
for section in soup.findAll('a', attrs={'class':'WB_UTRINSKIVESNIK_MainMenu'}):
sectionTitle = section.contents[0].string
tocItemTable = section.findAllPrevious('table')[1]
if tocItemTable is None: continue
sectionUrl = self.INDEX + section['href'].strip()
# open the anchor link
raw = browser().open_novisit(sectionUrl).read()
sectionSoup = BeautifulSoup(raw)
# find all anchors with class attribute equal to 'WB_UTRINSKIVESNIK_ONLINEArticleTitle'
sectionArticles = sectionSoup.findAll('a', attrs={'class':'WB_UTRINSKIVESNIK_ONLINEArticleTitle'})
articles = []
while True:
tocItemTable = tocItemTable.nextSibling
if tocItemTable is None: break
article = tocItemTable.findAll('a', attrs={'class': 'WB_UTRINSKIVESNIK_TocItem'})
if len(article)==0: break
title = self.tag_to_string(article[0], use_alt=True).strip()
articles.append({'title': title, 'url':'http://www.utrinski.com.mk/' + article[0]['href'], 'description':'', 'date':''})
for sectionArticle in sectionArticles:
# article title = anchor's contents, article url = anchor's href
articleTitle = sectionArticle.contents[0].string.strip()
articleUrl = self.INDEX + sectionArticle['href'].strip()
articleDate = datetime.datetime.today().strftime('%d.%m.%Y')
articles.append({'title': articleTitle, 'url':articleUrl, 'description':'', 'date': articleDate})
if articles:
feeds.append((sectionTitle, articles))
return feeds
def get_cover_url(self):
    """Return the URL of today's front-page scan.

    The site publishes the cover as a JPEG named after the current
    date in dd_mm_YYYY form.
    """
    stamp = datetime.datetime.today().strftime('%d_%m_%Y')
    base = 'http://www.utrinski.com.mk/WBStorage/Files/'
    return base + stamp + '.jpg'

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 8, 19)
numeric_version = (0, 8, 20)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -4,6 +4,10 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
import cStringIO
from calibre.devices.usbms.driver import USBMS
class ANDROID(USBMS):
@ -209,4 +213,63 @@ class WEBOS(USBMS):
VENDOR_NAME = 'HP'
WINDOWS_MAIN_MEM = 'WEBOS-DEVICE'
THUMBNAIL_HEIGHT = 160
THUMBNAIL_WIDTH = 120
def upload_cover(self, path, filename, metadata, filepath):
    """Write two JPEG cover thumbnails into the device's coverCache folder.

    Generates ``<filename>-medium.jpg`` (120x160) and ``<filename>-small.jpg``
    (52x69) under ``path/coverCache`` from the book's thumbnail metadata,
    falling back to a generated placeholder with title/author text when no
    thumbnail is available.  Python 2 code: relies on cStringIO and integer
    division in the centering arithmetic.
    """
    try:
        from PIL import Image, ImageDraw
        # No-op reference so linters don't flag the imports as unused.
        Image, ImageDraw
    except ImportError:
        # Fall back to the old non-namespaced PIL layout.
        import Image, ImageDraw

    # thumbnail is expected to be a (width, height, data) tuple; index 2
    # holds the raw image bytes.
    coverdata = getattr(metadata, 'thumbnail', None)
    if coverdata and coverdata[2]:
        cover = Image.open(cStringIO.StringIO(coverdata[2]))
    else:
        # No thumbnail: build a black 120x160 placeholder from the bundled
        # library icon, centered, with title and author drawn on top.
        coverdata = open(I('library.png'), 'rb').read()

        cover = Image.new('RGB', (120,160), 'black')
        im = Image.open(cStringIO.StringIO(coverdata))
        im.thumbnail((120, 160), Image.ANTIALIAS)

        x, y = im.size
        cover.paste(im, ((120-x)/2, (160-y)/2))

        draw = ImageDraw.Draw(cover)
        # ASCII-only: the device rendering presumably cannot handle
        # arbitrary Unicode here — TODO confirm.
        draw.text((1, 10), metadata.get('title', _('Unknown')).encode('ascii', 'ignore'))
        draw.text((1, 140), metadata.get('authors', _('Unknown'))[0].encode('ascii', 'ignore'))

    data = cStringIO.StringIO()
    cover.save(data, 'JPEG')
    coverdata = data.getvalue()

    with open(os.path.join(path, 'coverCache', filename + '-medium.jpg'), 'wb') as coverfile:
        coverfile.write(coverdata)

    # Repeat for the small (52x69) variant, again preferring the real
    # thumbnail over the placeholder.
    coverdata = getattr(metadata, 'thumbnail', None)
    if coverdata and coverdata[2]:
        cover = Image.open(cStringIO.StringIO(coverdata[2]))
    else:
        coverdata = open(I('library.png'), 'rb').read()
        cover = Image.new('RGB', (52,69), 'black')
        im = Image.open(cStringIO.StringIO(coverdata))
        im.thumbnail((52, 69), Image.ANTIALIAS)
        x, y = im.size
        cover.paste(im, ((52-x)/2, (69-y)/2))
    # Force the exact target size and RGB mode before saving as JPEG.
    cover2 = cover.resize((52, 69), Image.ANTIALIAS).convert('RGB')
    data = cStringIO.StringIO()
    cover2.save(data, 'JPEG')
    coverdata = data.getvalue()
    with open(os.path.join(path, 'coverCache', filename +
            '-small.jpg'), 'wb') as coverfile:
        coverfile.write(coverdata)

View File

@ -65,7 +65,8 @@ Constants you might want to modify
COMMAND_LINE_LOGGING_LEVEL = CRITICAL
TAB_LENGTH = 4 # expand tabs to this many spaces
ENABLE_ATTRIBUTES = True # @id = xyz -> <... id="xyz">
SMART_EMPHASIS = True # this_or_that does not become this<i>or</i>that
#SMART_EMPHASIS = True # this_or_that does not become this<i>or</i>that
SMART_EMPHASIS = False # this_or_that needs to have _ escaped as \_.
DEFAULT_OUTPUT_FORMAT = 'xhtml1' # xhtml or html4 output
HTML_REMOVED_TEXT = "[HTML_REMOVED]" # text used instead of HTML in safe mode
BLOCK_LEVEL_ELEMENTS = re.compile("p|div|h[1-6]|blockquote|pre|table|dl|ol|ul"

View File

@ -116,6 +116,8 @@ def title_sort(title, order=None):
if match:
prep = match.group(1)
title = title[len(prep):] + ', ' + prep
if title[0] in _ignore_starts:
title = title[1:]
return title.strip()
coding = zip(

View File

@ -31,7 +31,7 @@ def metadata_from_formats(formats, force_read_metadata=False, pattern=None):
try:
return _metadata_from_formats(formats, force_read_metadata, pattern)
except:
mi = metadata_from_filename(list(iter(formats), pattern)[0])
mi = metadata_from_filename(list(iter(formats))[0], pat=pattern)
if not mi.authors:
mi.authors = [_('Unknown')]
return mi

View File

@ -78,7 +78,7 @@ class StreamSlicer(object):
stream = self._stream
base = self.start
stream.seek(base)
self._stream.truncate(base)
stream.truncate()
for block in data_blocks:
stream.write(block)

View File

@ -367,9 +367,11 @@ class MobiMLizer(object):
istate.fgcolor = style['color']
istate.strikethrough = style['text-decoration'] == 'line-through'
istate.underline = style['text-decoration'] == 'underline'
if 'monospace' in style['font-family']:
ff = style['font-family'].lower() if style['font-family'] else ''
if 'monospace' in ff or 'courier' in ff:
istate.family = 'monospace'
elif 'sans-serif' in style['font-family']:
elif ('sans-serif' in ff or 'sansserif' in ff or 'verdana' in ff or
'arial' in ff or 'helvetica' in ff):
istate.family = 'sans-serif'
else:
istate.family = 'serif'

View File

@ -135,7 +135,6 @@ class BookHeader(object):
self.length, self.type, self.codepage, self.unique_id, \
self.version = struct.unpack('>LLLLL', raw[20:40])
try:
self.codec = {
1252: 'cp1252',
@ -145,8 +144,16 @@ class BookHeader(object):
self.codec = 'cp1252' if not user_encoding else user_encoding
log.warn('Unknown codepage %d. Assuming %s' % (self.codepage,
self.codec))
if ident == 'TEXTREAD' or self.length < 0xE4 or 0xE8 < self.length \
or (try_extra_data_fix and self.length == 0xE4):
# There exists some broken DRM removal tool that removes DRM but
# leaves the DRM fields in the header yielding a header size of
# 0xF8. The actual value of max_header_length should be 0xE8 but
# it's changed to accommodate this silly tool. Hopefully that will
# not break anything else.
max_header_length = 0xF8
if (ident == 'TEXTREAD' or self.length < 0xE4 or
self.length > max_header_length or
(try_extra_data_fix and self.length == 0xE4)):
self.extra_flags = 0
else:
self.extra_flags, = struct.unpack('>H', raw[0xF2:0xF4])

View File

@ -216,7 +216,7 @@ class Stylizer(object):
if t:
text += u'\n\n' + force_unicode(t, u'utf-8')
if text:
text = XHTML_CSS_NAMESPACE + elem.text
text = XHTML_CSS_NAMESPACE + text
text = oeb.css_preprocessor(text)
stylesheet = parser.parseString(text, href=cssname)
stylesheet.namespaces['h'] = XHTML_NS

View File

@ -110,9 +110,9 @@
<string>Some explanation about this template:
-The fields availables are 'author_sort', 'authors', 'id',
'isbn', 'pubdate', 'publisher', 'series_index', 'series',
'tags', 'timestamp', 'title', 'uuid'
'tags', 'timestamp', 'title', 'uuid', 'title_sort'
-For list types ie authors and tags, only the first element
wil be selected.
will be selected.
-For time field, only the date will be used. </string>
</property>
<property name="scaledContents">

View File

@ -29,7 +29,7 @@ class PluginWidget(QWidget, Ui_Form):
QListWidgetItem(x, self.db_fields)
db = db_()
for x in sorted(db.custom_field_keys()):
for x in sorted(db.custom_field_keys()):
self.all_fields.append(x)
QListWidgetItem(x, self.db_fields)

View File

@ -87,7 +87,7 @@ class DeviceJob(BaseJob): # {{{
self.failed = True
ex = as_unicode(err)
self._details = ex + '\n\n' + \
traceback.format_exc()
force_unicode(traceback.format_exc())
self.exception = err
finally:
self.job_done()

View File

@ -16,6 +16,7 @@ from calibre.ebooks import BOOK_EXTENSIONS
from calibre.gui2 import Dispatcher
from calibre.gui2.threaded_jobs import ThreadedJob
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.filenames import ascii_filename
class EbookDownload(object):
@ -45,6 +46,9 @@ class EbookDownload(object):
if not filename:
filename = get_download_filename(url, cookie_file)
filename, ext = os.path.splitext(filename)
filename = filename[:60] + ext
filename = ascii_filename(filename)
br = browser()
if cookie_file:
@ -84,7 +88,7 @@ class EbookDownload(object):
gui_ebook_download = EbookDownload()
def start_ebook_download(callback, job_manager, gui, cookie_file=None, url='', filename='', save_loc='', add_to_lib=True, tags=[]):
description = _('Downloading %s') % filename if filename else url
description = _('Downloading %s') % filename.decode('utf-8', 'ignore') if filename else url.decode('utf-8', 'ignore')
job = ThreadedJob('ebook_download', description, gui_ebook_download, (gui, cookie_file, url, filename, save_loc, add_to_lib, tags), {}, callback, max_concurrent_count=2, killable=False)
job_manager.run_threaded_job(job)
@ -96,7 +100,7 @@ class EbookDownloadMixin(object):
if isinstance(tags, basestring):
tags = tags.split(',')
start_ebook_download(Dispatcher(self.downloaded_ebook), self.job_manager, self, cookie_file, url, filename, save_loc, add_to_lib, tags)
self.status_bar.show_message(_('Downloading') + ' ' + filename if filename else url, 3000)
self.status_bar.show_message(_('Downloading') + ' ' + filename.decode('utf-8', 'ignore') if filename else url.decode('utf-8', 'ignore'), 3000)
def downloaded_ebook(self, job):
if job.failed:

View File

@ -9,6 +9,7 @@ __docformat__ = 'restructuredtext en'
import os, errno
from functools import partial
from datetime import datetime
from PyQt4.Qt import (Qt, QVBoxLayout, QHBoxLayout, QWidget, QPushButton,
QGridLayout, pyqtSignal, QDialogButtonBox, QScrollArea, QFont,
@ -26,6 +27,7 @@ from calibre.gui2.custom_column_widgets import populate_metadata_page
from calibre.utils.config import tweaks
from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.localization import canonicalize_lang
from calibre.utils.date import local_tz
BASE_TITLE = _('Edit Metadata')
@ -396,6 +398,14 @@ class MetadataSingleDialogBase(ResizableDialog):
if ':' not in f:
setattr(mi, f, getattr(dummy, f))
if mi is not None:
pd = mi.pubdate
if pd is not None:
# Put the downloaded published date into the local timezone
# as we discard time info and the date is timezone
# invariant. This prevents the as_local_timezone() call in
# update_from_mi from changing the pubdate
mi.pubdate = datetime(pd.year, pd.month, pd.day,
tzinfo=local_tz)
self.update_from_mi(mi)
if d.cover_pixmap is not None:
self.cover.current_val = pixmap_to_data(d.cover_pixmap)

View File

@ -234,7 +234,7 @@
<widget class="QLabel" name="label_13">
<property name="text">
<string>&lt;p&gt;Remember to leave calibre running as the server only runs as long as calibre is running.
&lt;p&gt;Stanza should see your calibre collection automatically. If not, try adding the URL http://myhostname:8080 as a new catalog in the Stanza reader on your iPhone. Here myhostname should be the fully qualified hostname or the IP address of the computer calibre is running on.</string>
&lt;p&gt;To connect to the calibre server from your device you should use a URL of the form &lt;b&gt;http://myhostname:8080&lt;/b&gt; as a new catalog in the Stanza reader on your iPhone. Here myhostname should be either the fully qualified hostname or the IP address of the computer calibre is running on.</string>
</property>
<property name="wordWrap">
<bool>true</bool>

View File

@ -34,7 +34,7 @@
</property>
</widget>
</item>
<item row="1" column="0" colspan="2">
<item row="2" column="0" colspan="2">
<widget class="QGroupBox" name="groupBox">
<property name="title">
<string>A&amp;vailable actions</string>
@ -62,7 +62,7 @@
</layout>
</widget>
</item>
<item row="1" column="2">
<item row="2" column="2">
<layout class="QVBoxLayout" name="verticalLayout_3">
<item>
<widget class="QToolButton" name="add_action_button">
@ -122,7 +122,7 @@
</item>
</layout>
</item>
<item row="1" column="3" colspan="2">
<item row="2" column="3" colspan="2">
<widget class="QGroupBox" name="groupBox_2">
<property name="title">
<string>&amp;Current actions</string>
@ -210,6 +210,16 @@
</layout>
</widget>
</item>
<item row="1" column="0" colspan="5">
<widget class="QLabel" name="label">
<property name="text">
<string>&lt;p&gt;The toolbar in calibre is different depending on whether a device is connected or not. To customize the toolbar when a device is connected as well as customizing right click menus, &lt;b&gt;click the dropdown above&lt;/b&gt; and select which toolbar/menu you want to customize.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
</layout>
</widget>
<resources>

View File

@ -45,6 +45,7 @@ class AdvSearchBuilderDialog(QDialog, Ui_Dialog):
self.author_box.setText('')
self.price_box.setText('')
self.format_box.setText('')
self.drm_combo.setCurrentIndex(0)
self.download_combo.setCurrentIndex(0)
self.affiliate_combo.setCurrentIndex(0)
@ -120,6 +121,9 @@ class AdvSearchBuilderDialog(QDialog, Ui_Dialog):
format = unicode(self.format_box.text()).strip()
if format:
ans.append('format:"' + self.mc + format + '"')
drm = unicode(self.drm_combo.currentText()).strip()
if drm:
ans.append('drm:' + drm)
download = unicode(self.download_combo.currentText()).strip()
if download:
ans.append('download:' + download)

View File

@ -199,7 +199,7 @@
</property>
</widget>
</item>
<item row="1" column="1">
<item row="1" column="2">
<widget class="EnLineEdit" name="title_box">
<property name="toolTip">
<string>Enter the title.</string>
@ -226,7 +226,7 @@
</property>
</widget>
</item>
<item row="8" column="0" colspan="2">
<item row="9" column="0" colspan="3">
<layout class="QHBoxLayout" name="horizontalLayout_6">
<item>
<widget class="QPushButton" name="clear_button">
@ -244,7 +244,7 @@
</item>
</layout>
</item>
<item row="7" column="1">
<item row="8" column="2">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
@ -257,17 +257,17 @@
</property>
</spacer>
</item>
<item row="0" column="0" colspan="2">
<item row="0" column="0" colspan="3">
<widget class="QLabel" name="label_11">
<property name="text">
<string>Search only in specific fields:</string>
</property>
</widget>
</item>
<item row="2" column="1">
<item row="2" column="2">
<widget class="EnLineEdit" name="author_box"/>
</item>
<item row="4" column="1">
<item row="4" column="2">
<widget class="QLineEdit" name="format_box"/>
</item>
<item row="4" column="0">
@ -280,17 +280,17 @@
</property>
</widget>
</item>
<item row="3" column="1">
<item row="3" column="2">
<widget class="EnLineEdit" name="price_box"/>
</item>
<item row="6" column="0">
<item row="7" column="0">
<widget class="QLabel" name="label_9">
<property name="text">
<string>Affiliate:</string>
</property>
</widget>
</item>
<item row="6" column="1">
<item row="7" column="2">
<widget class="QComboBox" name="affiliate_combo">
<item>
<property name="text">
@ -309,14 +309,14 @@
</item>
</widget>
</item>
<item row="5" column="0">
<item row="6" column="0">
<widget class="QLabel" name="label_12">
<property name="text">
<string>Download:</string>
</property>
</widget>
</item>
<item row="5" column="1">
<item row="6" column="2">
<widget class="QComboBox" name="download_combo">
<item>
<property name="text">
@ -335,6 +335,32 @@
</item>
</widget>
</item>
<item row="5" column="0">
<widget class="QLabel" name="label_13">
<property name="text">
<string>DRM:</string>
</property>
</widget>
</item>
<item row="5" column="2">
<widget class="QComboBox" name="drm_combo">
<item>
<property name="text">
<string/>
</property>
</item>
<item>
<property name="text">
<string>true</string>
</property>
</item>
<item>
<property name="text">
<string>false</string>
</property>
</item>
</widget>
</item>
</layout>
</widget>
</widget>

View File

@ -350,7 +350,7 @@ class SearchDialog(QDialog, Ui_Dialog):
d = ChooseFormatDialog(self, _('Choose format to download to your library.'), result.downloads.keys())
if d.exec_() == d.Accepted:
ext = d.format()
fname = result.title + '.' + ext.lower()
fname = result.title[:60] + '.' + ext.lower()
fname = ascii_filename(fname)
self.gui.download_ebook(result.downloads[ext], filename=fname)

View File

@ -44,9 +44,12 @@ class MobileReadStore(BasicStoreConfig, StorePlugin):
def search(self, query, max_results=10, timeout=60):
books = self.get_book_list()
if not books:
return
sf = SearchFilter(books)
matches = sf.parse(query)
matches = sf.parse(query.decode('utf-8', 'replace'))
for book in matches:
book.price = '$0.00'

View File

@ -56,7 +56,7 @@ class WoblinkStore(BasicStoreConfig, StorePlugin):
continue
cover_url = ''.join(data.xpath('.//td[@class="w10 va-t"]/a[1]/img/@src'))
title = ''.join(data.xpath('.//h3[@class="title"]/a[1]/text()'))
title = ''.join(data.xpath('.//h2[@class="title"]/a[1]/text()'))
author = ', '.join(data.xpath('.//p[@class="author"]/a/text()'))
price = ''.join(data.xpath('.//div[@class="prices"]/p[1]/span/text()'))
price = re.sub('PLN', '', price)

View File

@ -15,6 +15,7 @@ from PyQt4.QtWebKit import QWebView, QWebPage
from calibre import USER_AGENT, get_proxies, get_download_filename
from calibre.ebooks import BOOK_EXTENSIONS
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.filenames import ascii_filename
class NPWebView(QWebView):
@ -67,6 +68,7 @@ class NPWebView(QWebView):
filename = get_download_filename(url, cf)
ext = os.path.splitext(filename)[1][1:].lower()
filename = ascii_filename(filename[:60] + '.' + ext)
if ext not in BOOK_EXTENSIONS:
if ext == 'acsm':
from calibre.gui2.dialogs.confirm_delete import confirm

View File

@ -32,7 +32,7 @@ FIELDS = ['all', 'title', 'title_sort', 'author_sort', 'authors', 'comments',
'rating', 'series_index', 'series', 'size', 'tags', 'timestamp', 'uuid']
#Allowed fields for template
TEMPLATE_ALLOWED_FIELDS = [ 'author_sort', 'authors', 'id', 'isbn', 'pubdate',
TEMPLATE_ALLOWED_FIELDS = [ 'author_sort', 'authors', 'id', 'isbn', 'pubdate', 'title_sort',
'publisher', 'series_index', 'series', 'tags', 'timestamp', 'title', 'uuid' ]
class CSV_XML(CatalogPlugin): # {{{
@ -324,7 +324,7 @@ class BIBTEX(CatalogPlugin): # {{{
def run(self, path_to_output, opts, db, notification=DummyReporter()):
def create_bibtex_entry(entry, fields, mode, template_citation,
bibtexdict, citation_bibtex=True, calibre_files=True):
bibtexdict, db, citation_bibtex=True, calibre_files=True):
#Bibtex doesn't like UTF-8 but keep unicode until writing
#Define starting chain or if book valid strict and not book return a Fail string
@ -345,7 +345,13 @@ class BIBTEX(CatalogPlugin): # {{{
bibtex_entry = [u' '.join(bibtex_entry)]
for field in fields:
item = entry[field]
if field.startswith('#'):
item = db.get_field(entry['id'],field,index_is_id=True)
elif field == 'title_sort':
item = entry['sort']
else:
item = entry[field]
#check if the field should be included (none or empty)
if item is None:
continue
@ -358,10 +364,6 @@ class BIBTEX(CatalogPlugin): # {{{
if field == 'authors' :
bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item))
elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
'author_sort', 'series'] :
bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))
elif field == 'id' :
bibtex_entry.append(u'calibreid = "%s"' % int(item))
@ -409,6 +411,14 @@ class BIBTEX(CatalogPlugin): # {{{
bibtex_entry.append(u'year = "%s"' % item.year)
bibtex_entry.append(u'month = "%s"' % bibtexdict.utf8ToBibtex(strftime("%b", item)))
elif field.startswith('#') :
bibtex_entry.append(u'%s = "%s"' % (field[1:], bibtexdict.utf8ToBibtex(item)))
else:
# elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
# 'author_sort', 'series', 'title_sort'] :
bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))
bibtex_entry = u',\n '.join(bibtex_entry)
bibtex_entry += u' }\n\n'
@ -588,7 +598,7 @@ class BIBTEX(CatalogPlugin): # {{{
for entry in data:
outfile.write(create_bibtex_entry(entry, fields, bib_entry, template_citation,
bibtexc, citation_bibtex, addfiles_bibtex))
bibtexc, db, citation_bibtex, addfiles_bibtex))
# }}}
class EPUB_MOBI(CatalogPlugin):

View File

@ -465,13 +465,12 @@ class BrowseServer(object):
if not cats and len(items) == 1:
# Only one item in category, go directly to book list
prefix = '' if self.is_wsgi else self.opts.url_prefix
html = get_category_items(category, items,
self.search_restriction_name, datatype,
self.opts.url_prefix)
href = re.search(r'<a href="([^"]+)"', html)
if href is not None:
raise cherrypy.HTTPRedirect(prefix+href.group(1))
raise cherrypy.HTTPRedirect(href.group(1))
if len(items) <= self.opts.max_opds_ungrouped_items:
script = 'false'

View File

@ -218,7 +218,7 @@ class ContentServer(object):
if format in ('MOBI', 'EPUB'):
# Write the updated file
from calibre.ebooks.metadata.meta import set_metadata
set_metadata(fmt, newmi, 'epub')
set_metadata(fmt, newmi, format.lower())
fmt.seek(0)
mt = guess_type('dummy.'+format.lower())[0]

View File

@ -94,3 +94,10 @@ def unquote(s):
ans = ans.decode('utf-8')
return ans
def cookie_time_fmt(time_t):
    """Format a ``time.struct_time`` as an HTTP cookie expiry string."""
    return time.strftime('%a, %d-%b-%Y %H:%M:%S GMT', time_t)


def cookie_max_age_to_expires(max_age):
    """Convert a cookie Max-Age (seconds from now) into an Expires string."""
    expires_at = time.time() + max_age
    return cookie_time_fmt(time.gmtime(expires_at))

View File

@ -108,6 +108,8 @@ At the moment |app| has full support for the SONY PRS line, Barnes & Noble Nook
There is also a special ``User Defined`` device plugin that can be used to connect to arbitrary devices that present their memory as disk drives. See the device plugin ``Preferences -> Plugins -> Device Plugins -> User Defined`` and ``Preferences -> Miscelleaneous -> Get information to setup the user defined device`` for more information.
.. _devsupport:
How can I help get my device supported in |app|?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -219,7 +221,7 @@ How do I use |app| with my iPad/iPhone/iTouch?
Over the air
^^^^^^^^^^^^^^
The easiest way to browse your |app| collection on your Apple device (iPad/iPhone/iPod) is by using the calibre sontent server, which makes your collection available over the net. First perform the following steps in |app|
The easiest way to browse your |app| collection on your Apple device (iPad/iPhone/iPod) is by using the calibre content server, which makes your collection available over the net. First perform the following steps in |app|
* Set the Preferred Output Format in |app| to EPUB (The output format can be set under :guilabel:`Preferences->Interface->Behavior`)
* Set the output profile to iPad (this will work for iPhone/iPods as well), under :guilabel:`Preferences->Conversion->Common Options->Page Setup`
@ -258,10 +260,36 @@ Use the 'Connect to iTunes' method in the 'Getting started' instructions in `Cal
This method only works on Windows XP and higher, and OS X 10.5 and higher. Linux is not supported (iTunes is not available in linux) and OS X 10.4 is not supported.
How do I use |app| with my Android phone?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
How do I use |app| with my Android phone/tablet?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
First install the WordPlayer ebook reading app from the Android Marketplace onto you phone. Then simply plug your phone into the computer with a USB cable. |app| should automatically detect the phone and then you can transfer books to it by clicking the Send to Device button. |app| does not have support for every single androind device out there, so if you would like to have support for your device added, follow the instructions above for getting your device supported in |app|.
There are two ways that you can connect your Android device to calibre. Using a USB cable-- or wirelessly, over the air.
The USB cable method only works if your Android device can act as a USB disk, which some Android tablets cannot.
Using a USB cable
^^^^^^^^^^^^^^^^^^^^
First install either the WordPlayer or Aldiko ebook reading apps from the Android Marketplace onto your phone. Then simply plug your phone into the computer with a USB cable. |app| should automatically detect the phone and then you can transfer books to it by clicking the Send to Device button. |app| does not have support for every single android device out there, so if your device is not automatically detected, follow the instructions at :ref:`devsupport` to get your device supported in |app|.
Over the air
^^^^^^^^^^^^^^
The easiest way to browse your |app| collection on your Android device is by using the calibre content server, which makes your collection available over the net. First perform the following steps in |app|
* Set the Preferred Output Format in |app| to EPUB (The output format can be set under :guilabel:`Preferences->Interface->Behavior`)
* Set the output profile to Tablet (this will work for phones as well), under :guilabel:`Preferences->Conversion->Common Options->Page Setup`
* Convert the books you want to read on your device to EPUB format by selecting them and clicking the Convert button.
* Turn on the Content Server in |app|'s preferences and leave |app| running.
Now on your Android device, open the browser and browse to
http://192.168.1.2:8080/
Replace ``192.168.1.2`` with the local IP address of the computer running |app|. If your local network supports the use of computer names, you can replace the IP address with the network name of the computer. If you have changed the port the |app| content server is running on, you will have to change ``8080`` as well to the new port.
The local IP address is the IP address your computer is assigned on your home network. A quick Google search will tell you how to find out your local IP address. You can now browse your book collection and download books from |app| to your device to open with whatever ebook reading software you have on your Android device.
Some reading programs support browsing the Calibre library directly. For example, in Aldiko, click My Catalogs, then + to add a catalog, then give the catalog a title such as "Calibre" and provide the URL listed above. You can now browse the Calibre library and download directly into the reading software.
Can I access my |app| books using the web browser in my Kindle or other reading device?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -294,7 +294,7 @@ To learn more about writing advanced recipes using some of the facilities, avail
`BasicNewsRecipe <http://bazaar.launchpad.net/~kovid/calibre/trunk/annotate/head:/src/calibre/web/feeds/news.py>`_
The source code of ``BasicNewsRecipe``
`Built-in recipes <http://bazaar.launchpad.net/~kovid/calibre/trunk/files/head:/src/calibre/web/feeds/recipes/>`_
`Built-in recipes <http://bazaar.launchpad.net/~kovid/calibre/trunk/files/head:/recipes/>`_
The source code for the built-in recipes that come with |app|
`The calibre recipes forum <http://www.mobileread.com/forums/forumdisplay.php?f=228>`_

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More