GRiker 2012-02-24 05:29:39 -07:00
commit f3f379c0ef
192 changed files with 13655 additions and 10208 deletions

View File

@ -19,6 +19,68 @@
# new recipes:
# - title:
- version: 0.8.41
  date: 2012-02-24

  new features:
    - title: "Driver for the Sony Xperia Play 4G"
      tickets: [938831]

    - title: "News download system: Allow use of __future__ in recipes, and do not change line numbers of code in the recipe when compiling it"

    - title: "Use the My Documents folder as the default location for the Calibre Library folder on first start on Windows"
      tickets: [934840]

    - title: "Add a tweak to Preferences->Tweaks to control the order in which categories appear in the Tag Browser"

    - title: "Tag Browser: Add an entry to the right click menu to quickly delete tags"
      tickets: [934509]

    - title: "Amazon metadata download: Try to scrape series information from the Amazon details page. Note that currently very few books have series info available. Often the page for the hardcover edition will have series information while the Kindle edition will not; in such cases calibre may or may not find the series, depending on which page it ends up using."

    - title: "Content server: Add favicon to OPDS feeds."
      tickets: [934731]

  bug fixes:
    - title: "RTF Input: Fix some WMF images embedded in RTF files being distorted on conversion."
      tickets: [934167]

    - title: "Fix long-standing bug preventing calibre from working on East Asian Windows installs when the Windows user name contains non-ASCII characters"
      tickets: [937389]

    - title: "Get Books: Fix Baen Webscription and O'Reilly stores. Fix price detection for Google Books"

    - title: "MOBI Output: When the same anchor is present more than once in the input document, use the first occurrence rather than the last one."
      tickets: [934031]

    - title: "Use the 'default cover font' tweak when generating default masthead images as well"
      tickets: [939256]

    - title: "Fix content server not correctly displaying custom fields of type 'rating'"
      tickets: [938303]

    - title: "Fix welcome wizard not saving the send-from email information unless the send-to field is filled in"
      tickets: [937087]

    - title: "When reading metadata from ODT files, use initial-creator in preference to creator for setting the author field"
      tickets: [934564]

    - title: "Fix conversion erroring out when the input document has very long and thin images"
      tickets: [935234]

  improved recipes:
    - The Sun
    - Various Polish news sources
    - Mediapart

  new recipes:
    - title: La pausa caffe
      author: faber1971

    - title: Various Polish news sources
      author: fenuks

- version: 0.8.40
  date: 2012-02-17
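
The 0.8.41 entry above notes that recipes may now use __future__ imports and that compiling a recipe no longer shifts its line numbers. A minimal sketch of a recipe relying on that; the class name and feed URL are illustrative only, not part of this commit:

from __future__ import print_function  # now allowed at the top of a recipe
from calibre.web.feeds.news import BasicNewsRecipe

class FutureImportDemo(BasicNewsRecipe):
    title = u'Future import demo'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    feeds = [(u'Example', u'http://example.com/rss')]  # hypothetical feed

    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)
        # print is the function form thanks to the __future__ import
        print('fetched %d feed(s)' % len(feeds))
        return feeds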

View File

@ -7,6 +7,7 @@ class Archeowiesci(BasicNewsRecipe):
language = 'pl'
cover_url='http://archeowiesci.pl/wp-content/uploads/2011/05/Archeowiesci2-115x115.jpg'
oldest_article = 7
needs_subscription='optional'
max_articles_per_feed = 100
auto_cleanup = True
remove_tags=[dict(name='span', attrs={'class':['post-ratings', 'post-ratings-loading']})]
@ -16,6 +17,16 @@ class Archeowiesci(BasicNewsRecipe):
feeds = BasicNewsRecipe.parse_feeds(self)
for feed in feeds:
for article in feed.articles[:]:
if 'subskrypcja' in article.title:
if self.username is None and 'subskrypcja' in article.title:
feed.articles.remove(article)
return feeds
def get_browser(self):
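# Subscription is optional: when the user supplied credentials, log in
# through the site's standard WordPress login form before fetching feeds.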
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('http://archeowiesci.pl/wp-login.php')
br.select_form(name='loginform')
br['log'] = self.username
br['pwd'] = self.password
br.submit()
return br

View File

@ -1,15 +1,18 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Astronomia_pl(BasicNewsRecipe):
title = u'Astronomia.pl'
__author__ = 'fenuks'
description = 'Astronomia - Polish astronomy site'
masthead_url = 'http://www.astronomia.pl/grafika/logo.gif'
cover_url = 'http://www.astronomia.pl/grafika/logo.gif'
category = 'astronomy, science'
language = 'pl'
oldest_article = 8
max_articles_per_feed = 100
#no_stylesheets=True
extra_css='#h2 {font-size: 18px;}'
no_stylesheets=True
preprocess_regexps = [(re.compile(ur'<b>Przeczytaj także:.*?</BODY>', re.DOTALL), lambda match: '</BODY>') ]
remove_tags_before=dict(name='div', attrs={'id':'a1'})
keep_only_tags=[dict(name='div', attrs={'id':['a1', 'h2']})]
feeds = [(u'Wiadomości z astronomii i astronautyki', u'http://www.astronomia.pl/rss/')]

View File

@ -4,16 +4,17 @@ class Benchmark_pl(BasicNewsRecipe):
title = u'Benchmark.pl'
__author__ = 'fenuks'
description = u'benchmark.pl - IT site'
masthead_url = 'http://www.benchmark.pl/i/logo-footer.png'
cover_url = 'http://www.ieaddons.pl/benchmark/logo_benchmark_new.gif'
category = 'IT'
language = 'pl'
oldest_article = 8
max_articles_per_feed = 100
no_stylesheets=True
preprocess_regexps = [(re.compile(ur'\bWięcej o .*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>')]
preprocess_regexps = [(re.compile(ur'<h3><span style="font-size: small;">&nbsp;Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'), (re.compile(ur'Więcej o .*?</ul>', re.DOTALL|re.IGNORECASE), lambda match: '')]
keep_only_tags=[dict(name='div', attrs={'class':['m_zwykly', 'gallery']})]
remove_tags_after=dict(name='div', attrs={'class':'body'})
remove_tags=[dict(name='div', attrs={'class':['kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery']})]
remove_tags=[dict(name='div', attrs={'class':['kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery']}), dict(name='table', attrs={'background':'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width':'210', 'cellspacing':'1', 'cellpadding':'4', 'border':'0', 'align':'right'})]
INDEX= 'http://www.benchmark.pl'
feeds = [(u'Aktualności', u'http://www.benchmark.pl/rss/aktualnosci-pliki.xml'),
(u'Testy i recenzje', u'http://www.benchmark.pl/rss/testy-recenzje-minirecenzje.xml')]

View File

@ -10,10 +10,11 @@ class Biolog_pl(BasicNewsRecipe):
description = u'Przyrodnicze aktualności ze świata nauki (codziennie aktualizowane), kurs biologii, testy i sprawdziany, forum dyskusyjne.'
category = 'biology'
language = 'pl'
masthead_url= 'http://www.biolog.pl/naukowy,portal,biolog.png'
cover_url='http://www.biolog.pl/naukowy,portal,biolog.png'
no_stylesheets = True
#keeps_only_tags=[dict(id='main')]
remove_tags_before=dict(id='main')
remove_tags_after=dict(name='a', attrs={'name':'komentarze'})
remove_tags=[dict(name='img', attrs={'alt':'Komentarze'})]
remove_tags=[dict(name='img', attrs={'alt':'Komentarze'}), dict(name='span', attrs={'class':'menu_odsylacze'})]
feeds = [(u'Wszystkie', u'http://www.biolog.pl/backend.php'), (u'Medycyna', u'http://www.biolog.pl/medycyna-rss.php'), (u'Ekologia', u'http://www.biolog.pl/rss-ekologia.php'), (u'Genetyka i biotechnologia', u'http://www.biolog.pl/rss-biotechnologia.php'), (u'Botanika', u'http://www.biolog.pl/rss-botanika.php'), (u'Le\u015bnictwo', u'http://www.biolog.pl/rss-lesnictwo.php'), (u'Zoologia', u'http://www.biolog.pl/rss-zoologia.php')]

View File

@ -1,16 +1,20 @@
from calibre.web.feeds.news import BasicNewsRecipe
class CD_Action(BasicNewsRecipe):
title = u'CD-Action'
__author__ = 'fenuks'
description = 'cdaction.pl - polish magazine about games site'
description = 'cdaction.pl - Polish games magazine site'
category = 'games'
language = 'pl'
oldest_article = 8
max_articles_per_feed = 100
no_stylesheets= True
cover_url =u'http://s.cdaction.pl/obrazki/logo-CD-Action_172k9.JPG'
keep_only_tags= dict(id='news_content')
remove_tags_after= dict(name='div', attrs={'class':'tresc'})
feeds = [(u'Newsy', u'http://www.cdaction.pl/rss_newsy.xml')]
def get_cover_url(self):
soup = self.index_to_soup('http://www.cdaction.pl/magazyn/')
self.cover_url='http://www.cdaction.pl'+ soup.find(id='wspolnik').div.a['href']
return getattr(self, 'cover_url', self.cover_url)

View File

@ -5,6 +5,7 @@ class CGM(BasicNewsRecipe):
oldest_article = 7
__author__ = 'fenuks'
description = u'Codzienna Gazeta Muzyczna'
masthead_url='http://www.cgm.pl/img/header/logo.gif'
cover_url = 'http://www.krafcy.com/foto/tinymce/Image/cgm%281%29.jpg'
category = 'music'
language = 'pl'
@ -23,21 +24,19 @@ class CGM(BasicNewsRecipe):
def preprocess_html(self, soup):
gallery=soup.find('div', attrs={'class':'galleryFlash'})
if gallery:
img=gallery.div
gallery.img.extract()
if img:
img=img['style']
img='http://www.cgm.pl'+img[img.find('url(')+4:img.find(')')]
gallery.contents[1].name='img'
gallery.contents[1]['src']=img
for item in soup.findAll(style=True):
del item['style']
ad=soup.findAll('a')
for r in ad:
if 'http://www.hustla.pl' in r['href'] or 'http://www.ebilet.pl' in r['href']:
if 'www.hustla.pl' in r['href'] or 'www.ebilet.pl' in r['href']:
r.extract()
gallery=soup.find('div', attrs={'class':'galleryFlash'})
if gallery:
img=gallery.find('embed')
if img:
img=img['src'][35:]
img='http://www.cgm.pl/_vault/_gallery/_photo/'+img
param=gallery.findAll(name='param')
for i in param:
i.extract()
gallery.contents[1].name='img'
gallery.contents[1]['src']=img
return soup

View File

@ -33,6 +33,32 @@ class ChristianScienceMonitor(BasicNewsRecipe):
remove_javascript = True
no_stylesheets = True
requires_version = (0, 8, 39)
def preprocess_raw_html(self, raw, url):
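# Parse the raw page with html5lib into an lxml tree, strip scripts, styles,
# metadata tags and HTML comments, then re-serialize starting at <html> so
# only clean article markup is handed on to the rest of the pipeline.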
try:
from html5lib import parse
root = parse(raw, namespaceHTMLElements=False,
treebuilder='lxml').getroot()
from lxml import etree
for tag in root.xpath(
'//script|//style|//noscript|//meta|//link|//object'):
tag.getparent().remove(tag)
for elem in list(root.iterdescendants(tag=etree.Comment)):
elem.getparent().remove(elem)
ans = etree.tostring(root, encoding=unicode)
ans = re.sub('.*<html', '<html', ans, flags=re.DOTALL)
return ans
except:
import traceback
traceback.print_exc()
raise
def index_to_soup(self, url):
raw = BasicNewsRecipe.index_to_soup(self, url,
raw=True).decode('utf-8')
raw = self.preprocess_raw_html(raw, url)
return BasicNewsRecipe.index_to_soup(self, raw)
def append_page(self, soup, appendtag, position):
nav = soup.find('div',attrs={'class':'navigation'})
@ -78,14 +104,6 @@ class ChristianScienceMonitor(BasicNewsRecipe):
print_soup = soup
return print_soup
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
[
(r'<!--.*?-->', lambda match : ''),
(r'<body.*?<div id="story"', lambda match : '<body><div id="story"'),
(r'<div class="pubdate">.*?</div>', lambda m: ''),
(r'Full HTML version of this story which may include photos, graphics, and related links.*</body>',
lambda match : '</body>'),
]]
extra_css = '''
h1{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: large}
.sub{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: small;}

View File

@ -0,0 +1,48 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Ciekawostki_Historyczne(BasicNewsRecipe):
title = u'Ciekawostki Historyczne'
oldest_article = 7
__author__ = 'fenuks'
description = u'Serwis popularnonaukowy - odkrycia, kontrowersje, historia, ciekawostki, badania, ciekawostki z przeszłości.'
category = 'history'
language = 'pl'
masthead_url= 'http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg'
cover_url='http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg'
max_articles_per_feed = 100
preprocess_regexps = [(re.compile(ur'Ten artykuł ma kilka stron.*?</fb:like>', re.DOTALL), lambda match: ''), (re.compile(ur'<h2>Zobacz też:</h2>.*?</ol>', re.DOTALL), lambda match: '')]
no_stylesheets=True
remove_empty_feeds=True
keep_only_tags=[dict(name='div', attrs={'class':'post'})]
remove_tags=[dict(id='singlepostinfo')]
feeds = [(u'Staro\u017cytno\u015b\u0107', u'http://ciekawostkihistoryczne.pl/tag/starozytnosc/feed/'), (u'\u015aredniowiecze', u'http://ciekawostkihistoryczne.pl/tag/sredniowiecze/feed/'), (u'Nowo\u017cytno\u015b\u0107', u'http://ciekawostkihistoryczne.pl/tag/nowozytnosc/feed/'), (u'XIX wiek', u'http://ciekawostkihistoryczne.pl/tag/xix-wiek/feed/'), (u'1914-1939', u'http://ciekawostkihistoryczne.pl/tag/1914-1939/feed/'), (u'1939-1945', u'http://ciekawostkihistoryczne.pl/tag/1939-1945/feed/'), (u'Powojnie (od 1945)', u'http://ciekawostkihistoryczne.pl/tag/powojnie/feed/'), (u'Recenzje', u'http://ciekawostkihistoryczne.pl/category/recenzje/feed/')]
def append_page(self, soup, appendtag):
tag=soup.find(name='h7')
if tag:
if tag.br:
pass
elif tag.nextSibling.name=='p':
tag=tag.nextSibling
nexturl = tag.findAll('a')
for nextpage in nexturl:
tag.extract()
nextpage= nextpage['href']
soup2 = self.index_to_soup(nextpage)
pagetext = soup2.find(name='div', attrs={'class':'post'})
for r in pagetext.findAll('div', attrs={'id':'singlepostinfo'}):
r.extract()
for r in pagetext.findAll('div', attrs={'class':'wp-caption alignright'}):
r.extract()
for r in pagetext.findAll('h1'):
r.extract()
pagetext.find('h6').nextSibling.extract()
pagetext.find('h7').nextSibling.extract()
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
def preprocess_html(self, soup):
self.append_page(soup, soup.body)
return soup

View File

@ -7,10 +7,11 @@ class Computerworld_pl(BasicNewsRecipe):
description = u'Serwis o IT w przemyśle, finansach, handlu, administracji oraz rynku IT i telekomunikacyjnym - wiadomości, opinie, analizy, porady prawne'
category = 'IT'
language = 'pl'
masthead_url= 'http://g1.computerworld.pl/cw/beta_gfx/cw2.gif'
no_stylesheets=True
oldest_article = 7
max_articles_per_feed = 100
keep_only_tags=[dict(name='div', attrs={'id':'s'})]
keep_only_tags=[dict(attrs={'class':['tyt_news', 'prawo', 'autor', 'tresc']})]
remove_tags_after=dict(name='div', attrs={'class':'rMobi'})
remove_tags=[dict(name='div', attrs={'class':['nnav', 'rMobi']}), dict(name='table', attrs={'class':'ramka_slx'})]
feeds = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')]

View File

@ -7,6 +7,7 @@ class Dobreprogramy_pl(BasicNewsRecipe):
__licence__ ='GPL v3'
category = 'IT'
language = 'pl'
masthead_url='http://static.dpcdn.pl/css/Black/Images/header_logo_napis_fullVersion.png'
cover_url = 'http://userlogos.org/files/logos/Karmody/dobreprogramy_01.png'
description = u'Aktualności i blogi z dobreprogramy.pl'
encoding = 'utf-8'
@ -16,7 +17,8 @@ class Dobreprogramy_pl(BasicNewsRecipe):
oldest_article = 8
max_articles_per_feed = 100
preprocess_regexps = [(re.compile(ur'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>'), lambda match: '') ]
remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]
keep_only_tags = [dict(name='div', attrs={'class':['mainBar', 'newsContent', 'postTitle title', 'postInfo', 'contentText', 'content']})]
keep_only_tags=[dict(attrs={'class':['news', 'entry single']})]
remove_tags = [dict(name='div', attrs={'class':['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master']})]
#remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]
feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'),
('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')]

View File

@ -8,15 +8,17 @@ class Dziennik_pl(BasicNewsRecipe):
description = u'Wiadomości z kraju i ze świata. Wiadomości gospodarcze. Znajdziesz u nas informacje, wydarzenia, komentarze, opinie.'
category = 'newspaper'
language = 'pl'
cover_url='http://6.s.dziennik.pl/images/og_dziennik.jpg'
masthead_url= 'http://5.s.dziennik.pl/images/logos.png'
cover_url= 'http://5.s.dziennik.pl/images/logos.png'
no_stylesheets = True
oldest_article = 7
max_articles_per_feed = 100
remove_javascript=True
remove_empty_feeds=True
preprocess_regexps = [(re.compile("Komentarze:"), lambda m: '')]
extra_css= 'ul {list-style: none; padding: 0; margin: 0;} li {float: left;margin: 0 0.15em;}'
preprocess_regexps = [(re.compile("Komentarze:"), lambda m: ''), (re.compile('<p><strong><a href=".*?">&gt;&gt;&gt; CZYTAJ TAKŻE: ".*?"</a></strong></p>'), lambda m: '')]
keep_only_tags=[dict(id='article')]
remove_tags=[dict(name='div', attrs={'class':['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget']}), dict(name='a', attrs={'class':'komentarz'})]
remove_tags=[dict(name='div', attrs={'class':['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget', 'belka-spol', 'belka-spol belka-spol-bottom', 'art_data_tags', 'cl_right', 'boxRounded gal_inside']}), dict(name='a', attrs={'class':['komentarz', 'article_icon_addcommnent']})]
feeds = [(u'Wszystko', u'http://rss.dziennik.pl/Dziennik-PL/'),
(u'Wiadomości', u'http://rss.dziennik.pl/Dziennik-Wiadomosci'),
(u'Gospodarka', u'http://rss.dziennik.pl/Dziennik-Gospodarka'),
@ -30,6 +32,12 @@ class Dziennik_pl(BasicNewsRecipe):
(u'Podróże', u'http://rss.dziennik.pl/Dziennik-Podroze/'),
(u'Nieruchomości', u'http://rss.dziennik.pl/Dziennik-Nieruchomosci')]
def skip_ad_pages(self, soup):
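# Interstitial ad pages carry only a 'CZYTAJ DALEJ' (read on) link;
# follow it and return the real article page instead.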
tag=soup.find(name='a', attrs={'title':'CZYTAJ DALEJ'})
if tag:
new_soup=self.index_to_soup(tag['href'], raw=True)
return new_soup
def append_page(self, soup, appendtag):
tag=soup.find('a', attrs={'class':'page_next'})
if tag:
@ -56,3 +64,4 @@ class Dziennik_pl(BasicNewsRecipe):
def preprocess_html(self, soup):
self.append_page(soup, soup.body)
return soup

View File

@ -10,7 +10,8 @@ class Filmweb_pl(BasicNewsRecipe):
oldest_article = 8
max_articles_per_feed = 100
no_stylesheets= True
extra_css = '.hdrBig {font-size:22px;}'
remove_empty_feeds=True
extra_css = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}'
remove_tags= [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'})]
keep_only_tags= [dict(name='h1', attrs={'class':'hdrBig'}), dict(name='div', attrs={'class':['newsInfo', 'reviewContent fontSizeCont description']})]
feeds = [(u'Wszystkie newsy', u'http://www.filmweb.pl/feed/news/latest'),

View File

@ -0,0 +1,21 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Gameplay_pl(BasicNewsRecipe):
title = u'Gameplay.pl'
oldest_article = 7
__author__ = 'fenuks'
description = u'gameplay.pl - serwis o naszych zainteresowaniach, grach, filmach, książkach, muzyce, fotografii i konsolach.'
category = 'games, movies, books, music'
language = 'pl'
masthead_url= 'http://gameplay.pl/img/gpy_top_logo.png'
cover_url= 'http://gameplay.pl/img/gpy_top_logo.png'
max_articles_per_feed = 100
no_stylesheets= True
keep_only_tags=[dict(name='div', attrs={'class':['news_endpage_tit', 'news']})]
remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im']})]
feeds = [(u'Wiadomo\u015bci', u'http://gameplay.pl/rss/')]
def image_url_processor(self, baseurl, url):
if 'http' not in url:
return 'http://gameplay.pl'+ url[2:]
else:
return url

View File

@ -4,10 +4,11 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Gazeta_Wyborcza(BasicNewsRecipe):
title = u'Gazeta Wyborcza'
__author__ = 'fenuks'
cover_url = 'http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
language = 'pl'
description ='news from gazeta.pl'
category='newspaper'
publication_type = 'newspaper'
masthead_url='http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
INDEX='http://wyborcza.pl'
remove_empty_feeds= True
oldest_article = 3
@ -81,3 +82,10 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
return url
else:
return url.replace('http://wyborcza.biz/biznes/1', 'http://wyborcza.biz/biznes/2029020')
def get_cover_url(self):
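# Locate today's front-page miniature (id 'GWmini2'), follow its link and
# use the full-size scan found there as the cover image.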
soup = self.index_to_soup('http://wyborcza.pl/0,76762,3751429.html')
cover=soup.find(id='GWmini2')
soup = self.index_to_soup('http://wyborcza.pl/'+ cover.contents[3].a['href'])
self.cover_url='http://wyborcza.pl' + soup.img['src']
return getattr(self, 'cover_url', self.cover_url)

View File

@ -8,29 +8,31 @@ class Gry_online_pl(BasicNewsRecipe):
language = 'pl'
oldest_article = 13
INDEX= 'http://www.gry-online.pl/'
cover_url='http://www.gry-online.pl/img/1st_10/1st-gol-logo.png'
masthead_url='http://www.gry-online.pl/im/gry-online-logo.png'
cover_url='http://www.gry-online.pl/im/gry-online-logo.png'
max_articles_per_feed = 100
no_stylesheets= True
extra_css = 'p.wn1{font-size:22px;}'
remove_tags_after= [dict(name='div', attrs={'class':['tresc-newsa']})]
keep_only_tags = [dict(name='div', attrs={'class':['txthead']}), dict(name='p', attrs={'class':['wtx1', 'wn1', 'wob']}), dict(name='a', attrs={'class':['num_str_nex']})]
#remove_tags= [dict(name='div', attrs={'class':['news_plat']})]
keep_only_tags=[dict(name='div', attrs={'class':'gc660'})]
remove_tags=[dict({'class':['nav-social', 'add-info', 'smlb', 'lista lista3 lista-gry', 'S013po', 'zm_gfx_cnt_bottom', 'ocen-txt', 'wiecej-txt', 'wiecej-txt2']})]
feeds = [(u'Newsy', 'http://www.gry-online.pl/rss/news.xml'), ('Teksty', u'http://www.gry-online.pl/rss/teksty.xml')]
def append_page(self, soup, appendtag):
nexturl = soup.find('a', attrs={'class':'num_str_nex'})
if appendtag.find('a', attrs={'class':'num_str_nex'}) is not None:
appendtag.find('a', attrs={'class':'num_str_nex'}).replaceWith('\n')
if nexturl is not None:
if 'strona' in nexturl.div.string:
nexturl= self.INDEX + nexturl['href']
soup2 = self.index_to_soup(nexturl)
pagetext = soup2.findAll(name='p', attrs={'class':['wtx1', 'wn1', 'wob']})
for tag in pagetext:
pos = len(appendtag.contents)
appendtag.insert(pos, tag)
self.append_page(soup2, appendtag)
tag = appendtag.find('div', attrs={'class':'n5p'})
if tag:
nexturls=tag.findAll('a')
for nexturl in nexturls[1:]:
try:
soup2 = self.index_to_soup('http://www.gry-online.pl/S020.asp'+ nexturl['href'])
except:
soup2 = self.index_to_soup('http://www.gry-online.pl/S022.asp'+ nexturl['href'])
pagetext = soup2.find(attrs={'class':'gc660'})
for r in pagetext.findAll(name='header'):
r.extract()
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
for r in appendtag.findAll(attrs={'class':['n5p', 'add-info', 'twitter-share-button']}):
r.extract()
def preprocess_html(self, soup):

New binary files (recipe icons) added in this commit; the diff viewer only shows image previews, omitted here. Named additions: recipes/icons/in4_pl.png, recipes/icons/kresy_pl.png, recipes/icons/oclab_pl.png, recipes/icons/pc_arena.png, recipes/icons/pc_foster.png, recipes/icons/pure_pc.png, recipes/icons/tanuki.png, recipes/icons/tvn24.png, plus several other new icon files whose names are not shown in this view.

44
recipes/in4_pl.recipe Normal file
View File

@ -0,0 +1,44 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class in4(BasicNewsRecipe):
title = u'IN4.pl'
oldest_article = 7
max_articles_per_feed = 100
__author__ = 'fenuks'
description = u'Serwis Informacyjny - Aktualnosci, recenzje'
category = 'IT'
language = 'pl'
#cover_url= 'http://www.in4.pl/recenzje/337/in4pl.jpg'
no_stylesheets = True
remove_empty_feeds = True
preprocess_regexps = [(re.compile(ur'<a title="translate into.*?</a>', re.DOTALL), lambda match: '') ]
keep_only_tags=[dict(name='div', attrs={'class':'left_alone'})]
remove_tags_after=dict(name='img', attrs={'title':'komentarze'})
remove_tags=[dict(name='img', attrs={'title':'komentarze'})]
feeds = [(u'Wiadomo\u015bci', u'http://www.in4.pl/rss.php'), (u'Recenzje', u'http://www.in4.pl/rss_recenzje.php'), (u'Mini recenzje', u'http://www.in4.pl/rss_mini.php')]
def append_page(self, soup, appendtag):
a=soup.findAll('a')
nexturl=None
for i in a:
if i.string and 'następna str' in i.string:
nexturl='http://www.in4.pl/' + i['href']
i.extract()
while nexturl:
soup2 = self.index_to_soup(nexturl)
pagetext = soup2.find(id='news')
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
nexturl=None
tag=soup2.findAll('a')
for z in tag:
if z.string and u'następna str' in z.string:
nexturl='http://www.in4.pl/' + z['href']
break
def preprocess_html(self, soup):
self.append_page(soup, soup.body)
return soup

View File

@ -0,0 +1,18 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Informacje_USA(BasicNewsRecipe):
title = u'Informacje USA'
oldest_article = 7
max_articles_per_feed = 100
__author__ = 'fenuks'
description = u'portal wiadomości amerykańskich'
category = 'news'
language = 'pl'
masthead_url= 'http://www.informacjeusa.com/wp-content/add_images/top_logo_5_2010.jpg'
cover_url='http://www.informacjeusa.com/wp-content/add_images/top_logo_5_2010.jpg'
no_stylesheets = True
preprocess_regexps = [(re.compile(ur'<p>Zobacz:.*?</p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><a href=".*?Zobacz także:.*?</a></p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><p>Zobacz też:.*?</a></p>', re.DOTALL), lambda match: '')]
keep_only_tags=[dict(name='div', attrs={'class':'box box-single'})]
remove_tags_after= dict(attrs={'class':'tags'})
remove_tags= [dict(attrs={'class':['postmetadata', 'tags', 'banner']}), dict(name='a', attrs={'title':['Drukuj', u'Wyślij']})]
feeds = [(u'Informacje', u'http://www.informacjeusa.com/feed/')]

14
recipes/kresy_pl.recipe Normal file
View File

@ -0,0 +1,14 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Kresy(BasicNewsRecipe):
title = u'Kresy'
__author__ = 'fenuks'
description = u'portal społeczności kresowej'
language = 'pl'
masthead_url= 'http://www.kresy.pl/public/img/logo.png'
cover_url= 'http://www.kresy.pl/public/img/logo.png'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
keep_only_tags= [dict(id='artykul')]
remove_tags= [dict(attrs={'class':['twitter-share-button', 'likefbborder', 'tagi']})]
feeds = [(u'Wszystkie', u'http://www.kresy.pl/rss')]

View File

@ -0,0 +1,17 @@
__version__ = 'v1.0'
__date__ = '13, February 2011'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1329125921(BasicNewsRecipe):
title = u'La pausa caff\xe8'
__author__ = 'faber1971'
description = 'An Italian satirical blog'
language = 'it'
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = True
no_stylesheets = True
feeds = [(u'La pausa caff\xe8', u'http://feeds.feedburner.com/LapausaCaffe')]

View File

@ -1,4 +1,5 @@
__license__ = 'GPL v3'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1327062445(BasicNewsRecipe):
@ -7,10 +8,13 @@ class AdvancedUserRecipe1327062445(BasicNewsRecipe):
max_articles_per_feed = 100
auto_cleanup = True
remove_javascript = True
no_stylesheets = True
remove_tags = [
dict(name='ul', attrs={'id':'ads0'})
]
masthead_url = 'http://www.simrendeogun.com/wp-content/uploads/2011/06/New-Marketing-Magazine-Logo.jpg'
feeds = [(u'My Marketing', u'http://feed43.com/0537744466058428.xml'), (u'My Marketing_', u'http://feed43.com/8126723074604845.xml'), (u'Venturini', u'http://robertoventurini.blogspot.com/feeds/posts/default?alt=rss'), (u'Ninja Marketing', u'http://feeds.feedburner.com/NinjaMarketing'), (u'Comunitàzione', u'http://www.comunitazione.it/feed/novita.asp'), (u'Brandforum news', u'http://www.brandforum.it/rss/news'), (u'Brandforum papers', u'http://www.brandforum.it/rss/papers'), (u'Disambiguando', u'http://giovannacosenza.wordpress.com/feed/')]
__author__ = 'faber1971'
description = 'Collection of Italian marketing websites - v1.00 (28, January 2012)'
description = 'Collection of Italian marketing websites - v1.03 (20, February 2012)'
language = 'it'
feeds = [(u'My Marketing', u'http://feed43.com/0537744466058428.xml'), (u'My Marketing_', u'http://feed43.com/8126723074604845.xml'), (u'Venturini', u'http://robertoventurini.blogspot.com/feeds/posts/default?alt=rss'), (u'Ninja Marketing', u'http://feeds.feedburner.com/NinjaMarketing'), (u'Comunitàzione', u'http://www.comunitazione.it/feed/novita.asp'), (u'Brandforum news', u'http://www.brandforum.it/rss/news'), (u'Brandforum papers', u'http://www.brandforum.it/rss/papers'), (u'MarketingArena', u'http://feeds.feedburner.com/marketingarena'), (u'minimarketing', u'http://feeds.feedburner.com/minimarketingit'), (u'Disambiguando', u'http://giovannacosenza.wordpress.com/feed/')]

View File

@ -1,16 +1,17 @@
__license__ = 'GPL v3'
__copyright__ = '2009, Mathieu Godlewski <mathieu at godlewski.fr>; 2010, Louis Gesbert <meta at antislash dot info>'
__copyright__ = '2009, Mathieu Godlewski <mathieu at godlewski.fr>; 2010, 2011, Louis Gesbert <meta at antislash dot info>'
'''
Mediapart
'''
from calibre.ebooks.BeautifulSoup import Tag
import re
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.web.feeds.news import BasicNewsRecipe
class Mediapart(BasicNewsRecipe):
title = 'Mediapart'
__author__ = 'Mathieu Godlewski'
description = 'Global news in french from online newspapers'
__author__ = 'Mathieu Godlewski, Louis Gesbert'
description = 'Global news in French from news site Mediapart'
oldest_article = 7
language = 'fr'
needs_subscription = True
@ -18,52 +19,30 @@ class Mediapart(BasicNewsRecipe):
max_articles_per_feed = 50
no_stylesheets = True
cover_url = 'http://www.mediapart.fr/sites/all/themes/mediapart/mediapart/images/annonce.jpg'
cover_url = 'http://static.mediapart.fr/files/pave_mediapart.jpg'
feeds = [
('Les articles', 'http://www.mediapart.fr/articles/feed'),
]
# -- print-version has poor quality on this website, better do the conversion ourselves
#
# preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
# [
# (r'<div class="print-title">([^>]+)</div>', lambda match : '<h2>'+match.group(1)+'</h2>'),
# (r'<span class=\'auteur_staff\'>[^>]+<a title=\'[^\']*\'[^>]*>([^<]*)</a>[^<]*</span>',
# lambda match : '<i>'+match.group(1)+'</i>'),
# (r'\'', lambda match: '&rsquo;'),
# ]
# ]
#
# remove_tags = [ dict(name='div', attrs={'class':'print-source_url'}),
# dict(name='div', attrs={'class':'print-links'}),
# dict(name='img', attrs={'src':'entete_article.png'}),
# dict(name='br') ]
#
# def print_version(self, url):
# raw = self.browser.open(url).read()
# soup = BeautifulSoup(raw.decode('utf8', 'replace'))
# div = soup.find('div', {'id':re.compile('node-\d+')})
# if div is None:
# return None
# article_id = string.replace(div['id'], 'node-', '')
# if article_id is None:
# return None
# return 'http://www.mediapart.fr/print/'+article_id
# -- print-version
# -- Non-print version [dict(name='div', attrs={'class':'advert'})]
keep_only_tags = [
dict(name='h1', attrs={'class':'title'}),
dict(name='div', attrs={'class':'page_papier_detail'}),
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
[
(r'<div class="print-title">([^>]+)</div>', lambda match : '<h2>'+match.group(1)+'</h2>'),
(r'\'', lambda match: '&rsquo;')
]
]
def preprocess_html(self,soup):
for title in soup.findAll('div', {'class':'titre'}):
tag = Tag(soup, 'h3')
title.replaceWith(tag)
tag.insert(0,title)
return soup
remove_tags = [ dict(name='div', attrs={'class':'print-source_url'}) ]
def print_version(self, url):
raw = self.browser.open(url).read()
soup = BeautifulSoup(raw.decode('utf8', 'replace'))
link = soup.find('a', {'title':'Imprimer'})
if link is None:
return None
return link['href']
# -- Handle login
@ -76,4 +55,3 @@ class Mediapart(BasicNewsRecipe):
br['pass'] = self.password
br.submit()
return br

View File

@ -1,8 +1,9 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class naczytniki(BasicNewsRecipe):
title = u'naczytniki.pl'
__author__ = 'fenuks'
masthead_url= 'http://naczytniki.pl/wp-content/uploads/2010/08/logo_nc28.png'
cover_url = 'http://naczytniki.pl/wp-content/uploads/2010/08/logo_nc28.png'
language = 'pl'
description ='everything about e-readers'
@ -10,6 +11,7 @@ class naczytniki(BasicNewsRecipe):
no_stylesheets=True
oldest_article = 7
max_articles_per_feed = 100
preprocess_regexps = [(re.compile(ur'<p><br><b>Zobacz także:</b></p>.*?</body>', re.DOTALL), lambda match: '</body>') ]
remove_tags_after= dict(name='div', attrs={'class':'sociable'})
keep_only_tags=[dict(name='div', attrs={'class':'post'})]
remove_tags=[dict(name='span', attrs={'class':'comments'}), dict(name='div', attrs={'class':'sociable'})]

View File

@ -1,21 +1,33 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Nowa_Fantastyka(BasicNewsRecipe):
title = u'Nowa Fantastyka'
oldest_article = 7
__author__ = 'fenuks'
__modified_by__ = 'zaslav'
language = 'pl'
encoding='latin2'
description ='site for fantasy readers'
category='fantasy'
masthead_url='http://farm5.static.flickr.com/4133/4956658792_7ba7fbf562.jpg'
#extra_css='.tytul {font-size: 20px;}' #not working
max_articles_per_feed = 100
INDEX='http://www.fantastyka.pl/'
no_stylesheets=True
needs_subscription = 'optional'
remove_tags_before=dict(attrs={'class':'belka1-tlo-md'})
remove_tags_before=dict(attrs={'class':'naglowek2'})
#remove_tags_after=dict(name='span', attrs={'class':'naglowek-oceny'})
remove_tags_after=dict(name='td', attrs={'class':'belka1-bot'})
remove_tags=[dict(attrs={'class':'avatar2'}), dict(name='span', attrs={'class':'alert-oceny'}), dict(name='img', attrs={'src':['obrazki/sledz1.png', 'obrazki/print.gif', 'obrazki/mlnf.gif']}), dict(name='b', text='Dodaj komentarz'),dict(name='a', attrs={'href':'http://www.fantastyka.pl/10,1727.html'})]
remove_tags_after=dict(name='form', attrs={'name':'form1'})
remove_tags=[dict(attrs={'class':['avatar2', 'belka-margin', 'naglowek2']}), dict(name='span', attrs={'class':'alert-oceny'}), dict(name='img', attrs={'src':['obrazki/sledz1.png', 'obrazki/print.gif', 'obrazki/mlnf.gif']}), dict(name='b', text='Dodaj komentarz'),dict(name='a', attrs={'href':'http://www.fantastyka.pl/10,1727.html'}), dict(name='form')]
preprocess_regexps = [
(re.compile(r'\<table .*?\>'), lambda match: ''),
(re.compile(r'\<td.*?\>'), lambda match: ''),
(re.compile(r'\<center\>'), lambda match: '')]
def find_articles(self, url):
articles = []
@ -41,10 +53,10 @@ class Nowa_Fantastyka(BasicNewsRecipe):
return feeds
def get_cover_url(self):
soup = self.index_to_soup('http://www.fantastyka.pl/1.html')
cover=soup.find(name='img', attrs={'class':'okladka'})
self.cover_url=self.INDEX+ cover['src']
soup = self.index_to_soup('http://www.e-kiosk.pl/nowa_fantastyka')
self.cover_url='http://www.e-kiosk.pl' + soup.find(name='a', attrs={'class':'img'})['href']
return getattr(self, 'cover_url', self.cover_url)
def get_browser(self):
@ -56,3 +68,18 @@ class Nowa_Fantastyka(BasicNewsRecipe):
br['pass'] = self.password
br.submit()
return br
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(font=True):
del item['font']
for item in soup.findAll(align=True):
del item['align']
for item in soup.findAll(name='tr'):
item.name='div'
title=soup.find(attrs={'class':'tytul'})
if title:
title['style']='font-size: 20px; font-weight: bold;'
self.log.warn(soup)
return soup

31
recipes/oclab_pl.recipe Normal file
View File

@ -0,0 +1,31 @@
from calibre.web.feeds.news import BasicNewsRecipe
class OCLab(BasicNewsRecipe):
title = u'OCLab.pl'
oldest_article = 7
max_articles_per_feed = 100
__author__ = 'fenuks'
description = u'Portal OCLab.pl jest miejscem przyjaznym pasjonatom sprzętu komputerowego, w szczególności overclockerom, które będzie służyć im za aktualną bazę wiedzy o podkręcaniu komputera, źródło aktualnych informacji z rynku oraz opinii na temat sprzętu komputerowego.'
category = 'IT'
language = 'pl'
cover_url= 'http://www.idealforum.ru/attachment.php?attachmentid=7963&d=1316008118'
no_stylesheets = True
keep_only_tags=[dict(id='main')]
remove_tags_after= dict(attrs={'class':'single-postmetadata'})
remove_tags=[dict(attrs={'class':['single-postmetadata', 'pagebar']})]
feeds = [(u'Wpisy', u'http://oclab.pl/feed/')]
def append_page(self, soup, appendtag):
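# Multi-page articles list their pages in a 'contentjumpddl' drop-down;
# fetch each remaining page and append its 'single-entry' block to the first one.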
tag=soup.find(attrs={'class':'contentjumpddl'})
if tag:
nexturl=tag.findAll('option')
for nextpage in nexturl[1:-1]:
soup2 = self.index_to_soup(nextpage['value'])
pagetext = soup2.find(attrs={'class':'single-entry'})
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
for r in appendtag.findAll(attrs={'class':'post-nav-bottom-list'}):
r.extract()
def preprocess_html(self, soup):
self.append_page(soup, soup.body)
return soup

View File

@ -0,0 +1,37 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Overclock_pl(BasicNewsRecipe):
title = u'Overclock.pl'
oldest_article = 7
max_articles_per_feed = 100
__author__ = 'fenuks'
description = u'Vortal poświęcony tematyce hardware, kładący największy nacisk na podkręcanie / overclocking (włącznie z extreme) i chłodzenie / cooling (air cooling, water cooling, freon cooling, dry ice, liquid nitrogen).'
category = 'IT'
language = 'pl'
masthead_url='http://www.overclock.pl/gfx/logo_m.png'
cover_url='http://www.overclock.pl/gfx/logo_m.png'
no_stylesheets = True
remove_empty_feeds = True
preprocess_regexps = [(re.compile(ur'<b>Komentarze do aktualności:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(ur'<h3>Nawigacja</h3>', re.DOTALL), lambda match: '') ]
keep_only_tags=[dict(name='div', attrs={'class':'news'}), dict(id='articleContent')]
remove_tags=[dict(name='span', attrs={'class':'info'}), dict(attrs={'class':'shareit'})]
feeds = [(u'Aktualno\u015bci', u'http://www.overclock.pl/rss.news.xml'), (u'Testy i recenzje', u'http://www.overclock.pl/rss.articles.xml')]
def append_page(self, soup, appendtag):
tag=soup.find(id='navigation')
if tag:
nexturl=tag.findAll('option')
tag.extract()
for nextpage in nexturl[2:]:
soup2 = self.index_to_soup(nextpage['value'])
pagetext = soup2.find(id='content')
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
rem=appendtag.find(attrs={'alt':'Pierwsza'})
if rem:
rem.parent.extract()
def preprocess_html(self, soup):
self.append_page(soup, soup.body)
return soup

14
recipes/palmtop_pl.recipe Normal file
View File

@ -0,0 +1,14 @@
from calibre.web.feeds.news import BasicNewsRecipe
class palmtop_pl(BasicNewsRecipe):
title = u'Palmtop.pl'
__author__ = 'fenuks'
description = 'wortal technologii mobilnych'
category = 'mobile'
language = 'pl'
cover_url='http://cdn.g-point.biz/wp-content/themes/palmtop-new/images/header_palmtop_logo.png'
masthead_url='http://cdn.g-point.biz/wp-content/themes/palmtop-new/images/header_palmtop_logo.png'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
feeds = [(u'Newsy', u'http://palmtop.pl/feed/atom/')]

31
recipes/pc_arena.recipe Normal file
View File

@ -0,0 +1,31 @@
from calibre.web.feeds.news import BasicNewsRecipe
class PC_Arena(BasicNewsRecipe):
title = u'PCArena'
oldest_article = 18300
max_articles_per_feed = 100
__author__ = 'fenuks'
description = u'Najnowsze informacje z branży IT - testy, recenzje, aktualności, rankingi, wywiady. Twoje źródło informacji o sprzęcie komputerowym.'
category = 'IT'
language = 'pl'
masthead_url='http://pcarena.pl/public/design/frontend/images/logo.gif'
cover_url= 'http://pcarena.pl/public/design/frontend/images/logo.gif'
no_stylesheets = True
keep_only_tags=[dict(attrs={'class':['artHeader', 'art']})]
remove_tags=[dict(attrs={'class':'pages'})]
feeds = [(u'Newsy', u'http://pcarena.pl/misc/rss/news'), (u'Artyku\u0142y', u'http://pcarena.pl/misc/rss/articles')]
def append_page(self, soup, appendtag):
tag=soup.find(name='div', attrs={'class':'pagNum'})
if tag:
nexturl=tag.findAll('a')
tag.extract()
for nextpage in nexturl[1:]:
nextpage= 'http://pcarena.pl' + nextpage['href']
soup2 = self.index_to_soup(nextpage)
pagetext = soup2.find(attrs={'class':'artBody'})
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
def preprocess_html(self, soup):
self.append_page(soup, soup.body)
return soup

View File

@ -0,0 +1,41 @@
from calibre.web.feeds.news import BasicNewsRecipe
class PC_Centre(BasicNewsRecipe):
title = u'PC Centre'
oldest_article = 7
max_articles_per_feed = 100
__author__ = 'fenuks'
description = u'Portal komputerowy, a w nim: testy sprzętu komputerowego, recenzje gier i oprogramowania. a także opisy produktów związanych z komputerami.'
category = 'IT'
language = 'pl'
masthead_url= 'http://pccentre.pl/views/images/logo.gif'
cover_url= 'http://pccentre.pl/views/images/logo.gif'
no_stylesheets = True
keep_only_tags= [dict(id='content')]
remove_tags=[dict(attrs={'class':['ikony r', 'list_of_content', 'dot accordion']}), dict(id='comments')]
feeds = [(u'Publikacje', u'http://pccentre.pl/backend.php?mode=a'), (u'Aktualno\u015bci', u'http://pccentre.pl/backend.php'), (u'Sprz\u0119t komputerowy', u'http://pccentre.pl/backend.php?mode=n&section=2'), (u'Oprogramowanie', u'http://pccentre.pl/backend.php?mode=n&section=3'), (u'Gry komputerowe i konsole', u'http://pccentre.pl/backend.php?mode=n&section=4'), (u'Internet', u'http://pccentre.pl/backend.php?mode=n&section=7'), (u'Bezpiecze\u0144stwo', u'http://pccentre.pl/backend.php?mode=n&section=5'), (u'Multimedia', u'http://pccentre.pl/backend.php?mode=n&section=6'), (u'Biznes', u'http://pccentre.pl/backend.php?mode=n&section=9')]
def append_page(self, soup, appendtag):
tag=soup.find(name='div', attrs={'class':'pages'})
if tag:
nexturl=tag.findAll('a')
tag.extract()
for nextpage in nexturl[:-1]:
nextpage= 'http://pccentre.pl' + nextpage['href']
soup2 = self.index_to_soup(nextpage)
pagetext = soup2.find(id='content')
rem=pagetext.findAll(attrs={'class':['subtitle', 'content_info', 'list_of_content', 'pages', 'social2', 'pcc_acc', 'pcc_acc_na']})
for r in rem:
r.extract()
rem=pagetext.findAll(id='comments')
for r in rem:
r.extract()
rem=pagetext.findAll('h1')
for r in rem:
r.extract()
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
def preprocess_html(self, soup):
self.append_page(soup, soup.body)
return soup

35
recipes/pc_foster.recipe Normal file
View File

@ -0,0 +1,35 @@
from calibre.web.feeds.news import BasicNewsRecipe
class PC_Foster(BasicNewsRecipe):
title = u'PC Foster'
oldest_article = 7
max_articles_per_feed = 100
__author__ = 'fenuks'
description = u'Vortal technologiczny: testy, recenzje sprzętu komputerowego i telefonów, nowinki hardware, programy i gry dla Windows. Podkręcanie, modding i Overclocking.'
category = 'IT'
language = 'pl'
masthead_url='http://pcfoster.pl/public/images/logo.png'
cover_url= 'http://pcfoster.pl/public/images/logo.png'
no_stylesheets= True
remove_empty_feeds= True
keep_only_tags= [dict(id=['news_details', 'review_details']), dict(attrs={'class':'pager more_top'})]
remove_tags=[dict(name='p', attrs={'class':'right'})]
feeds = [(u'G\u0142\xf3wny', u'http://pcfoster.pl/public/rss/main.xml')]
def append_page(self, soup, appendtag):
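# Follow the 'Następna strona' (next page) arrow until it disappears,
# appending each page's 'content' block, then drop duplicated review sections.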
nexturl= appendtag.find(attrs={'alt':u'Następna strona'})
if nexturl:
appendtag.find(attrs={'class':'pager more_top'}).extract()
while nexturl:
nexturl='http://pcfoster.pl' + nexturl.parent['href']
soup2 = self.index_to_soup(nexturl)
nexturl=soup2.find(attrs={'alt':u'Następna strona'})
pagetext = soup2.find(attrs={'class':'content'})
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
for r in appendtag.findAll(attrs={'class':'review_content double'}):
r.extract()
def preprocess_html(self, soup):
self.append_page(soup, soup.body)
return soup

View File

@ -0,0 +1,81 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Polska_times(BasicNewsRecipe):
title = u'Polska Times'
__author__ = 'fenuks'
description = u'Internetowe wydanie dziennika ogólnopolskiego Polska The Times. Najświeższe informacje: wydarzenia w kraju i na świecie, reportaże, poradniki, opinie.'
category = 'newspaper'
language = 'pl'
masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/polska.gif?17'
oldest_article = 7
max_articles_per_feed = 100
remove_empty_feeds = True
no_stylesheets = True
preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
keep_only_tags= [dict(id=['tytul-artykulu', 'kontent'])]
remove_tags_after= dict(id='material-tagi')
remove_tags=[dict(attrs={'id':'reklama_srodtekst_0'}), dict(attrs={'id':'material-tagi'}), dict(name='div', attrs={'class':'zakladki'}), dict(attrs={'title':u'CZYTAJ TAKŻE'}), dict(attrs={'id':'podobne'}), dict(name='a', attrs={'href':'http://www.dzienniklodzki.pl/newsletter'})]
feeds = [(u'Fakty', u'http://polskatimes.feedsportal.com/c/32980/f/533648/index.rss'), (u'Opinie', u'http://www.polskatimes.pl/rss/opinie.xml'), (u'Sport', u'http://polskatimes.feedsportal.com/c/32980/f/533649/index.rss'), (u'Pieni\u0105dze', u'http://polskatimes.feedsportal.com/c/32980/f/533657/index.rss'), (u'Twoje finanse', u'http://www.polskatimes.pl/rss/twojefinanse.xml'), (u'Kultura', u'http://polskatimes.feedsportal.com/c/32980/f/533650/index.rss'), (u'Dodatki', u'http://www.polskatimes.pl/rss/dodatki.xml')]
def skip_ad_pages(self, soup):
if 'Advertisement' in soup.title:
nexturl=soup.find('a')['href']
return self.index_to_soup(nexturl, raw=True)
def append_page(self, soup, appendtag):
nexturl=soup.find(id='nastepna_strona')
while nexturl:
soup2= self.index_to_soup(nexturl['href'])
nexturl=soup2.find(id='nastepna_strona')
pagetext = soup2.find(id='tresc')
for dictionary in self.remove_tags:
v=pagetext.findAll(attrs=dictionary['attrs'])
for delete in v:
delete.extract()
for b in pagetext.findAll(name='b'):
if b.string:
if u'CZYTAJ TEŻ' in b.string or u'Czytaj także' in b.string or u'Czytaj też' in b.string or u'Zobacz także' in b.string:
b.extract()
for center in pagetext.findAll(name='center'):
if center.h4:
if center.h4.a:
center.extract()
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
for paginator in appendtag.findAll(attrs={'class':'stronicowanie'}):
paginator.extract()
def image_article(self, soup, appendtag):
nexturl=soup.find('a', attrs={'class':'nastepna'})
urls=[]
while nexturl:
if nexturl not in urls:
urls.append(nexturl)
else:
break
soup2= self.index_to_soup('http://www.polskatimes.pl/artykul/' + nexturl['href'])
nexturl=soup2.find('a', attrs={'class':'nastepna'})
if nexturl in urls:
break;
pagetext = soup2.find(id='galeria-material')
pos = len(appendtag.contents)
appendtag.insert(pos, '<br />')
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
for rem in appendtag.findAll(attrs={'class':['galeriaNawigator', 'miniaturyPojemnik']}):
rem.extract()
for paginator in appendtag.findAll(attrs={'class':'stronicowanie'}):
paginator.extract()
def preprocess_html(self, soup):
if soup.find('a', attrs={'class':'nastepna'}):
self.image_article(soup, soup.body)
elif soup.find(id='nastepna_strona'):
self.append_page(soup, soup.body)
return soup
def get_cover_url(self):
soup = self.index_to_soup('http://www.prasa24.pl/gazeta/metropolia-warszawska/')
self.cover_url=soup.find(id='pojemnik').img['src']
return getattr(self, 'cover_url', self.cover_url)

33
recipes/pure_pc.recipe Normal file
View File

@ -0,0 +1,33 @@
from calibre.web.feeds.news import BasicNewsRecipe
class PurePC(BasicNewsRecipe):
title = u'PurePC'
oldest_article = 7
max_articles_per_feed = 100
__author__ = 'fenuks'
description = u'Artykuły, aktualności, sprzęt, forum, chłodzenie, modding, urządzenia mobilne - wszystko w jednym miejscu.'
category = 'IT'
language = 'pl'
masthead_url= 'http://www.purepc.pl/themes/new/images/purepc.jpg'
cover_url= 'http://www.purepc.pl/themes/new/images/purepc.jpg'
no_stylesheets = True
keep_only_tags= [dict(id='content')]
remove_tags_after= dict(attrs={'class':'fivestar-widget'})
remove_tags= [dict(id='navigator'), dict(attrs={'class':['box-tools', 'fivestar-widget', 'PageMenuList']})]
feeds = [(u'Wiadomo\u015bci', u'http://www.purepc.pl/node/feed')]
def append_page(self, soup, appendtag):
nexturl= appendtag.find(attrs={'class':'pager-next'})
if nexturl:
while nexturl:
soup2 = self.index_to_soup('http://www.purepc.pl'+ nexturl.a['href'])
nexturl=soup2.find(attrs={'class':'pager-next'})
pagetext = soup2.find(attrs={'class':'article'})
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
for r in appendtag.findAll(attrs={'class':['PageMenuList', 'pager', 'fivestar-widget']}):
r.extract()
def preprocess_html(self, soup):
self.append_page(soup, soup.body)
return soup

View File

@ -1,14 +1,16 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Tablety_pl(BasicNewsRecipe):
title = u'Tablety.pl'
__author__ = 'fenuks'
description = u'tablety.pl - latest tablet news'
masthead_url= 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png'
cover_url = 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png'
category = 'IT'
language = 'pl'
oldest_article = 8
max_articles_per_feed = 100
preprocess_regexps = [(re.compile(ur'<p><strong>Przeczytaj także.*?</a></strong></p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><strong>Przeczytaj koniecznie.*?</a></strong></p>', re.DOTALL), lambda match: '')]
remove_tags_before=dict(name="h1", attrs={'class':'entry-title'})
remove_tags_after=dict(name="div", attrs={'class':'snap_nopreview sharing robots-nocontent'})
remove_tags=[dict(name='div', attrs={'class':'snap_nopreview sharing robots-nocontent'})]

37
recipes/tanuki.recipe Normal file
View File

@ -0,0 +1,37 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class tanuki(BasicNewsRecipe):
title = u'Tanuki'
oldest_article = 7
__author__ = 'fenuks'
category = 'anime, manga'
language = 'pl'
max_articles_per_feed = 100
encoding='utf-8'
extra_css= 'ul {list-style: none; padding: 0; margin: 0;} .kadr{float: left;} .dwazdania {float: right;}'
preprocess_regexps = [(re.compile(ur'<h3><a class="screen".*?</h3>', re.DOTALL), lambda match: ''), (re.compile(ur'<div><a href="/strony/((manga)|(anime))/[0-9]+?/oceny(\-redakcji){0,1}">Zobacz jak ocenili</a></div>', re.DOTALL), lambda match: '')]
remove_empty_feeds= True
no_stylesheets = True
keep_only_tags=[dict(attrs={'class':['animename', 'storyname', 'nextarrow','sideinfov', 'sidelinfov', 'sideinfo', 'sidelinfo']}), dict(name='table', attrs={'summary':'Technikalia'}), dict(attrs={'class':['chaptername','copycat']}), dict(id='rightcolumn'), dict(attrs={'class':['headn_tt', 'subtable']})]
remove_tags=[dict(name='div', attrs={'class':'screen'}), dict(id='randomtoplist'), dict(attrs={'class':'note'})]
feeds = [(u'Anime', u'http://anime.tanuki.pl/rss_anime.xml'), (u'Manga', u'http://manga.tanuki.pl/rss_manga.xml'), (u'Tomiki', u'http://manga.tanuki.pl/rss_mangabooks.xml'), (u'Artyku\u0142y', u'http://czytelnia.tanuki.pl/rss_czytelnia_artykuly.xml'), (u'Opowiadania', u'http://czytelnia.tanuki.pl/rss_czytelnia.xml')]
def append_page(self, soup, appendtag):
nexturl= appendtag.find(attrs={'class':'nextarrow'})
if nexturl:
while nexturl:
soup2 = self.index_to_soup('http://czytelnia.tanuki.pl'+ nexturl['href'])
nexturl=soup2.find(attrs={'class':'nextarrow'})
pagetext = soup2.find(attrs={'class':['chaptername', 'copycat']})
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
pagetext = soup2.find(attrs={'class':'copycat'})
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
for r in appendtag.findAll(attrs={'class':'nextarrow'}):
r.extract()
def preprocess_html(self, soup):
self.append_page(soup, soup.body)
return soup

View File

@ -1,49 +1,57 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1268409464(BasicNewsRecipe):
title = u'The Sun'
__author__ = 'Chaz Ralph'
description = 'News from The Sun'
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
title = u'The Sun UK'
cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png'
description = 'A Recipe for The Sun tabloid UK - uses feed43'
__author__ = 'Dave Asbury'
# last updated 20/2/12
language = 'en_GB'
oldest_article = 1
max_articles_per_feed = 100
language = 'en'
max_articles_per_feed = 15
remove_empty_feeds = True
no_stylesheets = True
extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
encoding= 'iso-8859-1'
remove_javascript = True
masthead_url = 'http://www.thesun.co.uk/sol/img/global/Sun-logo.gif'
encoding = 'cp1251'
encoding = 'cp1252'
remove_empty_feeds = True
remove_javascript = True
no_stylesheets = True
extra_css = '''
body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
'''
preprocess_regexps = [
(re.compile(r'<div class="foot-copyright".*?</div>', re.IGNORECASE | re.DOTALL), lambda match: '')]
keep_only_tags = [
dict(id='column-print')
dict(name='h1'),dict(name='h2',attrs={'class' : 'medium centered'}),
dict(name='div',attrs={'class' : 'text-center'}),
dict(name='div',attrs={'id' : 'bodyText'})
# dict(name='p')
]
remove_tags=[
#dict(name='head'),
dict(attrs={'class' : ['mystery-meat-link','ltbx-container','ltbx-var ltbx-hbxpn','ltbx-var ltbx-nav-loop','ltbx-var ltbx-url']}),
dict(name='div',attrs={'class' : 'cf'}),
dict(attrs={'title' : 'download flash'}),
dict(attrs={'style' : 'padding: 5px'})
]
feeds = [
(u'News','http://feed43.com/2517447382644748.xml'),
(u'Sport', u'http://feed43.com/4283846255668687.xml'),
(u'Bizarre', u'http://feed43.com/0233840304242011.xml'),
(u'Film',u'http://feed43.com/1307545221226200.xml'),
(u'Music',u'http://feed43.com/1701513435064132.xml'),
(u'Sun Woman',u'http://feed43.com/0022626854226453.xml'),
]
remove_tags = [
dict(name='div', attrs={'class':[
'clear text-center small padding-left-right-5 text-999 padding-top-5 padding-bottom-10 grey-solid-line',
'clear width-625 bg-fff padding-top-10'
]}),
dict(name='video'),
]
def preprocess_html(self, soup):
h1 = soup.find('h1')
if h1 is not None:
text = self.tag_to_string(h1)
nh = Tag(soup, 'h1')
nh.insert(0, text)
h1.replaceWith(nh)
return soup
feeds = [(u'News', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article312900.ece')
,(u'Sport', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article247732.ece')
,(u'Football', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article247739.ece')
,(u'Gizmo', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article247829.ece')
,(u'Bizarre', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article247767.ece')]
def print_version(self, url):
return re.sub(r'\?OTC-RSS&ATTR=[-a-zA-Z]+', '?print=yes', url)

View File

@ -0,0 +1,11 @@
from calibre.web.feeds.news import BasicNewsRecipe
class BasicUserRecipe1317069944(BasicNewsRecipe):
title = u'Times of Malta'
__author__ = 'To Do'
language = 'en'
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = True
feeds = [(u'Times of Malta', u'http://www.timesofmalta.com/rss')]

24
recipes/tvn24.recipe Normal file
View File

@ -0,0 +1,24 @@
from calibre.web.feeds.news import BasicNewsRecipe
class tvn24(BasicNewsRecipe):
title = u'TVN24'
oldest_article = 7
max_articles_per_feed = 100
__author__ = 'fenuks'
description = u'Sport, Biznes, Gospodarka, Informacje, Wiadomości Zawsze aktualne wiadomości z Polski i ze świata'
category = 'news'
language = 'pl'
masthead_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif'
cover_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif'
extra_css= 'ul {list-style: none; padding: 0; margin: 0;} li {float: left;margin: 0 0.15em;}'
remove_empty_feeds = True
remove_javascript = True
no_stylesheets = True
keep_only_tags=[dict(id='tvn24_wiadomosci_detal'), dict(name='h1', attrs={'class':'standardHeader1'}), dict(attrs={'class':['date60m rd5', 'imageBackground fl rd7', 'contentFromCMS']})]
remove_tags_after= dict(name='div', attrs={'class':'socialBoxesBottom'})
remove_tags=[dict(attrs={'class':['tagi_detal', 'socialBoxesBottom', 'twitterBox', 'commentsInfo', 'textSize', 'obj_ukrytydruk obj_ramka1_r', 'related newsNews align-right', 'box', 'newsUserList', 'watchMaterial text']})]
    feeds = [(u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'), (u'Polska', u'http://www.tvn24.pl/polska.xml'), (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'), (u'Sport', u'http://www.tvn24.pl/sport.xml'), (u'Biznes', u'http://www.tvn24.pl/biznes.xml'), (u'Meteo', u'http://www.tvn24.pl/meteo.xml'), (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'), (u'Kultura', u'http://www.tvn24.pl/kultura.xml')]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -4,10 +4,12 @@ class Ubuntu_pl(BasicNewsRecipe):
title = u'UBUNTU.pl'
__author__ = 'fenuks'
description = 'UBUNTU.pl - polish ubuntu community site'
masthead_url= 'http://ubuntu.pl/img/logo.jpg'
cover_url = 'http://ubuntu.pl/img/logo.jpg'
category = 'linux, IT'
language = 'pl'
no_stylesheets = True
remove_empty_feeds = True
oldest_article = 8
max_articles_per_feed = 100
extra_css = '#main {text-align:left;}'

View File

@ -0,0 +1,39 @@
from calibre.web.feeds.news import BasicNewsRecipe
class webhosting_pl(BasicNewsRecipe):
title = u'Webhosting.pl'
__author__ = 'fenuks'
description = 'Webhosting.pl to pierwszy na polskim rynku serwis poruszający w szerokim aspekcie tematy związane z hostingiem, globalną Siecią i usługami internetowymi. Głównym celem przedsięwzięcia jest dostarczanie przydatnej i bogatej merytorycznie wiedzy osobom, które chcą tworzyć i efektywnie wykorzystywać współczesny Internet.'
category = 'web'
language = 'pl'
cover_url='http://webhosting.pl/images/logo.png'
masthead_url='http://webhosting.pl/images/logo.png'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
remove_empty_feeds = True
#keep_only_tags= [dict(name='div', attrs={'class':'content_article'}), dict(attrs={'class':'paging'})]
#remove_tags=[dict(attrs={'class':['tags', 'wykop', 'facebook_button_count', 'article_bottom']})]
feeds = [(u'Newsy', u'http://webhosting.pl/feed/rss/an'),
(u'Artyku\u0142y', u'http://webhosting.pl/feed/rss/aa'),
(u'Software', u'http://webhosting.pl/feed/rss/n/12'),
(u'Internet', u'http://webhosting.pl/feed/rss/n/9'),
(u'Biznes', u'http://webhosting.pl/feed/rss/n/13'),
(u'Bezpiecze\u0144stwo', u'http://webhosting.pl/feed/rss/n/10'),
(u'Blogi', u'http://webhosting.pl/feed/rss/ab'),
(u'Programowanie', u'http://webhosting.pl/feed/rss/n/8'),
(u'Kursy', u'http://webhosting.pl/feed/rss/n/11'),
(u'Tips&Tricks', u'http://webhosting.pl/feed/rss/n/15'),
(u'Imprezy', u'http://webhosting.pl/feed/rss/n/22'),
(u'Wywiady', u'http://webhosting.pl/feed/rss/n/24'),
(u'Porady', u'http://webhosting.pl/feed/rss/n/3027'),
(u'Znalezione w sieci', u'http://webhosting.pl/feed/rss/n/6804'),
(u'Dev area', u'http://webhosting.pl/feed/rss/n/24504'),
(u"Webmaster's blog", u'http://webhosting.pl/feed/rss/n/29195'),
(u'Domeny', u'http://webhosting.pl/feed/rss/n/11513'),
(u'Praktyka', u'http://webhosting.pl/feed/rss/n/2'),
(u'Serwery', u'http://webhosting.pl/feed/rss/n/11514'),
(u'Inne', u'http://webhosting.pl/feed/rss/n/24811'),
(u'Marketing', u'http://webhosting.pl/feed/rss/n/11535')]
def print_version(self, url):
return url.replace('webhosting.pl', 'webhosting.pl/print')

View File

@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Worldcrunch(BasicNewsRecipe):
title = u'Worldcrunch'
__author__ = 'Krittika Goyal'
oldest_article = 1 #days
oldest_article = 2 #days
max_articles_per_feed = 25
use_embedded_content = False

View File

@ -128,6 +128,17 @@ categories_collapsed_name_template = r'{first.sort:shorten(4,,0)} - {last.sort:s
categories_collapsed_rating_template = r'{first.avg_rating:4.2f:ifempty(0)} - {last.avg_rating:4.2f:ifempty(0)}'
categories_collapsed_popularity_template = r'{first.count:d} - {last.count:d}'
#: Control order of categories in the tag browser
# Change the following dict to change the order that categories are displayed in
# the tag browser. Items are named using their lookup name, and will be sorted
# using the number supplied. The lookup name '*' stands for all names that
# otherwise do not appear. Two names with the same value will be sorted
# according to the default order, i.e. the order used when the dict is empty.
# Example: tag_browser_category_order = {'series':1, 'tags':2, '*':3}
# resulting in the order series, tags, then everything else in default order.
tag_browser_category_order = {'*':1}
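For illustration only (this is not part of the shipped defaults, and the '#genre' lookup name below is a hypothetical custom column), a customised ordering that shows authors first, then series, then a custom genre column, with every remaining category after those, might look like:

    tag_browser_category_order = {'authors':1, 'series':2, '#genre':3, '*':4}

Any category not named in the dict falls under '*', so such categories keep their default relative order among themselves.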
#: Specify columns to sort the booklist by on startup
# Provide a set of columns to be sorted on when calibre starts
# The argument is None if saved sort history is to be used
@ -374,10 +385,11 @@ maximum_resort_levels = 5
# the fields that are being displayed.
sort_dates_using_visible_fields = False
#: Specify which font to use when generating a default cover
#: Specify which font to use when generating a default cover or masthead
# Absolute path to .ttf font files to use as the fonts for the title, author
# and footer when generating a default cover. Useful if the default font (Liberation
# Serif) does not contain glyphs for the language of the books in your library.
# and footer when generating a default cover or masthead image. Useful if the
# default font (Liberation Serif) does not contain glyphs for the language of
# the books in your library.
generate_cover_title_font = None
generate_cover_foot_font = None
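As a usage sketch (the DejaVu paths below are illustrative; point these tweaks at any .ttf files on your system whose glyph coverage matches your library's language):

    generate_cover_title_font = '/usr/share/fonts/truetype/dejavu/DejaVuSerif-Bold.ttf'
    generate_cover_foot_font = '/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf'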

View File

@ -8,14 +8,14 @@ msgstr ""
"Project-Id-Version: calibre\n"
"Report-Msgid-Bugs-To: FULL NAME <EMAIL@ADDRESS>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2012-02-15 11:31+0000\n"
"PO-Revision-Date: 2012-02-22 10:57+0000\n"
"Last-Translator: Vibhav Pant <vibhavp@gmail.com>\n"
"Language-Team: English (United Kingdom) <en_GB@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2012-02-16 05:03+0000\n"
"X-Generator: Launchpad (build 14781)\n"
"X-Launchpad-Export-Date: 2012-02-23 04:37+0000\n"
"X-Generator: Launchpad (build 14855)\n"
#. name for aaa
msgid "Ghotuo"
@ -7883,523 +7883,523 @@ msgstr "Gants"
#. name for gap
msgid "Gal"
msgstr ""
msgstr "Gal"
#. name for gaq
msgid "Gata'"
msgstr ""
msgstr "Gata'"
#. name for gar
msgid "Galeya"
msgstr ""
msgstr "Galeya"
#. name for gas
msgid "Garasia; Adiwasi"
msgstr ""
msgstr "Garasia; Adiwasi"
#. name for gat
msgid "Kenati"
msgstr ""
msgstr "Kenati"
#. name for gau
msgid "Gadaba; Mudhili"
msgstr ""
msgstr "Gadaba; Mudhili"
#. name for gaw
msgid "Nobonob"
msgstr ""
msgstr "Nobonob"
#. name for gax
msgid "Oromo; Borana-Arsi-Guji"
msgstr ""
msgstr "Oromo; Borana-Arsi-Guji"
#. name for gay
msgid "Gayo"
msgstr ""
msgstr "Gayo"
#. name for gaz
msgid "Oromo; West Central"
msgstr ""
msgstr "Oromo; West Central"
#. name for gba
msgid "Gbaya (Central African Republic)"
msgstr ""
msgstr "Gbaya (Central African Republic)"
#. name for gbb
msgid "Kaytetye"
msgstr ""
msgstr "Kaytetye"
#. name for gbc
msgid "Garawa"
msgstr ""
msgstr "Garawa"
#. name for gbd
msgid "Karadjeri"
msgstr ""
msgstr "Karadjeri"
#. name for gbe
msgid "Niksek"
msgstr ""
msgstr "Niksek"
#. name for gbf
msgid "Gaikundi"
msgstr ""
msgstr "Gaikundi"
#. name for gbg
msgid "Gbanziri"
msgstr ""
msgstr "Gbanziri"
#. name for gbh
msgid "Gbe; Defi"
msgstr ""
msgstr "Gbe; Defi"
#. name for gbi
msgid "Galela"
msgstr ""
msgstr "Galela"
#. name for gbj
msgid "Gadaba; Bodo"
msgstr ""
msgstr "Gadaba; Bodo"
#. name for gbk
msgid "Gaddi"
msgstr ""
msgstr "Gaddi"
#. name for gbl
msgid "Gamit"
msgstr ""
msgstr "Gamit"
#. name for gbm
msgid "Garhwali"
msgstr ""
msgstr "Garhwali"
#. name for gbn
msgid "Mo'da"
msgstr ""
msgstr "Mo'da"
#. name for gbo
msgid "Grebo; Northern"
msgstr ""
msgstr "Grebo; Northern"
#. name for gbp
msgid "Gbaya-Bossangoa"
msgstr ""
msgstr "Gbaya-Bossangoa"
#. name for gbq
msgid "Gbaya-Bozoum"
msgstr ""
msgstr "Gbaya-Bozoum"
#. name for gbr
msgid "Gbagyi"
msgstr ""
msgstr "Gbagyi"
#. name for gbs
msgid "Gbe; Gbesi"
msgstr ""
msgstr "Gbe; Gbesi"
#. name for gbu
msgid "Gagadu"
msgstr ""
msgstr "Gagadu"
#. name for gbv
msgid "Gbanu"
msgstr ""
msgstr "Gbanu"
#. name for gbx
msgid "Gbe; Eastern Xwla"
msgstr ""
msgstr "Gbe; Eastern Xwla"
#. name for gby
msgid "Gbari"
msgstr ""
msgstr "Gbari"
#. name for gbz
msgid "Dari; Zoroastrian"
msgstr ""
msgstr "Dari; Zoroastrian"
#. name for gcc
msgid "Mali"
msgstr ""
msgstr "Mali"
#. name for gcd
msgid "Ganggalida"
msgstr ""
msgstr "Ganggalida"
#. name for gce
msgid "Galice"
msgstr ""
msgstr "Galice"
#. name for gcf
msgid "Creole French; Guadeloupean"
msgstr ""
msgstr "Creole French; Guadeloupean"
#. name for gcl
msgid "Creole English; Grenadian"
msgstr ""
msgstr "Creole English; Grenadian"
#. name for gcn
msgid "Gaina"
msgstr ""
msgstr "Gaina"
#. name for gcr
msgid "Creole French; Guianese"
msgstr ""
msgstr "Creole French; Guianese"
#. name for gct
msgid "German; Colonia Tovar"
msgstr ""
msgstr "German; Colonia Tovar"
#. name for gda
msgid "Lohar; Gade"
msgstr ""
msgstr "Lohar; Gade"
#. name for gdb
msgid "Gadaba; Pottangi Ollar"
msgstr ""
msgstr "Gadaba; Pottangi Ollar"
#. name for gdc
msgid "Gugu Badhun"
msgstr ""
msgstr "Gugu Badhun"
#. name for gdd
msgid "Gedaged"
msgstr ""
msgstr "Gedaged"
#. name for gde
msgid "Gude"
msgstr ""
msgstr "Gude"
#. name for gdf
msgid "Guduf-Gava"
msgstr ""
msgstr "Guduf-Gava"
#. name for gdg
msgid "Ga'dang"
msgstr ""
msgstr "Ga'dang"
#. name for gdh
msgid "Gadjerawang"
msgstr ""
msgstr "Gadjerawang"
#. name for gdi
msgid "Gundi"
msgstr ""
msgstr "Gundi"
#. name for gdj
msgid "Gurdjar"
msgstr ""
msgstr "Gurdjar"
#. name for gdk
msgid "Gadang"
msgstr ""
msgstr "Gadang"
#. name for gdl
msgid "Dirasha"
msgstr ""
msgstr "Dirasha"
#. name for gdm
msgid "Laal"
msgstr ""
msgstr "Laal"
#. name for gdn
msgid "Umanakaina"
msgstr ""
msgstr "Umanakaina"
#. name for gdo
msgid "Ghodoberi"
msgstr ""
msgstr "Ghodoberi"
#. name for gdq
msgid "Mehri"
msgstr ""
msgstr "Mehri"
#. name for gdr
msgid "Wipi"
msgstr ""
msgstr "Wipi"
#. name for gdu
msgid "Gudu"
msgstr ""
msgstr "Gudu"
#. name for gdx
msgid "Godwari"
msgstr ""
msgstr "Godwari"
#. name for gea
msgid "Geruma"
msgstr ""
msgstr "Geruma"
#. name for geb
msgid "Kire"
msgstr ""
msgstr "Kire"
#. name for gec
msgid "Grebo; Gboloo"
msgstr ""
msgstr "Grebo; Gboloo"
#. name for ged
msgid "Gade"
msgstr ""
msgstr "Gade"
#. name for geg
msgid "Gengle"
msgstr ""
msgstr "Gengle"
#. name for geh
msgid "German; Hutterite"
msgstr ""
msgstr "German; Hutterite"
#. name for gei
msgid "Gebe"
msgstr ""
msgstr "Gebe"
#. name for gej
msgid "Gen"
msgstr ""
msgstr "Gen"
#. name for gek
msgid "Yiwom"
msgstr ""
msgstr "Yiwom"
#. name for gel
msgid "ut-Ma'in"
msgstr ""
msgstr "ut-Ma'in"
#. name for geq
msgid "Geme"
msgstr ""
msgstr "Geme"
#. name for ges
msgid "Geser-Gorom"
msgstr ""
msgstr "Geser-Gorom"
#. name for gew
msgid "Gera"
msgstr ""
msgstr "Gera"
#. name for gex
msgid "Garre"
msgstr ""
msgstr "Garre"
#. name for gey
msgid "Enya"
msgstr ""
msgstr "Enya"
#. name for gez
msgid "Geez"
msgstr ""
msgstr "Geez"
#. name for gfk
msgid "Patpatar"
msgstr ""
msgstr "Patpatar"
#. name for gft
msgid "Gafat"
msgstr ""
msgstr "Gafat"
#. name for gga
msgid "Gao"
msgstr ""
msgstr "Gao"
#. name for ggb
msgid "Gbii"
msgstr ""
msgstr "Gbii"
#. name for ggd
msgid "Gugadj"
msgstr ""
msgstr "Gugadj"
#. name for gge
msgid "Guragone"
msgstr ""
msgstr "Guragone"
#. name for ggg
msgid "Gurgula"
msgstr ""
msgstr "Gurgula"
#. name for ggk
msgid "Kungarakany"
msgstr ""
msgstr "Kungarakany"
#. name for ggl
msgid "Ganglau"
msgstr ""
msgstr "Ganglau"
#. name for ggn
msgid "Gurung; Eastern"
msgstr ""
msgstr "Gurung; Eastern"
#. name for ggo
msgid "Gondi; Southern"
msgstr ""
msgstr "Gondi; Southern"
#. name for ggr
msgid "Aghu Tharnggalu"
msgstr ""
msgstr "Aghu Tharnggalu"
#. name for ggt
msgid "Gitua"
msgstr ""
msgstr "Gitua"
#. name for ggu
msgid "Gagu"
msgstr ""
msgstr "Gagu"
#. name for ggw
msgid "Gogodala"
msgstr ""
msgstr "Gogodala"
#. name for gha
msgid "Ghadamès"
msgstr ""
msgstr "Ghadamès"
#. name for ghc
msgid "Gaelic; Hiberno-Scottish"
msgstr ""
msgstr "Gaelic; Hiberno-Scottish"
#. name for ghe
msgid "Ghale; Southern"
msgstr ""
msgstr "Ghale; Southern"
#. name for ghh
msgid "Ghale; Northern"
msgstr ""
msgstr "Ghale; Northern"
#. name for ghk
msgid "Karen; Geko"
msgstr ""
msgstr "Karen; Geko"
#. name for ghl
msgid "Ghulfan"
msgstr ""
msgstr "Ghulfan"
#. name for ghn
msgid "Ghanongga"
msgstr ""
msgstr "Ghanongga"
#. name for gho
msgid "Ghomara"
msgstr ""
msgstr "Ghomara"
#. name for ghr
msgid "Ghera"
msgstr ""
msgstr "Ghera"
#. name for ghs
msgid "Guhu-Samane"
msgstr ""
msgstr "Guhu-Samane"
#. name for ght
msgid "Ghale; Kutang"
msgstr ""
msgstr "Ghale; Kutang"
#. name for gia
msgid "Kitja"
msgstr ""
msgstr "Kitja"
#. name for gib
msgid "Gibanawa"
msgstr ""
msgstr "Gibanawa"
#. name for gic
msgid "Gail"
msgstr ""
msgstr "Gail"
#. name for gid
msgid "Gidar"
msgstr ""
msgstr "Gidar"
#. name for gig
msgid "Goaria"
msgstr ""
msgstr "Goaria"
#. name for gil
msgid "Gilbertese"
msgstr ""
msgstr "Gilbertese"
#. name for gim
msgid "Gimi (Eastern Highlands)"
msgstr ""
msgstr "Gimi (Eastern Highlands)"
#. name for gin
msgid "Hinukh"
msgstr ""
msgstr "Hinukh"
#. name for gio
msgid "Gelao"
msgstr ""
msgstr "Gelao"
#. name for gip
msgid "Gimi (West New Britain)"
msgstr ""
msgstr "Gimi (West New Britain)"
#. name for giq
msgid "Gelao; Green"
msgstr ""
msgstr "Gelao; Green"
#. name for gir
msgid "Gelao; Red"
msgstr ""
msgstr "Gelao; Red"
#. name for gis
msgid "Giziga; North"
msgstr ""
msgstr "Giziga; North"
#. name for git
msgid "Gitxsan"
msgstr ""
msgstr "Gitxsan"
#. name for giw
msgid "Gelao; White"
msgstr ""
msgstr "Gelao; White"
#. name for gix
msgid "Gilima"
msgstr ""
msgstr "Gilima"
#. name for giy
msgid "Giyug"
msgstr ""
msgstr "Giyug"
#. name for giz
msgid "Giziga; South"
msgstr ""
msgstr "Giziga; South"
#. name for gji
msgid "Geji"
msgstr ""
msgstr "Geji"
#. name for gjk
msgid "Koli; Kachi"
msgstr ""
msgstr "Koli; Kachi"
#. name for gjn
msgid "Gonja"
msgstr ""
msgstr "Gonja"
#. name for gju
msgid "Gujari"
msgstr ""
msgstr "Gujari"
#. name for gka
msgid "Guya"
msgstr ""
msgstr "Guya"
#. name for gke
msgid "Ndai"
msgstr ""
msgstr "Ndai"
#. name for gkn
msgid "Gokana"
msgstr ""
msgstr "Gokana"
#. name for gkp
msgid "Kpelle; Guinea"
msgstr ""
msgstr "Kpelle; Guinea"
#. name for gla
msgid "Gaelic; Scottish"
msgstr ""
msgstr "Gaelic; Scottish"
#. name for glc
msgid "Bon Gula"

View File

@ -419,7 +419,7 @@ class CurrentDir(object):
self.cwd = None
def __enter__(self, *args):
self.cwd = os.getcwd()
self.cwd = os.getcwdu()
os.chdir(self.path)
return self.cwd

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 8, 40)
numeric_version = (0, 8, 41)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
@ -190,3 +190,16 @@ def get_windows_username():
return buf.value
return get_unicode_windows_env_var(u'USERNAME')
def get_windows_temp_path():
import ctypes
n = ctypes.windll.kernel32.GetTempPathW(0, None)
if n == 0:
return None
buf = ctypes.create_unicode_buffer(u'\0'*n)
ctypes.windll.kernel32.GetTempPathW(n, buf)
ans = buf.value
if ans[-1] == u'\\':
ans = ans[:-1]
return ans if ans else None

View File

@ -284,7 +284,7 @@ class OPFMetadataReader(MetadataReaderPlugin):
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.opf2 import OPF
return OPF(stream, os.getcwd()).to_book_metadata()
return OPF(stream, os.getcwdu()).to_book_metadata()
class PDBMetadataReader(MetadataReaderPlugin):
@ -1217,7 +1217,7 @@ class StoreArchiveOrgStore(StoreBase):
formats = ['DAISY', 'DJVU', 'EPUB', 'MOBI', 'PDF', 'TXT']
class StoreBaenWebScriptionStore(StoreBase):
name = 'Baen WebScription'
name = 'Baen Ebooks'
description = u'Sci-Fi & Fantasy brought to you by Jim Baen.'
actual_plugin = 'calibre.gui2.store.stores.baen_webscription_plugin:BaenWebScriptionStore'

View File

@ -192,9 +192,13 @@ class InputFormatPlugin(Plugin):
def __call__(self, stream, options, file_ext, log,
accelerators, output_dir):
log('InputFormatPlugin: %s running'%self.name)
if hasattr(stream, 'name'):
log('on', stream.name)
try:
log('InputFormatPlugin: %s running'%self.name)
if hasattr(stream, 'name'):
log('on', stream.name)
except:
# In case stdout is broken
pass
with CurrentDir(output_dir):
for x in os.listdir('.'):

View File

@ -137,7 +137,7 @@ def add_simple_plugin(path_to_plugin):
tdir = tempfile.mkdtemp()
open(os.path.join(tdir, 'custom_plugin.py'),
'wb').write(open(path_to_plugin, 'rb').read())
odir = os.getcwd()
odir = os.getcwdu()
os.chdir(tdir)
zf = zipfile.ZipFile('plugin.zip', 'w')
zf.write('custom_plugin.py')

View File

@ -68,6 +68,7 @@ class ANDROID(USBMS):
# Sony Ericsson
0xfce : {
0xd12e : [0x0100],
0xe15d : [0x226],
0xe14f : [0x0226],
0x614f : [0x0226, 0x100],
0x6156 : [0x0226, 0x100],
@ -184,14 +185,14 @@ class ANDROID(USBMS):
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI',
'UMS', '.K080', 'P990', 'LTE', 'MB853', 'GT-S5660_CARD', 'A107',
'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855',
'XT910', 'BOOK_A10', 'USB_2.0_DRIVER']
'XT910', 'BOOK_A10', 'USB_2.0_DRIVER', 'I9100T']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
'__UMS_COMPOSITE', 'SGH-I997_CARD', 'MB870', 'ALPANDIGITAL',
'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853',
'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD',
'USB_2.0_DRIVER']
'USB_2.0_DRIVER', 'I9100T']
OSX_MAIN_MEM = 'Android Device Main Memory'

View File

@ -103,17 +103,6 @@ class AppleOpenFeedback(OpenFeedback):
if isosx:
try:
import appscript
appscript
except:
# appscript fails to load on 10.4
appscript = None
if iswindows:
import pythoncom, win32com.client
class DriverBase(DeviceConfig, DevicePlugin):
# Needed for config_widget to work
FORMATS = ['epub', 'pdf']
@ -467,6 +456,7 @@ class ITUNES(DriverBase):
self._purge_orphans(library_books, cached_books)
elif iswindows:
import pythoncom, win32com.client
try:
pythoncom.CoInitialize()
self.iTunes = win32com.client.Dispatch("iTunes.Application")
@ -533,6 +523,11 @@ class ITUNES(DriverBase):
instantiate iTunes if necessary
This gets called ~1x/second while device fingerprint is sensed
'''
try:
import appscript
appscript
except:
appscript = None
if appscript is None:
return False
@ -600,6 +595,7 @@ class ITUNES(DriverBase):
'''
if self.iTunes:
import pythoncom
# We've previously run, so the user probably ejected the device
try:
pythoncom.CoInitialize()
@ -709,6 +705,7 @@ class ITUNES(DriverBase):
if self.manual_sync_mode:
self._remove_from_device(self.cached_books[path])
elif iswindows:
import pythoncom, win32com.client
try:
pythoncom.CoInitialize()
self.iTunes = win32com.client.Dispatch("iTunes.Application")
@ -754,6 +751,8 @@ class ITUNES(DriverBase):
self.iTunes.eject(self.sources['iPod'])
elif iswindows:
if 'iPod' in self.sources:
import pythoncom, win32com.client
try:
pythoncom.CoInitialize()
self.iTunes = win32com.client.Dispatch("iTunes.Application")
@ -788,6 +787,7 @@ class ITUNES(DriverBase):
elif iswindows:
if 'iPod' in self.sources:
import pythoncom, win32com.client
while True:
try:
@ -1098,6 +1098,8 @@ class ITUNES(DriverBase):
_('%(num)d of %(tot)d') % dict(num=i+1, tot=file_count))
elif iswindows:
import pythoncom, win32com.client
try:
pythoncom.CoInitialize()
self.iTunes = win32com.client.Dispatch("iTunes.Application")
@ -1163,6 +1165,7 @@ class ITUNES(DriverBase):
'''
logger().info(" ITUNES._add_device_book()")
if isosx:
import appscript
if 'iPod' in self.sources:
connected_device = self.sources['iPod']
device = self.iTunes.sources[connected_device]
@ -1257,6 +1260,7 @@ class ITUNES(DriverBase):
if DEBUG:
logger().info(" ITUNES._add_library_book()")
if isosx:
import appscript
added = self.iTunes.add(appscript.mactypes.File(file))
elif iswindows:
@ -1541,6 +1545,7 @@ class ITUNES(DriverBase):
if wait:
time.sleep(wait)
if isosx:
import appscript
connected_device = self.sources['iPod']
dev_books = None
device = self.iTunes.sources[connected_device]
@ -2077,6 +2082,7 @@ class ITUNES(DriverBase):
device_books = []
if isosx:
import appscript
if 'iPod' in self.sources:
connected_device = self.sources['iPod']
device = self.iTunes.sources[connected_device]
@ -2104,6 +2110,8 @@ class ITUNES(DriverBase):
logger().info()
elif iswindows:
import pythoncom
if 'iPod' in self.sources:
try:
pythoncom.CoInitialize()
@ -2171,6 +2179,7 @@ class ITUNES(DriverBase):
lib = None
if isosx:
import appscript
for source in self.iTunes.sources():
if source.kind() == appscript.k.library:
lib = source
@ -2341,6 +2350,7 @@ class ITUNES(DriverBase):
logger().info(" ITUNES:_launch_iTunes():\n Instantiating iTunes")
if isosx:
import appscript
'''
Launch iTunes if not already running
'''
@ -2382,6 +2392,8 @@ class ITUNES(DriverBase):
logger().info(" calibre_library_path: %s" % self.calibre_library_path)
if iswindows:
import win32com.client
'''
Launch iTunes if not already running
Assumes pythoncom wrapper
@ -2752,6 +2764,8 @@ class ITUNES(DriverBase):
time.sleep(2)
print
elif iswindows:
import pythoncom, win32com.client
try:
pythoncom.CoInitialize()
self.iTunes = win32com.client.Dispatch("iTunes.Application")
@ -3088,6 +3102,12 @@ class ITUNES_ASYNC(ITUNES):
if DEBUG:
logger().info("ITUNES_ASYNC:__init__()")
try:
import appscript
appscript
except:
appscript = None
if isosx and appscript is None:
self.connected = False
raise UserFeedback('OSX 10.5 or later required', details=None, level=UserFeedback.WARN)
@ -3099,6 +3119,8 @@ class ITUNES_ASYNC(ITUNES):
self._launch_iTunes()
if iswindows:
import pythoncom
try:
pythoncom.CoInitialize()
self._launch_iTunes()
@ -3180,6 +3202,8 @@ class ITUNES_ASYNC(ITUNES):
_('%(num)d of %(tot)d') % dict(num=i+1, tot=book_count))
elif iswindows:
import pythoncom, win32com.client
try:
pythoncom.CoInitialize()
self.iTunes = win32com.client.Dispatch("iTunes.Application")

View File

@ -215,7 +215,11 @@ def unit_convert(value, base, font, dpi):
def generate_masthead(title, output_path=None, width=600, height=60):
from calibre.ebooks.conversion.config import load_defaults
from calibre.utils.fonts import fontconfig
font_path = default_font = P('fonts/liberation/LiberationSerif-Bold.ttf')
from calibre.utils.config import tweaks
fp = tweaks['generate_cover_title_font']
if not fp:
fp = P('fonts/liberation/LiberationSerif-Bold.ttf')
font_path = default_font = fp
recs = load_defaults('mobi_output')
masthead_font_family = recs.get('masthead_font', 'Default')

View File

@ -22,6 +22,6 @@ class AZW4Input(InputFormatPlugin):
header = PdbHeaderReader(stream)
reader = Reader(header, stream, log, options)
opf = reader.extract_content(os.getcwd())
opf = reader.extract_content(os.getcwdu())
return opf

View File

@ -173,7 +173,7 @@ class ComicInput(InputFormatPlugin):
comics = []
for i, x in enumerate(comics_):
title, fname = x
cdir = 'comic_%d'%(i+1) if len(comics_) > 1 else '.'
cdir = u'comic_%d'%(i+1) if len(comics_) > 1 else u'.'
cdir = os.path.abspath(cdir)
if not os.path.exists(cdir):
os.makedirs(cdir)
@ -187,7 +187,7 @@ class ComicInput(InputFormatPlugin):
mi = MetaInformation(os.path.basename(stream.name).rpartition('.')[0],
[_('Unknown')])
opf = OPFCreator(os.path.abspath('.'), mi)
opf = OPFCreator(os.getcwdu(), mi)
entries = []
def href(x):
@ -225,9 +225,9 @@ class ComicInput(InputFormatPlugin):
_('Page')+' %d'%(i+1), play_order=po)
po += 1
opf.set_toc(toc)
m, n = open('metadata.opf', 'wb'), open('toc.ncx', 'wb')
opf.render(m, n, 'toc.ncx')
return os.path.abspath('metadata.opf')
m, n = open(u'metadata.opf', 'wb'), open('toc.ncx', 'wb')
opf.render(m, n, u'toc.ncx')
return os.path.abspath(u'metadata.opf')
def create_wrappers(self, pages):
from calibre.ebooks.oeb.base import XHTML_NS
@ -252,7 +252,7 @@ class ComicInput(InputFormatPlugin):
dir = os.path.dirname(pages[0])
for i, page in enumerate(pages):
wrapper = WRAPPER%(XHTML_NS, i+1, os.path.basename(page), i+1)
page = os.path.join(dir, 'page_%d.xhtml'%(i+1))
page = os.path.join(dir, u'page_%d.xhtml'%(i+1))
open(page, 'wb').write(wrapper)
wrappers.append(page)
return wrappers

View File

@ -138,7 +138,7 @@ class EPUBInput(InputFormatPlugin):
from calibre.ebooks import DRMError
from calibre.ebooks.metadata.opf2 import OPF
zf = ZipFile(stream)
zf.extractall(os.getcwd())
zf.extractall(os.getcwdu())
encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
opf = self.find_opf()
if opf is None:
@ -150,7 +150,7 @@ class EPUBInput(InputFormatPlugin):
path = getattr(stream, 'name', 'stream')
if opf is None:
raise ValueError('%s is not a valid EPUB file'%path)
raise ValueError('%s is not a valid EPUB file (could not find opf)'%path)
opf = os.path.relpath(opf, os.getcwdu())
parts = os.path.split(opf)
@ -197,4 +197,4 @@ class EPUBInput(InputFormatPlugin):
with open('content.opf', 'wb') as nopf:
nopf.write(opf.render())
return os.path.abspath('content.opf')
return os.path.abspath(u'content.opf')

View File

@ -196,7 +196,7 @@ class EPUBOutput(OutputFormatPlugin):
uuid = str(uuid4())
oeb.metadata.add('identifier', uuid, scheme='uuid', id=uuid)
with TemporaryDirectory('_epub_output') as tdir:
with TemporaryDirectory(u'_epub_output') as tdir:
from calibre.customize.ui import plugin_for_output_format
metadata_xml = None
extra_entries = []
@ -204,7 +204,7 @@ class EPUBOutput(OutputFormatPlugin):
if self.opts.output_profile.epub_periodical_format == 'sony':
from calibre.ebooks.epub.periodical import sony_metadata
metadata_xml, atom_xml = sony_metadata(oeb)
extra_entries = [('atom.xml', 'application/atom+xml', atom_xml)]
extra_entries = [(u'atom.xml', 'application/atom+xml', atom_xml)]
oeb_output = plugin_for_output_format('oeb')
oeb_output.convert(oeb, tdir, input_plugin, opts, log)
opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]

View File

@ -33,8 +33,6 @@ class FB2Input(InputFormatPlugin):
),
])
def convert(self, stream, options, file_ext, log,
accelerators):
from lxml import etree
@ -92,8 +90,8 @@ class FB2Input(InputFormatPlugin):
src = img.get('src')
img.set('src', self.binary_map.get(src, src))
index = transform.tostring(result)
open('index.xhtml', 'wb').write(index)
open('inline-styles.css', 'wb').write(css)
open(u'index.xhtml', 'wb').write(index)
open(u'inline-styles.css', 'wb').write(css)
stream.seek(0)
mi = get_metadata(stream, 'fb2')
if not mi.title:
@ -102,9 +100,9 @@ class FB2Input(InputFormatPlugin):
mi.authors = [_('Unknown')]
cpath = None
if mi.cover_data and mi.cover_data[1]:
with open('fb2_cover_calibre_mi.jpg', 'wb') as f:
with open(u'fb2_cover_calibre_mi.jpg', 'wb') as f:
f.write(mi.cover_data[1])
cpath = os.path.abspath('fb2_cover_calibre_mi.jpg')
cpath = os.path.abspath(u'fb2_cover_calibre_mi.jpg')
else:
for img in doc.xpath('//f:coverpage/f:image', namespaces=NAMESPACES):
href = img.get('{%s}href'%XLINK_NS, img.get('href', None))
@ -115,14 +113,14 @@ class FB2Input(InputFormatPlugin):
break
opf = OPFCreator(os.getcwdu(), mi)
entries = [(f, guess_type(f)[0]) for f in os.listdir('.')]
entries = [(f, guess_type(f)[0]) for f in os.listdir(u'.')]
opf.create_manifest(entries)
opf.create_spine(['index.xhtml'])
opf.create_spine([u'index.xhtml'])
if cpath:
opf.guide.set_cover(cpath)
with open('metadata.opf', 'wb') as f:
with open(u'metadata.opf', 'wb') as f:
opf.render(f)
return os.path.join(os.getcwd(), 'metadata.opf')
return os.path.join(os.getcwdu(), u'metadata.opf')
def extract_embedded_content(self, doc):
self.binary_map = {}

View File

@ -57,7 +57,7 @@ class HTMLInput(InputFormatPlugin):
def convert(self, stream, opts, file_ext, log,
accelerators):
self._is_case_sensitive = None
basedir = os.getcwd()
basedir = os.getcwdu()
self.opts = opts
fname = None

View File

@ -37,18 +37,18 @@ class HTMLZInput(InputFormatPlugin):
index = u''
multiple_html = False
# Get a list of all top level files in the archive.
for x in os.listdir('.'):
for x in os.listdir(u'.'):
if os.path.isfile(x):
top_levels.append(x)
# Try to find an index. file.
for x in top_levels:
if x.lower() in ('index.html', 'index.xhtml', 'index.htm'):
if x.lower() in (u'index.html', u'index.xhtml', u'index.htm'):
index = x
break
# Look for multiple HTML files in the archive. We look at the
# top level files only as only they matter in HTMLZ.
for x in top_levels:
if os.path.splitext(x)[1].lower() in ('.html', '.xhtml', '.htm'):
if os.path.splitext(x)[1].lower() in (u'.html', u'.xhtml', u'.htm'):
# Set index to the first HTML file found if it's not
# called index.
if not index:
@ -85,11 +85,11 @@ class HTMLZInput(InputFormatPlugin):
setattr(options, opt.option.name, opt.recommended_value)
options.input_encoding = 'utf-8'
base = os.getcwdu()
fname = os.path.join(base, 'index.html')
fname = os.path.join(base, u'index.html')
c = 0
while os.path.exists(fname):
c += 1
fname = 'index%d.html'%c
fname = u'index%d.html'%c
htmlfile = open(fname, 'wb')
with htmlfile:
htmlfile.write(html.encode('utf-8'))
@ -111,16 +111,16 @@ class HTMLZInput(InputFormatPlugin):
cover_path = None
opf = None
for x in top_levels:
if os.path.splitext(x)[1].lower() in ('.opf'):
if os.path.splitext(x)[1].lower() == u'.opf':
opf = x
break
if opf:
opf = OPF(opf, basedir=os.getcwd())
opf = OPF(opf, basedir=os.getcwdu())
cover_path = opf.raster_cover
# Set the cover.
if cover_path:
cdata = None
with open(os.path.join(os.getcwd(), cover_path), 'rb') as cf:
with open(os.path.join(os.getcwdu(), cover_path), 'rb') as cf:
cdata = cf.read()
cover_name = os.path.basename(cover_path)
id, href = oeb.manifest.generate('cover', cover_name)

View File

@ -55,30 +55,30 @@ class HTMLZOutput(OutputFormatPlugin):
else:
from calibre.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer
with TemporaryDirectory('_htmlz_output') as tdir:
with TemporaryDirectory(u'_htmlz_output') as tdir:
htmlizer = OEB2HTMLizer(log)
html = htmlizer.oeb2html(oeb_book, opts)
with open(os.path.join(tdir, 'index.html'), 'wb') as tf:
with open(os.path.join(tdir, u'index.html'), 'wb') as tf:
tf.write(html)
# CSS
if opts.htmlz_css_type == 'class' and opts.htmlz_class_style == 'external':
with open(os.path.join(tdir, 'style.css'), 'wb') as tf:
with open(os.path.join(tdir, u'style.css'), 'wb') as tf:
tf.write(htmlizer.get_css(oeb_book))
# Images
images = htmlizer.images
if images:
if not os.path.exists(os.path.join(tdir, 'images')):
os.makedirs(os.path.join(tdir, 'images'))
if not os.path.exists(os.path.join(tdir, u'images')):
os.makedirs(os.path.join(tdir, u'images'))
for item in oeb_book.manifest:
if item.media_type in OEB_IMAGES and item.href in images:
if item.media_type == SVG_MIME:
data = unicode(etree.tostring(item.data, encoding=unicode))
else:
data = item.data
fname = os.path.join(tdir, 'images', images[item.href])
fname = os.path.join(tdir, u'images', images[item.href])
with open(fname, 'wb') as img:
img.write(data)
@ -91,7 +91,7 @@ class HTMLZOutput(OutputFormatPlugin):
cover_data = oeb_book.guide[term].item.data
if cover_data:
from calibre.utils.magick.draw import save_cover_data_to
cover_path = os.path.join(tdir, 'cover.jpg')
cover_path = os.path.join(tdir, u'cover.jpg')
with open(cover_path, 'w') as cf:
cf.write('')
save_cover_data_to(cover_data, cover_path)
@ -100,11 +100,11 @@ class HTMLZOutput(OutputFormatPlugin):
traceback.print_exc()
# Metadata
with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
with open(os.path.join(tdir, u'metadata.opf'), 'wb') as mdataf:
opf = OPF(StringIO(etree.tostring(oeb_book.metadata.to_opf1())))
mi = opf.to_book_metadata()
if cover_path:
mi.cover = 'cover.jpg'
mi.cover = u'cover.jpg'
mdataf.write(metadata_to_opf(mi))
htmlz = ZipFile(output_path, 'w')

View File

@ -28,7 +28,7 @@ class LRFInput(InputFormatPlugin):
d.parse()
xml = d.to_xml(write_files=True)
if options.verbose > 2:
open('lrs.xml', 'wb').write(xml.encode('utf-8'))
open(u'lrs.xml', 'wb').write(xml.encode('utf-8'))
parser = etree.XMLParser(no_network=True, huge_tree=True)
try:
doc = etree.fromstring(xml, parser=parser)
@ -84,4 +84,4 @@ class LRFInput(InputFormatPlugin):
with open('content.opf', 'wb') as f:
f.write(result)
styles.write()
return os.path.abspath('content.opf')
return os.path.abspath(u'content.opf')

View File

@ -182,7 +182,7 @@ class LRFOutput(OutputFormatPlugin):
self.flatten_toc()
from calibre.ptempfile import TemporaryDirectory
with TemporaryDirectory('_lrf_output') as tdir:
with TemporaryDirectory(u'_lrf_output') as tdir:
from calibre.customize.ui import plugin_for_output_format
oeb_output = plugin_for_output_format('oeb')
oeb_output.convert(oeb, tdir, input_plugin, opts, log)

View File

@ -20,17 +20,17 @@ class MOBIInput(InputFormatPlugin):
try:
mr = MobiReader(stream, log, options.input_encoding,
options.debug_pipeline)
mr.extract_content('.', parse_cache)
mr.extract_content(u'.', parse_cache)
except:
mr = MobiReader(stream, log, options.input_encoding,
options.debug_pipeline, try_extra_data_fix=True)
mr.extract_content('.', parse_cache)
mr.extract_content(u'.', parse_cache)
raw = parse_cache.pop('calibre_raw_mobi_markup', False)
if raw:
if isinstance(raw, unicode):
raw = raw.encode('utf-8')
open('debug-raw.html', 'wb').write(raw)
open(u'debug-raw.html', 'wb').write(raw)
for f, root in parse_cache.items():
with open(f, 'wb') as q:
q.write(html.tostring(root, encoding='utf-8', method='xml',

View File

@ -29,6 +29,6 @@ class PDBInput(InputFormatPlugin):
log.debug('Detected ebook format as: %s with identity: %s' % (IDENTITY_TO_NAME[header.ident], header.ident))
reader = Reader(header, stream, log, options)
opf = reader.extract_content(os.getcwd())
opf = reader.extract_content(os.getcwdu())
return opf

View File

@ -35,9 +35,9 @@ class PDFInput(InputFormatPlugin):
if pdfreflow_err:
raise RuntimeError('Failed to load pdfreflow: ' + pdfreflow_err)
pdfreflow.reflow(stream.read(), 1, -1)
xml = clean_ascii_chars(open('index.xml', 'rb').read())
xml = clean_ascii_chars(open(u'index.xml', 'rb').read())
PDFDocument(xml, self.opts, self.log)
return os.path.join(os.getcwd(), 'metadata.opf')
return os.path.join(os.getcwdu(), u'metadata.opf')
def convert(self, stream, options, file_ext, log,
@ -50,25 +50,25 @@ class PDFInput(InputFormatPlugin):
self.opts, self.log = options, log
if options.new_pdf_engine:
return self.convert_new(stream, accelerators)
pdftohtml(os.getcwd(), stream.name, options.no_images)
pdftohtml(os.getcwdu(), stream.name, options.no_images)
from calibre.ebooks.metadata.meta import get_metadata
log.debug('Retrieving document metadata...')
mi = get_metadata(stream, 'pdf')
opf = OPFCreator(os.getcwd(), mi)
opf = OPFCreator(os.getcwdu(), mi)
manifest = [('index.html', None)]
manifest = [(u'index.html', None)]
images = os.listdir(os.getcwd())
images = os.listdir(os.getcwdu())
images.remove('index.html')
for i in images:
manifest.append((i, None))
log.debug('Generating manifest...')
opf.create_manifest(manifest)
opf.create_spine(['index.html'])
opf.create_spine([u'index.html'])
log.debug('Rendering manifest...')
with open('metadata.opf', 'wb') as opffile:
with open(u'metadata.opf', 'wb') as opffile:
opf.render(opffile)
return os.path.join(os.getcwd(), 'metadata.opf')
return os.path.join(os.getcwdu(), u'metadata.opf')

View File

@ -69,12 +69,12 @@ class PMLInput(InputFormatPlugin):
imgs = glob.glob(os.path.join(tdir, os.path.splitext(os.path.basename(stream.name))[0] + '_img', '*.png'))
# No images in Dropbook location try generic images directory
if not imgs:
imgs = glob.glob(os.path.join(os.path.join(tdir, 'images'), '*.png'))
imgs = glob.glob(os.path.join(os.path.join(tdir, u'images'), u'*.png'))
if imgs:
os.makedirs(os.path.join(os.getcwd(), 'images'))
os.makedirs(os.path.join(os.getcwdu(), u'images'))
for img in imgs:
pimg_name = os.path.basename(img)
pimg_path = os.path.join(os.getcwd(), 'images', pimg_name)
pimg_path = os.path.join(os.getcwdu(), 'images', pimg_name)
images.append('images/' + pimg_name)
@ -94,14 +94,14 @@ class PMLInput(InputFormatPlugin):
if file_ext == 'pmlz':
log.debug('De-compressing content to temporary directory...')
with TemporaryDirectory('_unpmlz') as tdir:
with TemporaryDirectory(u'_unpmlz') as tdir:
zf = ZipFile(stream)
zf.extractall(tdir)
pmls = glob.glob(os.path.join(tdir, '*.pml'))
pmls = glob.glob(os.path.join(tdir, u'*.pml'))
for pml in pmls:
html_name = os.path.splitext(os.path.basename(pml))[0]+'.html'
html_path = os.path.join(os.getcwd(), html_name)
html_path = os.path.join(os.getcwdu(), html_name)
pages.append(html_name)
log.debug('Processing PML item %s...' % pml)
@ -109,8 +109,8 @@ class PMLInput(InputFormatPlugin):
toc += ttoc
images = self.get_images(stream, tdir, True)
else:
toc = self.process_pml(stream, 'index.html')
pages.append('index.html')
toc = self.process_pml(stream, u'index.html')
pages.append(u'index.html')
if hasattr(stream, 'name'):
images = self.get_images(stream, os.path.abspath(os.path.dirname(stream.name)))
@ -126,14 +126,14 @@ class PMLInput(InputFormatPlugin):
log.debug('Reading metadata from input file...')
mi = get_metadata(stream, 'pml')
if 'images/cover.png' in images:
mi.cover = 'images/cover.png'
opf = OPFCreator(os.getcwd(), mi)
mi.cover = u'images/cover.png'
opf = OPFCreator(os.getcwdu(), mi)
log.debug('Generating manifest...')
opf.create_manifest(manifest_items)
opf.create_spine(pages)
opf.set_toc(toc)
with open('metadata.opf', 'wb') as opffile:
with open('toc.ncx', 'wb') as tocfile:
opf.render(opffile, tocfile, 'toc.ncx')
with open(u'metadata.opf', 'wb') as opffile:
with open(u'toc.ncx', 'wb') as tocfile:
opf.render(opffile, tocfile, u'toc.ncx')
return os.path.join(os.getcwd(), 'metadata.opf')
return os.path.join(os.getcwdu(), u'metadata.opf')

View File

@ -20,6 +20,6 @@ class RBInput(InputFormatPlugin):
from calibre.ebooks.rb.reader import Reader
reader = Reader(stream, log, options.input_encoding)
opf = reader.extract_content(os.getcwd())
opf = reader.extract_content(os.getcwdu())
return opf

View File

@ -58,7 +58,7 @@ class RecipeInput(InputFormatPlugin):
zf = ZipFile(recipe_or_file, 'r')
zf.extractall()
zf.close()
self.recipe_source = open('download.recipe', 'rb').read()
self.recipe_source = open(u'download.recipe', 'rb').read()
recipe = compile_recipe(self.recipe_source)
recipe.needs_subscription = False
self.recipe_object = recipe(opts, log, self.report_progress)
@ -108,11 +108,11 @@ class RecipeInput(InputFormatPlugin):
for key, val in self.recipe_object.conversion_options.items():
setattr(opts, key, val)
for f in os.listdir('.'):
for f in os.listdir(u'.'):
if f.endswith('.opf'):
return os.path.abspath(f)
for f in walk('.'):
for f in walk(u'.'):
if f.endswith('.opf'):
return os.path.abspath(f)

View File

@ -47,12 +47,12 @@ class RTFInput(InputFormatPlugin):
def generate_xml(self, stream):
from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
ofile = 'dataxml.xml'
ofile = u'dataxml.xml'
run_lev, debug_dir, indent_out = 1, None, 0
if getattr(self.opts, 'debug_pipeline', None) is not None:
try:
os.mkdir('rtfdebug')
debug_dir = 'rtfdebug'
os.mkdir(u'rtfdebug')
debug_dir = u'rtfdebug'
run_lev = 4
indent_out = 1
self.log('Running RTFParser in debug mode')
@ -124,7 +124,7 @@ class RTFInput(InputFormatPlugin):
if fmt is None:
fmt = 'wmf'
count += 1
name = '%04d.%s' % (count, fmt)
name = u'%04d.%s' % (count, fmt)
with open(name, 'wb') as f:
f.write(data)
imap[count] = name
@ -201,7 +201,7 @@ class RTFInput(InputFormatPlugin):
for cls, val in border_styles.iteritems():
css += '\n\n.%s {\n%s\n}'%(cls, val)
with open('styles.css', 'ab') as f:
with open(u'styles.css', 'ab') as f:
f.write(css)
def convert_borders(self, doc):
@ -271,7 +271,7 @@ class RTFInput(InputFormatPlugin):
extensions = { ('calibre', 'inline-class') : inline_class }
transform = etree.XSLT(styledoc, extensions=extensions)
result = transform(doc)
html = 'index.xhtml'
html = u'index.xhtml'
with open(html, 'wb') as f:
res = transform.tostring(result)
# res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
@ -289,10 +289,10 @@ class RTFInput(InputFormatPlugin):
mi.title = _('Unknown')
if not mi.authors:
mi.authors = [_('Unknown')]
opf = OPFCreator(os.getcwd(), mi)
opf.create_manifest([('index.xhtml', None)])
opf.create_spine(['index.xhtml'])
opf.render(open('metadata.opf', 'wb'))
return os.path.abspath('metadata.opf')
opf = OPFCreator(os.getcwdu(), mi)
opf.create_manifest([(u'index.xhtml', None)])
opf.create_spine([u'index.xhtml'])
opf.render(open(u'metadata.opf', 'wb'))
return os.path.abspath(u'metadata.opf')

View File

@ -208,6 +208,7 @@ OptionRecommendation(name='level1_toc',
'should be added to the Table of Contents at level one. If '
'this is specified, it takes precedence over other forms '
'of auto-detection.'
' See the XPath Tutorial in the calibre User Manual for examples.'
)
),
@ -216,6 +217,7 @@ OptionRecommendation(name='level2_toc',
help=_('XPath expression that specifies all tags that should be '
'added to the Table of Contents at level two. Each entry is added '
'under the previous level one entry.'
' See the XPath Tutorial in the calibre User Manual for examples.'
)
),
@ -224,6 +226,7 @@ OptionRecommendation(name='level3_toc',
help=_('XPath expression that specifies all tags that should be '
'added to the Table of Contents at level three. Each entry '
'is added under the previous level two entry.'
' See the XPath Tutorial in the calibre User Manual for examples.'
)
),

View File

@ -664,7 +664,8 @@ class LitWriter(object):
quickref = []
name = directory[0].name
for entry in directory:
next = ''.join([decint(len(entry.name)), entry.name,
en = entry.name.encode('utf-8') if entry.name else entry.name
next = ''.join([decint(len(en)), en,
decint(entry.section), decint(entry.offset),
decint(entry.size)])
usedlen = dchunk.tell() + len(next) + (len(quickref) * 2) + 52
@ -673,7 +674,7 @@ class LitWriter(object):
dchunk = StringIO()
dcount = 0
quickref = []
name = entry.name
name = en
if (dcount % qrn) == 0:
quickref.append(dchunk.tell())
dchunk.write(next)

View File

@ -374,13 +374,12 @@ class HTMLConverter(object):
else:
self.css[selector] = self.override_css[selector]
upath = path.encode(sys.getfilesystemencoding()) if isinstance(path, unicode) else path
self.file_name = os.path.basename(upath.decode(sys.getfilesystemencoding()))
self.log.info(_('Processing %s')%( repr(upath) if self.verbose else repr(self.file_name)))
self.file_name = os.path.basename(path)
self.log.info(_('Processing %s')%( path if self.verbose else self.file_name))
if not os.path.exists(upath):
upath = upath.replace('&', '%26') #convertlit replaces & with %26 in file names
f = open(upath, 'rb')
if not os.path.exists(path):
path = path.replace('&', '%26') #convertlit replaces & with %26 in file names
f = open(path, 'rb')
raw = f.read()
if self.pdftohtml: # Bug in pdftohtml that causes it to output invalid UTF-8 files
raw = raw.decode('utf-8', 'ignore')
@ -1938,7 +1937,7 @@ def process_file(path, options, logger):
if not oname:
suffix = '.lrs' if options.lrs else '.lrf'
name = os.path.splitext(os.path.basename(path))[0] + suffix
oname = os.path.join(os.getcwd(), name)
oname = os.path.join(os.getcwdu(), name)
oname = os.path.abspath(os.path.expanduser(oname))
conv.writeto(oname, lrs=options.lrs)
conv.cleanup()

View File

@ -187,7 +187,7 @@ class Resource(object):
'''
def __init__(self, href_or_path, basedir=os.getcwd(), is_path=True):
def __init__(self, href_or_path, basedir=os.getcwdu(), is_path=True):
self._href = None
self._basedir = basedir
self.path = None
@ -230,7 +230,7 @@ class Resource(object):
if self._basedir:
basedir = self._basedir
else:
basedir = os.getcwd()
basedir = os.getcwdu()
if self.path is None:
return self._href
f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode) else self.fragment

View File

@ -14,7 +14,7 @@ def get_metadata(stream):
litfile = LitContainer(stream, Log())
src = litfile.get_metadata().encode('utf-8')
litfile = litfile._litfile
opf = OPF(cStringIO.StringIO(src), os.getcwd())
opf = OPF(cStringIO.StringIO(src), os.getcwdu())
mi = opf.to_book_metadata()
covers = []
for item in opf.iterguide():

View File

@ -199,7 +199,7 @@ def metadata_from_filename(name, pat=None):
def opf_metadata(opfpath):
if hasattr(opfpath, 'read'):
f = opfpath
opfpath = getattr(f, 'name', os.getcwd())
opfpath = getattr(f, 'name', os.getcwdu())
else:
f = open(opfpath, 'rb')
try:

View File

@ -153,7 +153,9 @@ def get_metadata(stream):
mi = MetaInformation(None, [])
if data.has_key('title'):
mi.title = data['title']
if data.has_key('creator'):
if data.get('initial-creator', '').strip():
mi.authors = string_to_authors(data['initial-creator'])
elif data.has_key('creator'):
mi.authors = string_to_authors(data['creator'])
if data.has_key('description'):
mi.comments = data['description']

View File

@ -36,7 +36,7 @@ class Resource(object): # {{{
:method:`href`
'''
def __init__(self, href_or_path, basedir=os.getcwd(), is_path=True):
def __init__(self, href_or_path, basedir=os.getcwdu(), is_path=True):
self.orig = href_or_path
self._href = None
self._basedir = basedir
@ -81,7 +81,7 @@ class Resource(object): # {{{
if self._basedir:
basedir = self._basedir
else:
basedir = os.getcwd()
basedir = os.getcwdu()
if self.path is None:
return self._href
f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode) else self.fragment
@ -1487,7 +1487,7 @@ class OPFTest(unittest.TestCase):
</package>
'''
)
self.opf = OPF(self.stream, os.getcwd())
self.opf = OPF(self.stream, os.getcwdu())
def testReading(self, opf=None):
if opf is None:
@ -1518,11 +1518,11 @@ class OPFTest(unittest.TestCase):
self.opf.render()
def testCreator(self):
opf = OPFCreator(os.getcwd(), self.opf)
opf = OPFCreator(os.getcwdu(), self.opf)
buf = cStringIO.StringIO()
opf.render(buf)
raw = buf.getvalue()
self.testReading(opf=OPF(cStringIO.StringIO(raw), os.getcwd()))
self.testReading(opf=OPF(cStringIO.StringIO(raw), os.getcwdu()))
def testSmartUpdate(self):
self.opf.smart_update(MetaInformation(self.opf))
@ -1547,7 +1547,7 @@ def test_user_metadata():
}
mi.set_all_user_metadata(um)
raw = metadata_to_opf(mi)
opfc = OPFCreator(os.getcwd(), other=mi)
opfc = OPFCreator(os.getcwdu(), other=mi)
out = StringIO()
opfc.render(out)
raw2 = out.getvalue()

View File

@ -29,9 +29,9 @@ C = ElementMaker(namespace=CALIBRE_NS, nsmap=NSMAP)
class TOC(list):
def __init__(self, href=None, fragment=None, text=None, parent=None, play_order=0,
base_path=os.getcwd(), type='unknown', author=None,
description=None, toc_thumbnail=None):
def __init__(self, href=None, fragment=None, text=None, parent=None,
play_order=0, base_path=os.getcwdu(), type='unknown', author=None,
description=None, toc_thumbnail=None):
self.href = href
self.fragment = fragment
if not self.fragment:
@ -272,7 +272,7 @@ class TOC(list):
elem.append(C.meta(desc, name='description'))
idx = getattr(np, 'toc_thumbnail', None)
if idx:
elem.append(C.meta(idx, name='toc_thumbnail'))
elem.append(C.meta(idx, name='toc_thumbnail'))
parent.append(elem)
for np2 in np:
navpoint(elem, np2)

View File

@ -44,7 +44,7 @@ def zip_opf_metadata(opfpath, zf):
from calibre.ebooks.metadata.opf2 import OPF
if hasattr(opfpath, 'read'):
f = opfpath
opfpath = getattr(f, 'name', os.getcwd())
opfpath = getattr(f, 'name', os.getcwdu())
else:
f = open(opfpath, 'rb')
opf = OPF(f, os.path.dirname(opfpath))

View File

@ -785,11 +785,11 @@ class MobiReader(object):
mi = MetaInformation(self.book_header.title, [_('Unknown')])
opf = OPFCreator(os.path.dirname(htmlfile), mi)
if hasattr(self.book_header.exth, 'cover_offset'):
opf.cover = 'images/%05d.jpg' % (self.book_header.exth.cover_offset + 1)
opf.cover = u'images/%05d.jpg' % (self.book_header.exth.cover_offset + 1)
elif mi.cover is not None:
opf.cover = mi.cover
else:
opf.cover = 'images/%05d.jpg' % 1
opf.cover = u'images/%05d.jpg' % 1
if not os.path.exists(os.path.join(os.path.dirname(htmlfile),
* opf.cover.split('/'))):
opf.cover = None
@ -799,7 +799,7 @@ class MobiReader(object):
if cover is not None:
cover = cover.replace('/', os.sep)
if os.path.exists(cover):
ncover = 'images'+os.sep+'calibre_cover.jpg'
ncover = u'images'+os.sep+u'calibre_cover.jpg'
if os.path.exists(ncover):
os.remove(ncover)
shutil.copyfile(cover, ncover)
@ -807,7 +807,7 @@ class MobiReader(object):
opf.cover = ncover.replace(os.sep, '/')
manifest = [(htmlfile, 'application/xhtml+xml'),
(os.path.abspath('styles.css'), 'text/css')]
(os.path.abspath(u'styles.css'), 'text/css')]
bp = os.path.dirname(htmlfile)
added = set([])
for i in getattr(self, 'image_names', []):

View File

@ -306,7 +306,9 @@ class Serializer(object):
if id_:
href = '#'.join((item.href, id_))
offset = self.anchor_offset or buf.tell()
self.id_offsets[urlnormalize(href)] = offset
key = urlnormalize(href)
# Only set this id_offset if it wasn't previously seen
self.id_offsets[key] = self.id_offsets.get(key, offset)
if self.anchor_offset is not None and \
tag == 'a' and not elem.attrib and \
not len(elem) and not elem.text:
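The keep-first behaviour introduced above can be sketched in isolation (the anchor names and offsets here are illustrative, not the Serializer's real data):

    id_offsets = {}

    def record_anchor(href, offset):
        # only the first offset recorded for a given anchor is kept
        id_offsets[href] = id_offsets.get(href, offset)

    record_anchor('index.html#ch1', 100)
    record_anchor('index.html#ch1', 250)  # later duplicate is ignored
    assert id_offsets['index.html#ch1'] == 100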

View File

@ -774,6 +774,8 @@ class Manifest(object):
def __init__(self, oeb, id, href, media_type,
fallback=None, loader=str, data=None):
if href:
href = unicode(href)
self.oeb = oeb
self.id = id
self.href = self.path = urlnormalize(href)
@ -1106,7 +1108,7 @@ class Manifest(object):
while href.lower() in lhrefs:
href = base + str(index) + ext
index += 1
return id, href
return id, unicode(href)
def __iter__(self):
for item in self.items:
@ -1320,6 +1322,8 @@ class Guide(object):
def add(self, type, title, href):
"""Add a new reference to the `Guide`."""
if href:
href = unicode(href)
ref = self.Reference(self.oeb, type, title, href)
self.refs[type] = ref
return ref

View File

@ -104,7 +104,7 @@ class CoverManager(object):
img_data = calibre_cover(title, authors_to_string(authors),
series_string=series_string)
id, href = self.oeb.manifest.generate('cover',
'cover_image.jpg')
u'cover_image.jpg')
item = self.oeb.manifest.add(id, href, guess_type('t.jpg')[0],
data=img_data)
m.clear('cover')
@ -154,7 +154,7 @@ class CoverManager(object):
templ = self.non_svg_template if self.no_svg_cover \
else self.svg_template
tp = templ%unquote(href)
id, href = m.generate('titlepage', 'titlepage.xhtml')
id, href = m.generate('titlepage', u'titlepage.xhtml')
item = m.add(id, href, guess_type('t.xhtml')[0],
data=etree.fromstring(tp))
else:

View File

@ -48,6 +48,8 @@ class RescaleImages(object):
scaled, new_width, new_height = fit_image(width, height,
page_width, page_height)
if scaled:
new_width = max(1, new_width)
new_height = max(1, new_height)
self.log('Rescaling image from %dx%d to %dx%d'%(
width, height, new_width, new_height), item.href)
try:

View File

@ -188,7 +188,7 @@ class FlowSplitter(object):
self.csp_counter = 0
base, ext = os.path.splitext(self.base)
self.base = base.replace('%', '%%')+'_split_%.3d'+ext
self.base = base.replace('%', '%%')+u'_split_%.3d'+ext
self.trees = [self.item.data.getroottree()]
self.splitting_on_page_breaks = True

View File

@ -5,15 +5,13 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>, ' \
'2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import errno
import os
import sys
import subprocess
import errno, os, sys, subprocess, shutil
from functools import partial
from calibre.ebooks import ConversionError, DRMError
from calibre.ptempfile import PersistentTemporaryFile
from calibre.constants import isosx, iswindows, islinux, isbsd
from calibre.constants import (isosx, iswindows, islinux, isbsd,
filesystem_encoding)
from calibre import CurrentDir
PDFTOHTML = 'pdftohtml'
@ -30,31 +28,45 @@ def pdftohtml(output_dir, pdf_path, no_images):
'''
Convert the pdf into html using the pdftohtml app.
This will write the html as index.html into output_dir.
It will also wirte all extracted images to the output_dir
It will also write all extracted images to the output_dir
'''
if isinstance(pdf_path, unicode):
pdf_path = pdf_path.encode(sys.getfilesystemencoding())
if not os.access(pdf_path, os.R_OK):
raise ConversionError('Cannot read from ' + pdf_path)
pdfsrc = os.path.join(output_dir, u'src.pdf')
index = os.path.join(output_dir, u'index.html')
with open(pdf_path, 'rb') as src, open(pdfsrc, 'wb') as dest:
shutil.copyfileobj(src, dest)
with CurrentDir(output_dir):
index = os.path.join(os.getcwd(), 'index.html')
# This is neccessary as pdftohtml doesn't always (linux) respect absolute paths
pdf_path = os.path.abspath(pdf_path)
cmd = [PDFTOHTML, '-enc', 'UTF-8', '-noframes', '-p', '-nomerge', '-nodrm', '-q', pdf_path, os.path.basename(index)]
if isbsd:
cmd.remove('-nodrm')
if no_images:
cmd.append('-i')
# This is necessary as pdftohtml doesn't always (linux) respect
# absolute paths. Also, it allows us to safely pass only bytestring
        # arguments to subprocess on windows
logf = PersistentTemporaryFile('pdftohtml_log')
# subprocess in python 2 cannot handle unicode arguments on windows
# that cannot be encoded with mbcs. Ensure all args are
# bytestrings.
def a(x):
return os.path.basename(x).encode('ascii')
exe = PDFTOHTML.encode(filesystem_encoding) if isinstance(PDFTOHTML,
unicode) else PDFTOHTML
cmd = [exe, b'-enc', b'UTF-8', b'-noframes', b'-p', b'-nomerge',
b'-nodrm', b'-q', a(pdfsrc), a(index)]
if isbsd:
cmd.remove(b'-nodrm')
if no_images:
cmd.append(b'-i')
logf = PersistentTemporaryFile(u'pdftohtml_log')
try:
p = popen(cmd, stderr=logf._fd, stdout=logf._fd,
stdin=subprocess.PIPE)
except OSError as err:
if err.errno == errno.ENOENT:
raise ConversionError(_('Could not find pdftohtml, check it is in your PATH'))
raise ConversionError(
_('Could not find pdftohtml, check it is in your PATH'))
else:
raise
@ -70,6 +82,10 @@ def pdftohtml(output_dir, pdf_path, no_images):
logf.flush()
logf.close()
out = open(logf.name, 'rb').read().strip()
try:
os.remove(pdfsrc)
except:
pass
if ret != 0:
raise ConversionError(out)
if out:
@ -84,3 +100,4 @@ def pdftohtml(output_dir, pdf_path, no_images):
i.seek(0)
i.truncate()
i.write(raw)

Some files were not shown because too many files have changed in this diff.