diff --git a/Changelog.yaml b/Changelog.yaml
index 17f3ebcf97..01425ec2ca 100644
--- a/Changelog.yaml
+++ b/Changelog.yaml
@@ -19,6 +19,57 @@
# new recipes:
# - title:
+- version: 0.8.48
+ date: 2012-04-20
+
+ new features:
+ - title: "Conversion: The search and replace feature has been completely revamped."
+ description: "You can now use any number of search and replace
+ expressions, not just three. You can also store and load frequently used
+ sets of search and replace expressions. Also, the wizard generates its
+ preview in a separate process to protect against crashes/memory leaks."
+ tickets: [983476,983484,983478]
+
+ - title: "Support for the new '.azw3' files that Amazon recently started generating. calibre will now detect them as ebooks. It can also view/convert them, if they are DRM free."
+
+ - title: "Drivers for Samsung Galaxy ACE GT-S5830L and HTC One X"
+ tickets: [981185]
+
+ bug fixes:
+ - title: "Get Books: Support the new website design of Barnes & Noble"
+
+ - title: "T1 driver: Fix books sent to SD card sometimes resulting problems when deleted."
+ tickets: [943586]
+
+ - title: "Do not allow author names to be set to blank via the Manage authors function. Blank authors are now automatically set to 'Unknown'"
+
+ - title: "MOBI Output: Handle background color specified on
and | in addition to tags."
+ tickets: [980813]
+
+ - title: "MOBI Output: Fix underline style applied to parent element not getting inherited by children."
+ tickets: [985711]
+
+ improved recipes:
+ - xkcd
+ - Metro Nieuws
+ - Calgary Herald
+ - Orlando Sentinel
+ - countryfile
+ - Heise
+
+ new recipes:
+ - title: Various new Polish news sources
+ author: fenuks
+
+ - title: Various Italian news sources
+ author: faber1971
+
+ - title: Jakarta Globe
+ author: rty
+
+ - title: Acim Bilim Dergisi
+ author: thomass
+
- version: 0.8.47
date: 2012-04-13
diff --git a/recipes/acim_bilim_dergisi.recipe b/recipes/acim_bilim_dergisi.recipe
new file mode 100644
index 0000000000..5d674fe93a
--- /dev/null
+++ b/recipes/acim_bilim_dergisi.recipe
@@ -0,0 +1,27 @@
+# -*- coding: utf-8 -*-
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1334868409(BasicNewsRecipe):
+ title = u'AÇIK BİLİM DERGİSİ'
+ description = 'Aylık çevrimiçi bilim dergisi'
+ __author__ = u'thomass'
+ oldest_article = 30
+ max_articles_per_feed = 300
+ auto_cleanup = True
+ encoding = 'UTF-8'
+ publisher = 'açık bilim'
+ category = 'haber, bilim,TR,dergi'
+ language = 'tr'
+ publication_type = 'magazine'
+ conversion_options = {
+ 'tags' : category
+ ,'language' : language
+ ,'publisher' : publisher
+ ,'linearize_tables': True
+ }
+ cover_img_url = 'http://www.acikbilim.com/wp-content/themes/Equilibrium/images/logodene.jpg'
+ masthead_url = 'http://www.acikbilim.com/wp-content/themes/Equilibrium/images/logodene.jpg'
+
+
+ feeds = [(u'Tüm Yayınlar', u'http://www.acikbilim.com/feed')]
diff --git a/recipes/adventure_zone_pl.recipe b/recipes/adventure_zone_pl.recipe
index 65f4e3e52d..bb311606ac 100644
--- a/recipes/adventure_zone_pl.recipe
+++ b/recipes/adventure_zone_pl.recipe
@@ -9,6 +9,7 @@ class Adventure_zone(BasicNewsRecipe):
no_stylesheets = True
oldest_article = 20
max_articles_per_feed = 100
+ index='http://www.adventure-zone.info/fusion/'
use_embedded_content=False
preprocess_regexps = [(re.compile(r"Komentarze | ", re.IGNORECASE), lambda m: '')]
remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
@@ -45,6 +46,19 @@ class Adventure_zone(BasicNewsRecipe):
skip_tag = skip_tag.findAll(name='a')
for r in skip_tag:
if r.strong:
- word=r.strong.string
- if word and (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word)):
- return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True)
\ No newline at end of file
+ word=r.strong.string.lower()
+ if word and (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word) or ('poradnik' in word)):
+ return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True)
+
+ def preprocess_html(self, soup):
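+ # remove the second footer link, strip inline styles and make relative hrefs absolute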
+ footer=soup.find(attrs={'class':'news-footer middle-border'})
+ if footer and len(footer('a'))>=2:
+ footer('a')[1].extract()
+ for item in soup.findAll(style=True):
+ del item['style']
+ for a in soup('a'):
+ if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
+ a['href']=self.index + a['href']
+ return soup
+
+
\ No newline at end of file
diff --git a/recipes/benchmark_pl.recipe b/recipes/benchmark_pl.recipe
index cc74cc9128..00eea1be68 100644
--- a/recipes/benchmark_pl.recipe
+++ b/recipes/benchmark_pl.recipe
@@ -68,4 +68,7 @@ class Benchmark_pl(BasicNewsRecipe):
self.image_article(soup, soup.body)
else:
self.append_page(soup, soup.body)
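+ # make site-relative hrefs absolute (self.INDEX is the site root)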
+ for a in soup('a'):
+ if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
+ a['href']=self.INDEX + a['href']
return soup
diff --git a/recipes/calgary_herald.recipe b/recipes/calgary_herald.recipe
index dc919a76f8..12134bc9a4 100644
--- a/recipes/calgary_herald.recipe
+++ b/recipes/calgary_herald.recipe
@@ -1,220 +1,35 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-__license__ = 'GPL v3'
-
-'''
-www.canada.com
-'''
-
-import re
-from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
-
-
-class CanWestPaper(BasicNewsRecipe):
-
- # un-comment the following four lines for the Victoria Times Colonist
-## title = u'Victoria Times Colonist'
-## url_prefix = 'http://www.timescolonist.com'
-## description = u'News from Victoria, BC'
-## fp_tag = 'CAN_TC'
-
- # un-comment the following four lines for the Vancouver Province
-## title = u'Vancouver Province'
-## url_prefix = 'http://www.theprovince.com'
-## description = u'News from Vancouver, BC'
-## fp_tag = 'CAN_VP'
-
- # un-comment the following four lines for the Vancouver Sun
-## title = u'Vancouver Sun'
-## url_prefix = 'http://www.vancouversun.com'
-## description = u'News from Vancouver, BC'
-## fp_tag = 'CAN_VS'
-
- # un-comment the following four lines for the Edmonton Journal
-## title = u'Edmonton Journal'
-## url_prefix = 'http://www.edmontonjournal.com'
-## description = u'News from Edmonton, AB'
-## fp_tag = 'CAN_EJ'
-
- # un-comment the following four lines for the Calgary Herald
- title = u'Calgary Herald'
- url_prefix = 'http://www.calgaryherald.com'
- description = u'News from Calgary, AB'
- fp_tag = 'CAN_CH'
-
- # un-comment the following four lines for the Regina Leader-Post
-## title = u'Regina Leader-Post'
-## url_prefix = 'http://www.leaderpost.com'
-## description = u'News from Regina, SK'
-## fp_tag = ''
-
- # un-comment the following four lines for the Saskatoon Star-Phoenix
-## title = u'Saskatoon Star-Phoenix'
-## url_prefix = 'http://www.thestarphoenix.com'
-## description = u'News from Saskatoon, SK'
-## fp_tag = ''
-
- # un-comment the following four lines for the Windsor Star
-## title = u'Windsor Star'
-## url_prefix = 'http://www.windsorstar.com'
-## description = u'News from Windsor, ON'
-## fp_tag = 'CAN_'
-
- # un-comment the following four lines for the Ottawa Citizen
-## title = u'Ottawa Citizen'
-## url_prefix = 'http://www.ottawacitizen.com'
-## description = u'News from Ottawa, ON'
-## fp_tag = 'CAN_OC'
-
- # un-comment the following four lines for the Montreal Gazette
-## title = u'Montreal Gazette'
-## url_prefix = 'http://www.montrealgazette.com'
-## description = u'News from Montreal, QC'
-## fp_tag = 'CAN_MG'
-
-
- language = 'en_CA'
- __author__ = 'Nick Redding'
- no_stylesheets = True
- timefmt = ' [%b %d]'
- extra_css = '''
- .timestamp { font-size:xx-small; display: block; }
- #storyheader { font-size: medium; }
- #storyheader h1 { font-size: x-large; }
- #storyheader h2 { font-size: large; font-style: italic; }
- .byline { font-size:xx-small; }
- #photocaption { font-size: small; font-style: italic }
- #photocredit { font-size: xx-small; }'''
- keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
- remove_tags = [{'class':'comments'},
- dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
- dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
- dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
- dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
- dict(name='div', attrs={'class':'rule_grey_solid'}),
- dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
-
- def get_cover_url(self):
- from datetime import timedelta, date
- if self.fp_tag=='':
- return None
- cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
- br = BasicNewsRecipe.get_browser()
- daysback=1
- try:
- br.open(cover)
- except:
- while daysback<7:
- cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg'
- br = BasicNewsRecipe.get_browser()
- try:
- br.open(cover)
- except:
- daysback = daysback+1
- continue
- break
- if daysback==7:
- self.log("\nCover unavailable")
- cover = None
- return cover
-
- def fixChars(self,string):
- # Replace lsquo (\x91)
- fixed = re.sub("\x91","‘",string)
- # Replace rsquo (\x92)
- fixed = re.sub("\x92","’",fixed)
- # Replace ldquo (\x93)
- fixed = re.sub("\x93","“",fixed)
- # Replace rdquo (\x94)
- fixed = re.sub("\x94","”",fixed)
- # Replace ndash (\x96)
- fixed = re.sub("\x96","–",fixed)
- # Replace mdash (\x97)
- fixed = re.sub("\x97","—",fixed)
- fixed = re.sub("’","’",fixed)
- return fixed
-
- def massageNCXText(self, description):
- # Kindle TOC descriptions won't render certain characters
- if description:
- massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
- # Replace '&' with '&'
- massaged = re.sub("&","&", massaged)
- return self.fixChars(massaged)
- else:
- return description
-
- def populate_article_metadata(self, article, soup, first):
- if first:
- picdiv = soup.find('body').find('img')
- if picdiv is not None:
- self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src']))
- xtitle = article.text_summary.strip()
- if len(xtitle) == 0:
- desc = soup.find('meta',attrs={'property':'og:description'})
- if desc is not None:
- article.summary = article.text_summary = desc['content']
-
- def strip_anchors(self,soup):
- paras = soup.findAll(True)
- for para in paras:
- aTags = para.findAll('a')
- for a in aTags:
- if a.img is None:
- a.replaceWith(a.renderContents().decode('cp1252','replace'))
- return soup
-
- def preprocess_html(self, soup):
- return self.strip_anchors(soup)
-
-
-
- def parse_index(self):
- soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
-
- articles = {}
- key = 'News'
- ans = ['News']
-
- # Find each instance of class="sectiontitle", class="featurecontent"
- for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
- #self.log(" div class = %s" % divtag['class'])
- if divtag['class'].startswith('section_title'):
- # div contains section title
- if not divtag.h3:
- continue
- key = self.tag_to_string(divtag.h3,False)
- ans.append(key)
- self.log("Section name %s" % key)
- continue
- # div contains article data
- h1tag = divtag.find('h1')
- if not h1tag:
- continue
- atag = h1tag.find('a',href=True)
- if not atag:
- continue
- url = self.url_prefix+'/news/todays-paper/'+atag['href']
- #self.log("Section %s" % key)
- #self.log("url %s" % url)
- title = self.tag_to_string(atag,False)
- #self.log("title %s" % title)
- pubdate = ''
- description = ''
- ptag = divtag.find('p');
- if ptag:
- description = self.tag_to_string(ptag,False)
- #self.log("description %s" % description)
- author = ''
- autag = divtag.find('h4')
- if autag:
- author = self.tag_to_string(autag,False)
- #self.log("author %s" % author)
- if not articles.has_key(key):
- articles[key] = []
- articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
-
- ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
- return ans
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class CalgaryHerald(BasicNewsRecipe):
+ title = u'Calgary Herald'
+ oldest_article = 3
+ max_articles_per_feed = 100
+
+ feeds = [
+ (u'News', u'http://rss.canada.com/get/?F233'),
+ (u'Calgary', u'http://www.calgaryherald.com/scripts/sp6query.aspx?catalog=cahr&tags=keyword|calgary&output=rss?link=http%3a%2f%2fwww.calgaryherald'),
+ (u'Alberta', u'http://www.calgaryherald.com/scripts/Sp6Query.aspx?catalog=CAHR&tags=Keyword|Alberta&output=rss?link=http%3A%2F%2Fwww.calgaryherald.com%2Fnews%2Falberta%2Findex.html'),
+ (u'Politics', u'http://rss.canada.com/get/?F7551'),
+ (u'National', u'http://rss.canada.com/get/?F7552'),
+ (u'World', u'http://rss.canada.com/get/?F7553'),
+ ]
+ __author__ = 'rty'
+ publisher = 'Calgary Herald'
+ description = 'Calgary, Alberta, Canada'
+ category = 'News, Calgary, Alberta, Canada'
+
+
+ remove_javascript = True
+ use_embedded_content = False
+ no_stylesheets = True
+ language = 'en_CA'
+ encoding = 'utf-8'
+ conversion_options = {'linearize_tables':True}
+ ##masthead_url = 'http://www.calgaryherald.com/index.html'
+ keep_only_tags = [
+ dict(name='div', attrs={'id':'storyheader'}),
+ dict(name='div', attrs={'id':'storycontent'})
+
+ ]
+ remove_tags_after = {'class':"story_tool_hr"}
+
diff --git a/recipes/camera_di_commercio_di_bari.recipe b/recipes/camera_di_commercio_di_bari.recipe
new file mode 100644
index 0000000000..c80a825883
--- /dev/null
+++ b/recipes/camera_di_commercio_di_bari.recipe
@@ -0,0 +1,17 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1331729727(BasicNewsRecipe):
+ title = u'Camera di Commercio di Bari'
+ oldest_article = 7
+ __author__ = 'faber1971'
+ description = 'News from the Chamber of Commerce of Bari'
+ language = 'it'
+ max_articles_per_feed = 100
+ auto_cleanup = True
+ masthead_url = 'http://www.ba.camcom.it/grafica/layout-bordo/logo_camcom_bari.png'
+ feeds = [(u'Camera di Commercio di Bari', u'http://feed43.com/4715147488845101.xml')]
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, faber1971'
+__version__ = 'v1.00'
+__date__ = '17, April 2012'
diff --git a/recipes/cd_action.recipe b/recipes/cd_action.recipe
index ff46774dc9..4e19fbc6c1 100644
--- a/recipes/cd_action.recipe
+++ b/recipes/cd_action.recipe
@@ -6,6 +6,7 @@ class CD_Action(BasicNewsRecipe):
description = 'cdaction.pl - polish games magazine site'
category = 'games'
language = 'pl'
+ index='http://www.cdaction.pl'
oldest_article = 8
max_articles_per_feed = 100
no_stylesheets= True
@@ -17,4 +18,10 @@ class CD_Action(BasicNewsRecipe):
def get_cover_url(self):
soup = self.index_to_soup('http://www.cdaction.pl/magazyn/')
self.cover_url='http://www.cdaction.pl'+ soup.find(id='wspolnik').div.a['href']
- return getattr(self, 'cover_url', self.cover_url)
\ No newline at end of file
+ return getattr(self, 'cover_url', self.cover_url)
+
+ def preprocess_html(self, soup):
+ for a in soup('a'):
+ if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
+ a['href']=self.index + a['href']
+ return soup
\ No newline at end of file
diff --git a/recipes/countryfile.recipe b/recipes/countryfile.recipe
index 7a41b5b905..0502129791 100644
--- a/recipes/countryfile.recipe
+++ b/recipes/countryfile.recipe
@@ -1,11 +1,12 @@
+from calibre import browser
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
title = u'Countryfile.com'
- cover_url = 'http://www.buysubscriptions.com/static_content/the-immediate/en/images/covers/CFIL_maxi.jpg'
+ #cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'
__author__ = 'Dave Asbury'
description = 'The official website of Countryfile Magazine'
- # last updated 29/1/12
+ # last updated 15/4/12
language = 'en_GB'
oldest_article = 30
max_articles_per_feed = 25
@@ -13,7 +14,23 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
no_stylesheets = True
auto_cleanup = True
#articles_are_obfuscated = True
+ def get_cover_url(self):
+ soup = self.index_to_soup('http://www.countryfile.com/')
+ cov = soup.find(attrs={'class' : 'imagecache imagecache-160px_wide imagecache-linked imagecache-160px_wide_linked'})
+ #print '******** ',cov,' ***'
+ cov2 = str(cov)
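+ # fixed-offset slice below digs the image src out of the tag's string form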
+ cov2=cov2[124:-90]
+ #print '******** ',cov2,' ***'
+ # try to fetch the scraped cover url; fall back to a known cover on failure
+ br = browser()
+ br.set_handle_redirect(False)
+ try:
+ br.open_novisit(cov2)
+ cover_url = cov2
+ except:
+ cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'
+ return cover_url
remove_tags = [
# dict(attrs={'class' : ['player']}),
diff --git a/recipes/dobreprogamy.recipe b/recipes/dobreprogamy.recipe
index a27a9b0877..0614cf98ee 100644
--- a/recipes/dobreprogamy.recipe
+++ b/recipes/dobreprogamy.recipe
@@ -11,6 +11,7 @@ class Dobreprogramy_pl(BasicNewsRecipe):
cover_url = 'http://userlogos.org/files/logos/Karmody/dobreprogramy_01.png'
description = u'Aktualności i blogi z dobreprogramy.pl'
encoding = 'utf-8'
+ index='http://www.dobreprogramy.pl/'
no_stylesheets = True
language = 'pl'
extra_css = '.title {font-size:22px;}'
@@ -22,3 +23,10 @@ class Dobreprogramy_pl(BasicNewsRecipe):
#remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]
feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'),
('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')]
+
+
+ def preprocess_html(self, soup):
+ for a in soup('a'):
+ if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
+ a['href']=self.index + a['href']
+ return soup
\ No newline at end of file
diff --git a/recipes/dzieje_pl.recipe b/recipes/dzieje_pl.recipe
index d80161e71a..4c583e4815 100644
--- a/recipes/dzieje_pl.recipe
+++ b/recipes/dzieje_pl.recipe
@@ -7,6 +7,7 @@ class Dzieje(BasicNewsRecipe):
cover_url = 'http://www.dzieje.pl/sites/default/files/dzieje_logo.png'
category = 'history'
language = 'pl'
+ index='http://dzieje.pl'
oldest_article = 8
max_articles_per_feed = 100
remove_javascript=True
@@ -15,3 +16,10 @@ class Dzieje(BasicNewsRecipe):
remove_tags_after= dict(id='dogory')
remove_tags=[dict(id='dogory')]
feeds = [(u'Dzieje', u'http://dzieje.pl/rss.xml')]
+
+
+ def preprocess_html(self, soup):
+ for a in soup('a'):
+ if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
+ a['href']=self.index + a['href']
+ return soup
\ No newline at end of file
diff --git a/recipes/eioba.recipe b/recipes/eioba.recipe
index 14256c5811..1df79d64bd 100644
--- a/recipes/eioba.recipe
+++ b/recipes/eioba.recipe
@@ -21,3 +21,8 @@ class eioba(BasicNewsRecipe):
(u'Rozrywka', u'http://www.eioba.pl/feed/categories/10.xml'),
(u'Rożne', u'http://www.eioba.pl/feed/categories/9.xml')
]
+
+ def preprocess_html(self, soup):
+ for item in soup.findAll(style=True):
+ del item['style']
+ return soup
diff --git a/recipes/emuzica_pl.recipe b/recipes/emuzica_pl.recipe
index 75271c510a..2fbf9ff514 100644
--- a/recipes/emuzica_pl.recipe
+++ b/recipes/emuzica_pl.recipe
@@ -7,6 +7,7 @@ class eMuzyka(BasicNewsRecipe):
description = u'Emuzyka to największa i najpopularniejsza strona o muzyce w Polsce'
category = 'music'
language = 'pl'
+ index='http://www.emuzyka.pl'
cover_url='http://s.emuzyka.pl/img/emuzyka_invert_small.jpg'
no_stylesheets = True
oldest_article = 7
@@ -14,3 +15,9 @@ class eMuzyka(BasicNewsRecipe):
keep_only_tags=[dict(name='div', attrs={'id':'news_container'}), dict(name='h3'), dict(name='div', attrs={'class':'review_text'})]
remove_tags=[dict(name='span', attrs={'id':'date'})]
feeds = [(u'Aktualno\u015bci', u'http://www.emuzyka.pl/rss.php?f=1'), (u'Recenzje', u'http://www.emuzyka.pl/rss.php?f=2')]
+
+ def preprocess_html(self, soup):
+ for a in soup('a'):
+ if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
+ a['href']=self.index + a['href']
+ return soup
\ No newline at end of file
diff --git a/recipes/fhm_uk.recipe b/recipes/fhm_uk.recipe
index 0e2d5c1ebe..07f2b4b64e 100644
--- a/recipes/fhm_uk.recipe
+++ b/recipes/fhm_uk.recipe
@@ -7,7 +7,7 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
# cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/373529_38324934806_64930243_n.jpg'
masthead_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'
__author__ = 'Dave Asbury'
- # last updated 17/3/12
+ # last updated 14/4/12
language = 'en_GB'
oldest_article = 28
max_articles_per_feed = 12
@@ -28,7 +28,8 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
#]
feeds = [
- (u'From the Homepage',u'http://feed43.com/8053226782885416.xml'),
+ (u'From the Homepage',u'http://feed43.com/0032328550253453.xml'),
+ #http://feed43.com/8053226782885416.xml'),
(u'Funny - The Very Best Of The Internet',u'http://feed43.com/4538510106331565.xml'),
(u'Upgrade',u'http://feed43.com/0877305847443234.xml'),
#(u'The Final Countdown', u'http://feed43.com/3576106158530118.xml'),
diff --git a/recipes/film_web.recipe b/recipes/film_web.recipe
index 877d4472bc..2a6e00d501 100644
--- a/recipes/film_web.recipe
+++ b/recipes/film_web.recipe
@@ -7,6 +7,7 @@ class Filmweb_pl(BasicNewsRecipe):
cover_url = 'http://userlogos.org/files/logos/crudus/filmweb.png'
category = 'movies'
language = 'pl'
+ index='http://www.filmweb.pl'
oldest_article = 8
max_articles_per_feed = 100
no_stylesheets= True
@@ -39,3 +40,9 @@ class Filmweb_pl(BasicNewsRecipe):
self.log.warn(skip_tag)
return self.index_to_soup(skip_tag['href'], raw=True)
+
+ def preprocess_html(self, soup):
+ for a in soup('a'):
+ if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
+ a['href']=self.index + a['href']
+ return soup
\ No newline at end of file
diff --git a/recipes/fotoblogia_pl.recipe b/recipes/fotoblogia_pl.recipe
new file mode 100644
index 0000000000..99df46419a
--- /dev/null
+++ b/recipes/fotoblogia_pl.recipe
@@ -0,0 +1,16 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Fotoblogia_pl(BasicNewsRecipe):
+ title = u'Fotoblogia.pl'
+ __author__ = 'fenuks'
+ category = 'photography'
+ language = 'pl'
+ masthead_url = 'http://img.interia.pl/komputery/nimg/u/0/fotoblogia21.jpg'
+ cover_url= 'http://fotoblogia.pl/images/2009/03/fotoblogia2.jpg'
+ oldest_article = 7
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ use_embedded_content = False
+ keep_only_tags=[dict(name='div', attrs={'class':'post-view post-standard'})]
+ remove_tags=[dict(attrs={'class':['external fotoblogia', 'categories', 'tags']})]
+ feeds = [(u'Wszystko', u'http://fotoblogia.pl/feed/rss2')]
diff --git a/recipes/gameplay_pl.recipe b/recipes/gameplay_pl.recipe
index f3384263d6..7b0ccb4f55 100644
--- a/recipes/gameplay_pl.recipe
+++ b/recipes/gameplay_pl.recipe
@@ -6,16 +6,24 @@ class Gameplay_pl(BasicNewsRecipe):
description = u'gameplay.pl - serwis o naszych zainteresowaniach, grach, filmach, książkach, muzyce, fotografii i konsolach.'
category = 'games, movies, books, music'
language = 'pl'
+ index='http://gameplay.pl'
masthead_url= 'http://gameplay.pl/img/gpy_top_logo.png'
cover_url= 'http://gameplay.pl/img/gpy_top_logo.png'
max_articles_per_feed = 100
+ remove_javascript= True
no_stylesheets= True
keep_only_tags=[dict(name='div', attrs={'class':['news_endpage_tit', 'news']})]
- remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im']})]
+ remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im', 'news_list', 'news_list_autor', 'stop_bot', 'tagi']}), dict(attrs={'usemap':'#map'})]
feeds = [(u'Wiadomo\u015bci', u'http://gameplay.pl/rss/')]
def image_url_processor(self, baseurl, url):
if 'http' not in url:
return 'http://gameplay.pl'+ url[2:]
else:
- return url
+ return url
+
+ def preprocess_html(self, soup):
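+ # rewrite '../' relative links against the site root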
+ for a in soup('a'):
+ if a.has_key('href') and '../' in a['href']:
+ a['href']=self.index + a['href'][2:]
+ return soup
\ No newline at end of file
diff --git a/recipes/gildia_pl.recipe b/recipes/gildia_pl.recipe
index 042902b5fc..36d3ef4da2 100644
--- a/recipes/gildia_pl.recipe
+++ b/recipes/gildia_pl.recipe
@@ -9,6 +9,7 @@ class Gildia(BasicNewsRecipe):
language = 'pl'
oldest_article = 8
max_articles_per_feed = 100
+ remove_empty_feeds=True
no_stylesheets=True
remove_tags=[dict(name='div', attrs={'class':'backlink'}), dict(name='div', attrs={'class':'im_img'}), dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'})]
keep_only_tags=dict(name='div', attrs={'class':'widetext'})
@@ -24,3 +25,16 @@ class Gildia(BasicNewsRecipe):
self.log.warn('odnosnik')
self.log.warn(link['href'])
return self.index_to_soup(link['href'], raw=True)
+
+ def preprocess_html(self, soup):
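+ # gildia.pl serves sections from different subdomains; pick the host by page type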
+ for a in soup('a'):
+ if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
+ if '/gry/' in a['href']:
+ a['href']='http://www.gry.gildia.pl' + a['href']
+ elif u'książk' in soup.title.string.lower() or u'komiks' in soup.title.string.lower():
+ a['href']='http://www.literatura.gildia.pl' + a['href']
+ else:
+ a['href']='http://www.gildia.pl' + a['href']
+ return soup
diff --git a/recipes/gram_pl.recipe b/recipes/gram_pl.recipe
index 07927796c0..1f8147ba3d 100644
--- a/recipes/gram_pl.recipe
+++ b/recipes/gram_pl.recipe
@@ -7,6 +7,7 @@ class Gram_pl(BasicNewsRecipe):
category = 'games'
language = 'pl'
oldest_article = 8
+ index='http://www.gram.pl'
max_articles_per_feed = 100
no_stylesheets= True
extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}'
@@ -52,4 +53,7 @@ class Gram_pl(BasicNewsRecipe):
tag=soup.findAll(name='div', attrs={'class':'picbox'})
for t in tag:
t['style']='float: left;'
+ for a in soup('a'):
+ if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
+ a['href']=self.index + a['href']
return soup
\ No newline at end of file
diff --git a/recipes/heise.recipe b/recipes/heise.recipe
index 56d5516656..ba93ea96ce 100644
--- a/recipes/heise.recipe
+++ b/recipes/heise.recipe
@@ -59,6 +59,7 @@ class heiseDe(BasicNewsRecipe):
dict(name='span', attrs={'class':'rsaquo'}),
dict(name='div', attrs={'class':'news_logo'}),
dict(name='div', attrs={'class':'bcadv ISI_IGNORE'}),
+ dict(name='div', attrs={'class':'navi_top_container'}),
dict(name='p', attrs={'class':'news_option'}),
dict(name='p', attrs={'class':'news_navi'}),
dict(name='div', attrs={'class':'news_foren'})]
@@ -69,3 +70,5 @@ class heiseDe(BasicNewsRecipe):
+
+
diff --git a/recipes/historia_news.recipe b/recipes/historia_news.recipe
new file mode 100644
index 0000000000..4eca8ade91
--- /dev/null
+++ b/recipes/historia_news.recipe
@@ -0,0 +1,20 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class historia_news(BasicNewsRecipe):
+ title = u'historia-news'
+ __author__ = 'fenuks'
+ description = u'Historia-news to portal dla ludzi kochających historię. Najnowsze newsy z historii bliższej i dalszej, archeologii, paleontologii oraz ciekawostki i podcasty z historii kultury, sportu, motoryzacji i inne.'
+ masthead_url = 'http://historia-news.pl/templates/hajak4/images/header.jpg'
+ cover_url= 'http://www.historia-news.pl/templates/hajak4/images/header.jpg'
+ category = 'history'
+ language = 'pl'
+ oldest_article = 7
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ remove_empty_feeds = True
+ remove_tags=[dict(name='form'), dict(name='img', attrs={'alt':'Print'}), dict(attrs={'class':['commbutt', 'cpr']}), dict(id=['plusone', 'facebook'])]
+ feeds = [(u'Wiadomo\u015bci', u'http://historia-news.pl/wiadomoci.feed?type=rss'), (u'Artyku\u0142y', u'http://historia-news.pl/artykuy.feed?type=rss')]
+
+
+ def print_version(self, url):
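+ # Joomla-style print url: request the printer-friendly single-page layout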
+ return url + '?tmpl=component&print=1&layout=default&page='
diff --git a/recipes/icons/fotoblogia_pl.png b/recipes/icons/fotoblogia_pl.png
new file mode 100644
index 0000000000..0204a04e62
Binary files /dev/null and b/recipes/icons/fotoblogia_pl.png differ
diff --git a/recipes/icons/historia_news.png b/recipes/icons/historia_news.png
new file mode 100644
index 0000000000..79b1b52859
Binary files /dev/null and b/recipes/icons/historia_news.png differ
diff --git a/recipes/icons/swiat_obrazu.png b/recipes/icons/swiat_obrazu.png
new file mode 100644
index 0000000000..a61662a864
Binary files /dev/null and b/recipes/icons/swiat_obrazu.png differ
diff --git a/recipes/in4_pl.recipe b/recipes/in4_pl.recipe
index 16ad622b46..e385522714 100644
--- a/recipes/in4_pl.recipe
+++ b/recipes/in4_pl.recipe
@@ -8,6 +8,7 @@ class in4(BasicNewsRecipe):
description = u'Serwis Informacyjny - Aktualnosci, recenzje'
category = 'IT'
language = 'pl'
+ index='http://www.in4.pl/'
#cover_url= 'http://www.in4.pl/recenzje/337/in4pl.jpg'
no_stylesheets = True
remove_empty_feeds = True
@@ -39,6 +40,7 @@ class in4(BasicNewsRecipe):
def preprocess_html(self, soup):
self.append_page(soup, soup.body)
+ for a in soup('a'):
+ if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
+ a['href']=self.index + a['href']
return soup
-
-
diff --git a/recipes/infra_pl.recipe b/recipes/infra_pl.recipe
index 0e035e0980..e021fa0c17 100644
--- a/recipes/infra_pl.recipe
+++ b/recipes/infra_pl.recipe
@@ -8,6 +8,7 @@ class INFRA(BasicNewsRecipe):
description = u'Serwis Informacyjny INFRA - UFO, Zjawiska Paranormalne, Duchy, Tajemnice świata.'
cover_url = 'http://npn.nazwa.pl/templates/ja_teline_ii/images/logo.jpg'
category = 'UFO'
+ index='http://infra.org.pl'
language = 'pl'
max_articles_per_feed = 100
no_stylesheets=True
@@ -15,3 +16,11 @@ class INFRA(BasicNewsRecipe):
remove_tags_after=dict(attrs={'class':'pagenav'})
remove_tags=[dict(attrs={'class':'pagenav'})]
feeds = [(u'Najnowsze wiadomo\u015bci', u'http://www.infra.org.pl/index.php?option=com_rd_rss&id=1')]
+
+ def preprocess_html(self, soup):
+ for item in soup.findAll(style=True):
+ del item['style']
+ for a in soup('a'):
+ if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
+ a['href']=self.index + a['href']
+ return soup
\ No newline at end of file
diff --git a/recipes/jakarta_globe.recipe b/recipes/jakarta_globe.recipe
new file mode 100644
index 0000000000..1414ac6e5b
--- /dev/null
+++ b/recipes/jakarta_globe.recipe
@@ -0,0 +1,34 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class JakartaGlobe(BasicNewsRecipe):
+ title = u'Jakarta Globe'
+ oldest_article = 3
+ max_articles_per_feed = 100
+
+ feeds = [
+ (u'News', u'http://www.thejakartaglobe.com/pages/getrss/getrss-news.php'),
+ (u'Business', u'http://www.thejakartaglobe.com/pages/getrss/getrss-business.php'),
+ (u'Technology', u'http://www.thejakartaglobe.com/pages/getrss/getrss-tech.php'),
+ (u'My Jakarta', u'http://www.thejakartaglobe.com/pages/getrss/getrss-myjakarta.php'),
+ (u'International', u'http://www.thejakartaglobe.com/pages/getrss/getrss-international.php'),
+ (u'Life and Times', u'http://www.thejakartaglobe.com/pages/getrss/getrss-lifeandtimes.php'),
+ ]
+ __author__ = 'rty'
+ publisher = 'JakartaGlobe.com'
+ description = 'JakartaGlobe, Indonesia, Newspaper'
+ category = 'News, Indonesia'
+
+
+ remove_javascript = True
+ use_embedded_content = False
+ no_stylesheets = True
+ language = 'en_ID'
+ encoding = 'utf-8'
+ conversion_options = {'linearize_tables':True}
+ masthead_url = 'http://www.thejakartaglobe.com/pages/2010/images/jak-globe-logo.jpg'
+ keep_only_tags = [
+ dict(name='div', attrs={'class':'story'}),
+ dict(name='span', attrs={'class':'headline'}),
+ dict(name='div', attrs={'class':'story'}),
+ dict(name='p', attrs={'id':'bodytext'})
+ ]
diff --git a/recipes/konflikty_zbrojne.recipe b/recipes/konflikty_zbrojne.recipe
index 7921e98f48..e8b28b49bf 100644
--- a/recipes/konflikty_zbrojne.recipe
+++ b/recipes/konflikty_zbrojne.recipe
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
class Konflikty(BasicNewsRecipe):
title = u'Konflikty Zbrojne'
@@ -10,6 +11,23 @@ class Konflikty(BasicNewsRecipe):
category='military, history'
oldest_article = 7
max_articles_per_feed = 100
- auto_cleanup = True
+ no_stylesheets = True
+ keep_only_tags=[dict(attrs={'class':['title1', 'image']}), dict(id='body')]
- feeds = [(u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'), (u'Artyku\u0142y', u'http://www.konflikty.pl/rss_artykuly_10.xml'), (u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'), (u'Recenzje', u'http://www.konflikty.pl/rss_recenzje_10.xml')]
+ feeds = [(u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'),
+ (u'Artyku\u0142y', u'http://www.konflikty.pl/rss_artykuly_10.xml'),
+ (u'Historia', u'http://www.konflikty.pl/rss_historia_10.xml'),
+ (u'Militaria', u'http://www.konflikty.pl/rss_militaria_10.xml'),
+ (u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'),
+ (u'Recenzje', u'http://www.konflikty.pl/rss_recenzje_10.xml'),
+ (u'Teksty źródłowe', u'http://www.konflikty.pl/rss_tekstyzrodlowe_10.xml')]
+
+ def preprocess_html(self, soup):
+ for item in soup.findAll(style=True):
+ del item['style']
+ for image in soup.findAll(name='a', attrs={'class':'image'}):
+ if image.img and image.img.has_key('alt'):
+ image.name='div'
+ pos = len(image.contents)
+ image.insert(pos, BeautifulSoup('<p>'+image.img['alt']+'</p>'))
+ return soup
diff --git a/recipes/liberatorio_politico.recipe b/recipes/liberatorio_politico.recipe
new file mode 100644
index 0000000000..bbffcd89b1
--- /dev/null
+++ b/recipes/liberatorio_politico.recipe
@@ -0,0 +1,12 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1334649829(BasicNewsRecipe):
+ title = u'Liberatorio Politico'
+ oldest_article = 7
+ max_articles_per_feed = 100
+ auto_cleanup = True
+ masthead_url = 'http://liberatorio.altervista.org/wp-content/uploads/2012/01/Testata-LIBERATORIO-Altervista1.jpg'
+ feeds = [(u'Liberatorio Politico', u'http://liberatorio.altervista.org/feed/')]
+ __author__ = 'faber1971'
+ description = 'Inquiry journalism - a blog on Molfetta, Land of Bari, Apulia and Italy - v1.00 (07, April 2012)'
+ language = 'it'
diff --git a/recipes/limes.recipe b/recipes/limes.recipe
new file mode 100644
index 0000000000..2290b7099e
--- /dev/null
+++ b/recipes/limes.recipe
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+__license__ = 'GPL v3'
+__copyright__ = '2012, faber1971'
+__version__ = 'v1.00'
+__date__ = '16, April 2012'
+__description__ = 'Geopolitical Italian magazine'
+
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Limes(BasicNewsRecipe):
+ description = 'Italian weekly magazine'
+ __author__ = 'faber1971'
+
+ cover_url = 'http://temi.repubblica.it/UserFiles/limes/Image/Loghi/logo-limes.gif'
+ title = 'Limes'
+ category = 'Geopolitical news'
+
+ language = 'it'
+# encoding = 'cp1252'
+ timefmt = '[%a, %d %b, %Y]'
+
+ oldest_article = 16
+ max_articles_per_feed = 100
+ use_embedded_content = False
+ recursion = 10
+
+ remove_javascript = True
+ no_stylesheets = True
+ masthead_url = 'http://temi.repubblica.it/UserFiles/limes/Image/Loghi/logo-limes.gif'
+
+ feeds = [
+ (u'Limes', u'http://temi.repubblica.it/limes/feed/')
+ ]
+
+
+
+ keep_only_tags = [
+ dict(name='div', attrs={'class':['testo','copertina','occhiello','firma','didascalia','content-second-right','detail-articles','titolo-local','generic-articles']}),
+ dict(name='div', attrs={'class':['generic-articles','summary','detail-articles']}),
+ dict(name='div', attrs={'id':['content-second-right','content2']})
+ ]
+
+ remove_tags = [
+ dict(name='div',attrs={'class':['servizi','aggiungi','label-web','bottom-mobile','box-abbonamenti','box-cerca','big','little','stampaweb']}),
+ dict(name='div',attrs={'id':['topheader','header','navigation-new','navigation','content-second-left','menutext']}),
+ dict(name='ul',attrs={'id':'user-utility'}),
+ dict(name=['script','noscript','iframe'])
+ ]
+
diff --git a/recipes/metro_news_nl.recipe b/recipes/metro_news_nl.recipe
index ac3e23869b..d95f9bdfd7 100644
--- a/recipes/metro_news_nl.recipe
+++ b/recipes/metro_news_nl.recipe
@@ -3,25 +3,6 @@ from calibre.web.feeds.news import BasicNewsRecipe
import re
from calibre.utils.magick import Image
from BeautifulSoup import BeautifulSoup
-try:
- from calibre_plugins.drMerry.debug import debuglogger as mlog
- print 'drMerry debuglogger found, debug options can be used'
- from calibre_plugins.drMerry.stats import statslogger as mstat
- print 'drMerry stats tracker found, stat can be tracked'
- mlog.setLoglevel(1) #-1 == no log; 0 for normal output
- mstat.calculateStats(False) #track stats (to track stats loglevel must be > 0
- KEEPSTATS = mstat.keepmystats()
- SHOWDEBUG0 = mlog.showdebuglevel(0)
- SHOWDEBUG1 = mlog.showdebuglevel(1)
- SHOWDEBUG2 = mlog.showdebuglevel(2)
-except:
- #print 'drMerry debuglogger not found, skipping debug options'
- SHOWDEBUG0 = False
- SHOWDEBUG1 = False
- SHOWDEBUG2 = False
- KEEPSTATS = False
-
-#print ('level0: %s\nlevel1: %s\nlevel2: %s' % (SHOWDEBUG0,SHOWDEBUG1,SHOWDEBUG2))
''' Version 1.2, updated cover image to match the changed website.
added info date on title
@@ -43,6 +24,9 @@ except:
extended timeout from 2 to 10
changed oldest article from 10 to 1.2
changed max articles from 15 to 25
+ Version 1.9.1 18-04-2012
+ removed some debug settings
+ updated code to match new metro-layout
'''
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
@@ -70,34 +54,40 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
'author_sort' : 'Metro Nederland & calibre & DrMerry',
'publisher' : 'DrMerry/Metro Nederland'
}
- extra_css = 'body {padding:5px 0px; background:#fff;font-size: 13px;}\
- #date, div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name {clear: both;margin-bottom: 10px;font-size:0.5em; color: #616262;}\
- .article-box-fact.module-title {clear:both;padding: 8px 0;color: #24763b;font-family: arial, sans-serif;font-size: 14px;font-weight: bold;}\
- h1.title {color: #000000;font-size: 44px;padding-bottom: 10px;font-weight: 300;} h2.subtitle {font-size: 13px;font-weight: 700;padding-bottom: 10px;}\
- .article-body p{padding-bottom:10px;}div.column-1-3{margin-left: 19px;padding-right: 9px;}\
- div.column-1-2 {display: inline;padding-right: 7px;}\
- p.article-image-caption {font-size: 12px;font-weight: 300;color: #616262;margin-top: 5px;} \
- p.article-image-caption .credits {font-style: italic;font-size: 10px;}\
- div.article-image-caption {width: 246px;margin-bottom: 5px;margin-left: 10px;}\
- div.article-image-caption-2column {margin-bottom: 10px;width: 373px;} div.article-image-caption-3column {}\
- img {border:0px; padding:2px;} hr.merryhr {width:30%; border-width:0px; color:green; margin-left:5px; background-color: green} div.column-3 {background-color:#eee; width:50%; margin:2px; float:right; padding:2px;} div.column-3 module-title {border: 1px solid #aaa} div.article-box-fact div.subtitle {font-weight:bold; color:green;}'
+ extra_css = 'body {padding:5px 0; background-color:#fff;font-size: 1em}\
+ #date, div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name {margin-bottom: 10px}\
+ #date, div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name, p.article-image-caption .credits {font-size:0.5em}\
+ .article-box-fact.module-title, #date, div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name {clear:both}\
+ .article-box-fact.module-title {padding: 8px 0}\
+ h1.title {color: #000;font-size: 1.4em}\
+ .article-box-fact.module-title, h2.subtitle {font-size: 1.2em}\
+ h1.title, h2.subtitle, .article-body p{padding-bottom:10px}\
+ h1.title, p.article-image-caption {font-weight: 300}\
+ div.column-1-3{margin-left: 19px;padding-right: 9px}\
+ div.column-1-2 {display: inline;padding-right: 7px}\
+ p.article-image-caption {font-size: 0.6em;margin-top: 5px}\
+ p.article-image-caption, #date, div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name {color: #616262}\
+ p.article-image-caption .credits {font-style: italic}\
+ div.article-image-caption {width: 246px;margin: 5px}\
+ div.article-image-caption-2column {width: 373px}\
+ div.article-image-caption-2column, div.article-image-caption-3column {margin-bottom: 5px}\
+ img {border:0}\
+ img, div.column-3 {padding:2px}\
+ hr.merryhr {width:30%; border-width:0; margin-left:5px; background-color: #24763b}\
+ div.column-3 {background-color:#eee; width:50%; margin:2px; float:right}\
+ div.column-3 module-title {border: 1px solid #aaa}\
+ div.article-box-fact div.subtitle, .article-box-fact.module-title, h2.subtitle {font-weight:bold}\
+ div.article-box-fact div.subtitle, hr.merryhr, .article-box-fact.module-title {color: #24763b}'
+
preprocess_regexps = [
(re.compile(r'<img[^>]+top-line[^>]+>', re.DOTALL|re.IGNORECASE),
lambda match: '<hr />'),
- (re.compile(r'(<img[^>]+metronieuws\.nl/[^>]+/templates/[^>]+jpe?g[^>]+>|metronieuws\.nl/internal\-roxen\-unit\.gif)', re.DOTALL|re.IGNORECASE),
+ (re.compile(r'<img[^>]+(metronieuws\.nl/[^>]+/templates/[^>]+jpe?g|metronieuws\.nl/internal\-roxen\-unit\.gif)[^>]+>', re.DOTALL|re.IGNORECASE),
lambda match: ''),
]
def preprocess_html(self, soup):
- if SHOWDEBUG0 == True:
- mlog.setdefaults()
- mlog.addTextAndTag(['Show debug = on with level'], [str(mlog.debuglevel)])
- if KEEPSTATS == True:
- mlog.addDebug('Stats will be calculated')
- else:
- mlog.addTextAndTag(['Stats won\'t be calculated\nTo be enabled, stats must be true, currently','and debug level must be 1 or higher, currently'],[mstat.dokeepmystats, mlog.debuglevel])
- mlog.showDebug()
myProcess = MerryProcess()
myProcess.removeUnwantedTags(soup)
return soup
@@ -105,18 +95,6 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
def postprocess_html(self, soup, first):
myProcess = MerryProcess()
myProcess.optimizeLayout(soup)
- if SHOWDEBUG0 == True:
- if KEEPSTATS == True:
- statinfo = 'generated stats:'
- statinfo += str(mstat.stats(mstat.statslist))
- print statinfo
- statinfo = 'generated stats (for removed tags):'
- statinfo += str(mstat.stats(mstat.removedtagslist))
- print statinfo
- #show all Debug info we forgot to report
- #Using print to be sure that this text will not be added at the end of the log.
- print '\n!!!!!unreported messages:\n(should be empty)\n'
- mlog.showDebug()
return soup
feeds = [
@@ -142,44 +120,24 @@ class MerryPreProcess():
return soup
def optimizePicture(self,soup):
- if SHOWDEBUG0 == True:
- mlog.addDebug('start image optimize')
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
img = Image()
img.open(iurl)
img.trim(0)
img.save(iurl)
- if SHOWDEBUG0 == True:
- mlog.addDebug('Images optimized')
- mlog.showDebug()
return soup
class MerryExtract():
def safeRemovePart(self, killingSoup, soupIsArray):
if killingSoup and not killingSoup == None:
- if SHOWDEBUG2 == True:
- mlog.addTextAndTag(['items to remove'],[killingSoup])
try:
if soupIsArray == True:
for killer in killingSoup:
killer.extract()
else:
killingSoup.extract()
- if SHOWDEBUG1 == True:
- mlog.addDebug('tag extracted')
- mlog.showDebug()
- if KEEPSTATS == True:
- try:
- mstat.addstat(mstat.removedtagslist,str(killingSoup.name))
- except:
- mstat.addstat(mstat.removedtagslist,'unknown')
except:
- if SHOWDEBUG1 == True:
- mlog.addDebug('tag extraction failed')
- mlog.showDebug()
- if KEEPSTATS == True:
- mstat.addstat(mstat.removedtagslist,'exception')
return False
else:
return False
@@ -230,60 +188,26 @@ class MerryProcess(BeautifulSoup):
def optimizeLayout(self,soup):
self.myPrepare.optimizePicture(soup)
- if SHOWDEBUG0 == True:
- mlog.addDebug('End of Optimize Layout')
- mlog.showDebug()
return soup
def insertFacts(self, soup):
allfacts = soup.findAll('div', {'class':re.compile('^article-box-fact.*$')})
- if SHOWDEBUG0 == True:
- mlog.addTextAndTag(['allfacts'],[allfacts])
- mlog.showDebug()
if allfacts and not allfacts == None:
allfactsparent = soup.find('div', {'class':re.compile('^article-box-fact.*$')}).parent
- if SHOWDEBUG0 == True:
- mlog.addTextAndTag(['allfactsparent'],[allfactsparent])
- mlog.showDebug()
for part in allfactsparent:
if not part in allfacts:
- if SHOWDEBUG0 == True:
- mlog.addTextAndTag(['FOUND A non-fact'],[part])
- mlog.showDebug()
self.myKiller.safeRemovePart(part, True)
- if SHOWDEBUG1 == True:
- mlog.addTextAndTag(['New All Facts'],[allfacts])
- mlog.showDebug()
articlefacts = soup.find('div', {'class':'article-box-fact column'})
- errorOccured=False
if (articlefacts and not articlefacts==None):
try:
contenttag = soup.find('div', {'class':'article-body'})
- if SHOWDEBUG0 == True:
- mlog.addTextAndTag(['curcontag'],[contenttag])
- mlog.showDebug()
foundrighttag = False
if contenttag and not contenttag == None:
foundrighttag = True
- if SHOWDEBUG0 == True:
- if errorOccured == False:
- mlog.addTextAndTag(['type','curcontag (in while)'],[type(contenttag),contenttag])
- else:
- mlog.addDebug('Could not find right parent tag. Error Occured')
- mlog.showDebug()
if foundrighttag == True:
contenttag.insert(0, allfactsparent)
- if SHOWDEBUG2 == True:
- mlog.addTextAndTag(['added parent'],[soup.prettify()])
- mlog.showDebug()
except:
- errorOccured=True
- mlog.addTrace()
- else:
- errorOccured=True
- if SHOWDEBUG0 == True and errorOccured == True:
- mlog.addTextAndTag(['no articlefacts'],[articlefacts])
- mlog.showDebug()
+ pass
return soup
def previousNextSibRemover(self, soup, previous=True, soupIsArray=False):
@@ -300,71 +224,38 @@ class MerryProcess(BeautifulSoup):
sibs = findsibsof.nextSiblingGenerator()
for sib in sibs:
self.myKiller.safeRemovePart(sib, True)
- else:
- if SHOWDEBUG1 == True:
- mlog.addDebug('Not any sib found')
return
def removeUnwantedTags(self,soup):
- if SHOWDEBUG1 == True:
- mlog.addTextAndTag(['Len of Soup before RemoveTagsByName'],[len(str(soup))])
- mlog.showDebug()
self.removeTagsByName(soup)
- if SHOWDEBUG1 == True:
- mlog.addDebug('Len of Soup before firstandlastpart: %s' % len(str(soup)))
- mlog.showDebug()
self.insertFacts(soup)
self.removeFirstAndLastPart(soup)
- if SHOWDEBUG1 == True:
- mlog.addDebug('Len of Soup before unwantedpart: %s' % len(str(soup)))
- mlog.showDebug()
self.removeUnwantedParts(soup)
- if SHOWDEBUG1 == True:
- mlog.addDebug('Len of Soup before EmptyParts: %s' % len(str(soup)))
- mlog.showDebug()
self.removeEmptyTags(soup)
- if SHOWDEBUG1 == True:
- mlog.addDebug('Len of Soup after EmptyParts: %s' % len(str(soup)))
- mlog.showDebug()
self.myReplacer.replaceATag(soup)
return soup
def removeUnwantedParts(self, soup):
- if SHOWDEBUG1 == True:
- mlog.addDebug('Len of Soup before UnwantedID: %s' % len(str(soup)))
- mlog.showDebug()
self.removeUnwantedTagsByID(soup)
- if SHOWDEBUG1 == True:
- mlog.addDebug('Len of Soup before Class: %s' % len(str(soup)))
- mlog.showDebug()
self.removeUnwantedTagsByClass(soup)
- if SHOWDEBUG1 == True:
- mlog.addDebug('Len of Soup before Style: %s' % len(str(soup)))
- mlog.showDebug()
self.removeUnwantedTagsByStyle(soup)
return soup
def removeUnwantedTagsByStyle(self,soup):
- self.removeArrayOfTags(soup.findAll(attrs={'style' : re.compile("^(.*(display\s?:\s?none|img-mask|white)\s?;?.*)$")}))
- if SHOWDEBUG0 == True:
- mlog.addDebug('end remove by style')
+ self.removeArrayOfTags(soup.findAll(attrs={'style':re.compile("^(.*(display\s?:\s?none|img-mask|white)\s?;?.*)$")}))
+ self.removeArrayOfTags(soup.findAll(attrs={'title':'volledig scherm'}))
return soup
def removeArrayOfTags(self,souparray):
return self.myKiller.safeRemovePart(souparray, True)
def removeUnwantedTagsByClass(self,soup):
- if SHOWDEBUG0 == True:
- mlog.addDebug('start remove by class')
- self.removeArrayOfTags(soup.findAll("div", { "class" :re.compile('^(promo.*?|article-tools-below-title|metroCommentFormWrap|ad|share-tools|tools|header-links|related-links|padding-top-15)$')}))
+ self.removeArrayOfTags(soup.findAll("div", { "class" :re.compile('^(promo.*?|share-tools-top|share-tools-bottom|article-tools-below-title|metroCommentFormWrap|ad|share-tools|tools|header-links|related-links|padding-top-15|footer-[a-zA-Z0-9]+)$')}))
return soup
def removeUnwantedTagsByID(self,soup):
- defaultids = ['footer-extra',re.compile('^ad(\d+|adcomp.*?)?$'),'column-4-5','navigation','header',re.compile('^column-1-5-(top|bottom)$'),'footer','hidden_div','sidebar',re.compile('^article-\d$'),'comments','footer']
+ defaultids = ['footer-extra',re.compile('^ad(\d+|adcomp.*?)?$'),'column-4-5','navigation','header',re.compile('^column-1-5-(top|bottom)$'),'footer','hidden_div','sidebar',re.compile('^article-\d$'),'comments','footer','gallery-1']
for removeid in defaultids:
- if SHOWDEBUG1 == True:
- mlog.addDebug('RemoveTagByID, tag: %s, Len of Soup: %s' % (str(removeid), len(str(soup))))
- mlog.showDebug()
self.removeArrayOfTags(soup.findAll(id=removeid))
return soup
@@ -380,33 +271,12 @@ class MerryProcess(BeautifulSoup):
return soup
def removeEmptyTags(self,soup,run=0):
- if SHOWDEBUG0 == True:
- mlog.addDebug('starting removeEmptyTags')
- if SHOWDEBUG1 == True:
- run += 1
- mlog.addDebug(run)
- if SHOWDEBUG2 == True:
- mlog.addDebug(str(soup.prettify()))
- mlog.showDebug()
emptymatches = re.compile('^(&nbsp;|\s|\n|\r|\t)*$')
emptytags = soup.findAll(lambda tag: tag.find(True) is None and (tag.string is None or tag.string.strip()=="" or tag.string.strip()==emptymatches) and not tag.isSelfClosing)
if emptytags and not (emptytags == None or emptytags == []):
- if SHOWDEBUG1 == True:
- mlog.addDebug('tags found')
- mlog.addDebug(str(emptytags))
self.removeArrayOfTags(emptytags)
#recursive in case removing empty tag creates new empty tag
self.removeEmptyTags(soup, run=run)
- else:
- if SHOWDEBUG1 == True:
- mlog.addDebug('no empty tags found')
- mlog.showDebug()
- if SHOWDEBUG0 == True:
- if SHOWDEBUG2 == True:
- mlog.addDebug('new soup:')
- mlog.addDebug(str(soup.prettify()))
- mlog.addDebug('RemoveEmptyTags Completed')
- mlog.showDebug()
return soup
def removeFirstAndLastPart(self,soup):
diff --git a/recipes/national_geographic_pl.recipe b/recipes/national_geographic_pl.recipe
index a2f759e878..07fc0da666 100644
--- a/recipes/national_geographic_pl.recipe
+++ b/recipes/national_geographic_pl.recipe
@@ -9,8 +9,9 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
class recipeMagic(BasicNewsRecipe):
title = 'National Geographic PL'
__author__ = 'Marcin Urban 2011'
+ __modified_by__ = 'fenuks'
description = 'legenda wśród magazynów z historią sięgającą 120 lat'
- cover_url = 'http://www.guj.pl/var/guj/storage/images/media/nasze_magazyny/national_geographic/logo/ng_logo/2606-1-pol-PL/ng_logo.jpg'
+ #cover_url = 'http://www.guj.pl/var/guj/storage/images/media/nasze_magazyny/national_geographic/logo/ng_logo/2606-1-pol-PL/ng_logo.jpg'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
@@ -42,11 +43,43 @@ class recipeMagic(BasicNewsRecipe):
]
remove_attributes = ['width','height']
+ feeds=[]
- feeds = [
- ('National Geographic PL', 'http://www.national-geographic.pl/rss/'),
- ]
+ def find_articles(self, url):
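+ # scrape a section index page and build the article list for parse_index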
+ articles = []
+ soup=self.index_to_soup(url)
+ tag=soup.find(attrs={'class':'arl'})
+ art=tag.ul.findAll('li')
+ for i in art:
+ title=i.a['title']
+ url=i.a['href']
+ #date=soup.find(id='footer').ul.li.string[41:-1]
+ desc=i.div.p.string
+ articles.append({'title' : title,
+ 'url' : url,
+ 'date' : '',
+ 'description' : desc
+ })
+ return articles
+
+ def parse_index(self):
+ feeds = []
+ feeds.append((u"Aktualności", self.find_articles('http://www.national-geographic.pl/aktualnosci/')))
+ feeds.append((u"Artykuły", self.find_articles('http://www.national-geographic.pl/artykuly/')))
+
+ return feeds
def print_version(self, url):
- return url.replace('artykuly0Cpokaz', 'drukuj-artykul')
+ if 'artykuly' in url:
+ return url.replace('artykuly/pokaz', 'drukuj-artykul')
+ elif 'aktualnosci' in url:
+ return url.replace('aktualnosci/pokaz', 'drukuj-artykul')
+ else:
+ return url
+
+ def get_cover_url(self):
+ soup = self.index_to_soup('http://www.national-geographic.pl/biezace-wydania/')
+ tag=soup.find(attrs={'class':'txt jus'})
+ self.cover_url=tag.img['src']
+ return getattr(self, 'cover_url', self.cover_url)
diff --git a/recipes/nowa_fantastyka.recipe b/recipes/nowa_fantastyka.recipe
index ec556da5fa..0371cb1f58 100644
--- a/recipes/nowa_fantastyka.recipe
+++ b/recipes/nowa_fantastyka.recipe
@@ -81,5 +81,7 @@ class Nowa_Fantastyka(BasicNewsRecipe):
title=soup.find(attrs={'class':'tytul'})
if title:
title['style']='font-size: 20px; font-weight: bold;'
- self.log.warn(soup)
+ for a in soup('a'):
+ if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
+ a['href']=self.INDEX + a['href']
return soup
diff --git a/recipes/orlando_sentinel.recipe b/recipes/orlando_sentinel.recipe
index 7a59f6f6ba..b327bc2b74 100644
--- a/recipes/orlando_sentinel.recipe
+++ b/recipes/orlando_sentinel.recipe
@@ -1,3 +1,4 @@
+import urllib, re
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1279258912(BasicNewsRecipe):
@@ -27,12 +28,30 @@ class AdvancedUserRecipe1279258912(BasicNewsRecipe):
encoding = 'utf-8'
conversion_options = {'linearize_tables':True}
masthead_url = 'http://www.orlandosentinel.com/media/graphic/2009-07/46844851.gif'
- keep_only_tags = [
- dict(name='div', attrs={'class':'story'})
- ]
- remove_tags = [
- dict(name='div', attrs={'class':['articlerail','tools','comment-group','clearfix']}),
- ]
- remove_tags_after = [
- dict(name='p', attrs={'class':'copyright'}),
- ]
+
+ auto_cleanup = True
+
+ def get_article_url(self, article):
+ ans = None
+ try:
+ s = article.summary
+ ans = urllib.unquote(
+ re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
+ except:
+ pass
+ if ans is None:
+ link = article.get('feedburner_origlink', None)
+ if link and link.split('/')[-1]=="story01.htm":
+ link=link.split('/')[-2]
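+ # feedburner story01.htm links encode the real url; map the escape codes back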
+ encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
+ '0D': '?', '0E': '-', '0N': '.com', '0L': 'http:',
+ '0S':'//'}
+ for k, v in encoding.iteritems():
+ link = link.replace(k, v)
+ ans = link
+ elif link:
+ ans = link
+ if ans is not None:
+ return ans.replace('?track=rss', '')
+
+
diff --git a/recipes/pc_arena.recipe b/recipes/pc_arena.recipe
index 952db30c3e..56bb601f70 100644
--- a/recipes/pc_arena.recipe
+++ b/recipes/pc_arena.recipe
@@ -7,6 +7,7 @@ class PC_Arena(BasicNewsRecipe):
description = u'Najnowsze informacje z branży IT - testy, recenzje, aktualności, rankingi, wywiady. Twoje źródło informacji o sprzęcie komputerowym.'
category = 'IT'
language = 'pl'
+ index='http://pcarena.pl'
masthead_url='http://pcarena.pl/pcarena/img/logo.png'
cover_url= 'http://pcarena.pl/pcarena/img/logo.png'
no_stylesheets = True
@@ -22,4 +23,10 @@ class PC_Arena(BasicNewsRecipe):
if 'http' not in url:
return 'http://pcarena.pl' + url
else:
- return url
\ No newline at end of file
+ return url
+
+ def preprocess_html(self, soup):
+ for a in soup('a'):
+ if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
+ a['href']=self.index + a['href']
+ return soup
\ No newline at end of file
diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe
index 38f7ec1a9a..92c9aaf9d6 100644
--- a/recipes/readitlater.recipe
+++ b/recipes/readitlater.recipe
@@ -1,5 +1,5 @@
"""
-readitlaterlist.com
+Pocket Calibre Recipe v1.0
"""
__license__ = 'GPL v3'
__copyright__ = '''
@@ -12,22 +12,23 @@ from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
-class Readitlater(BasicNewsRecipe):
- title = 'ReadItLater'
+class Pocket(BasicNewsRecipe):
+ title = 'Pocket'
__author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan'
- description = '''Personalized news feeds. Go to readitlaterlist.com to setup \
- up your news. This version displays pages of articles from \
+ description = '''Personalized news feeds. Go to getpocket.com to set up \
+ your news. This version displays pages of articles from \
oldest to newest, with max & minimum counts, and marks articles \
read after downloading.'''
- publisher = 'readitlaterlist.com'
+ publisher = 'getpocket.com'
category = 'news, custom'
oldest_article = 7
max_articles_per_feed = 50
- minimum_articles = 1
+ minimum_articles = 10
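+ # when True, downloaded articles are marked read on getpocket.com (see cleanup())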
+ mark_as_read_after_dl = True
no_stylesheets = True
use_embedded_content = False
needs_subscription = True
- INDEX = u'http://readitlaterlist.com'
+ INDEX = u'http://getpocket.com'
LOGIN = INDEX + u'/l'
readList = []
@@ -100,9 +101,31 @@ class Readitlater(BasicNewsRecipe):
br = self.get_browser()
for link in markList:
url = self.INDEX + link
+ print 'Marking read: ', url
response = br.open(url)
- response
+ print response.info()
def cleanup(self):
- self.mark_as_read(self.readList)
+ if self.mark_as_read_after_dl:
+ self.mark_as_read(self.readList)
+ def default_cover(self, cover_file):
+ '''
+ Create a generic cover for recipes that don't have a cover
+ This override adds time to the cover
+ '''
+ try:
+ from calibre.ebooks import calibre_cover
+ title = self.title if isinstance(self.title, unicode) else \
+ self.title.decode('utf-8', 'replace')
+ date = strftime(self.timefmt)
+ time = strftime('[%I:%M %p]')
+ img_data = calibre_cover(title, date, time)
+ cover_file.write(img_data)
+ cover_file.flush()
+ except:
+ self.log.exception('Failed to generate default cover')
+ return False
+ return True
diff --git a/recipes/swiat_obrazu.recipe b/recipes/swiat_obrazu.recipe
new file mode 100644
index 0000000000..68740fa4dd
--- /dev/null
+++ b/recipes/swiat_obrazu.recipe
@@ -0,0 +1,25 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Swiat_Obrazu(BasicNewsRecipe):
+ title = u'Swiat Obrazu'
+ __author__ = 'fenuks'
+ description = u'Internetowy Dziennik o Fotografii i Wideo www.SwiatObrazu.pl to źródło informacji o technice fotografii i wideo, o sprzęcie najbardziej znanych i uznanych firm: Canon, Nikon, Sony, Hasselblad i wielu innych. Znajdziecie tu programy do obróbki zdjęć, forum foto i forum wideo i galerie zdjęć. Codziennie najświeższe informacje: aktualności, testy, poradniki, wywiady, felietony. Swiatobrazu.pl stale organizuje konkursy oraz warsztaty fotograficzne i wideo.'
+ category = 'photography'
+ masthead_url = 'http://www.swiatobrazu.pl/img/logo.jpg'
+ cover_url = 'http://www.swiatobrazu.pl/img/logo.jpg'
+ language = 'pl'
+ oldest_article = 7
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ remove_javascript= True
+ use_embedded_content = False
+ feeds = [(u'Wszystko', u'http://www.swiatobrazu.pl/rss')]
+
+ def print_version(self, url):
+ return url + ',drukuj'
+
+ def image_url_processor(self, baseurl, url):
+        if 'http://' not in url and 'https://' not in url:
+ return 'http://www.swiatobrazu.pl' + url[5:]
+ else:
+ return url
diff --git a/recipes/tanuki.recipe b/recipes/tanuki.recipe
index 666cb8aa77..a615763307 100644
--- a/recipes/tanuki.recipe
+++ b/recipes/tanuki.recipe
@@ -34,4 +34,12 @@ class tanuki(BasicNewsRecipe):
def preprocess_html(self, soup):
self.append_page(soup, soup.body)
+ for a in soup('a'):
+ if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
+ if 'tanuki-anime' in soup.title.string.lower():
+ a['href']='http://anime.tanuki.pl' + a['href']
+ elif 'tanuki-manga' in soup.title.string.lower():
+ a['href']='http://manga.tanuki.pl' + a['href']
+ elif 'tanuki-czytelnia' in soup.title.string.lower():
+ a['href']='http://czytelnia.tanuki.pl' + a['href']
return soup
\ No newline at end of file
diff --git a/recipes/webhosting_pl.recipe b/recipes/webhosting_pl.recipe
index aeb98477f3..8ebb91c4ba 100644
--- a/recipes/webhosting_pl.recipe
+++ b/recipes/webhosting_pl.recipe
@@ -8,6 +8,7 @@ class webhosting_pl(BasicNewsRecipe):
cover_url='http://webhosting.pl/images/logo.png'
masthead_url='http://webhosting.pl/images/logo.png'
oldest_article = 7
+ index='http://webhosting.pl'
max_articles_per_feed = 100
no_stylesheets = True
remove_empty_feeds = True
@@ -36,4 +37,10 @@ class webhosting_pl(BasicNewsRecipe):
(u'Marketing', u'http://webhosting.pl/feed/rss/n/11535')]
def print_version(self, url):
- return url.replace('webhosting.pl', 'webhosting.pl/print')
\ No newline at end of file
+ return url.replace('webhosting.pl', 'webhosting.pl/print')
+
+ def preprocess_html(self, soup):
+ for a in soup('a'):
+ if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
+ a['href']=self.index + a['href']
+ return soup
\ No newline at end of file
diff --git a/recipes/xkcd.recipe b/recipes/xkcd.recipe
index ce63b0a99a..42dceda65b 100644
--- a/recipes/xkcd.recipe
+++ b/recipes/xkcd.recipe
@@ -21,7 +21,7 @@ class XkcdCom(BasicNewsRecipe):
use_embedded_content = False
oldest_article = 60
- keep_only_tags = [dict(id='middleContent')]
+ keep_only_tags = [dict(id='middleContainer')]
remove_tags = [dict(name='ul'), dict(name='h3'), dict(name='br')]
no_stylesheets = True
# turn image bubblehelp into a paragraph
diff --git a/setup/hosting.py b/setup/hosting.py
index 394d32702e..33bb3bff04 100644
--- a/setup/hosting.py
+++ b/setup/hosting.py
@@ -26,7 +26,7 @@ def login_to_google(username, password):
br.form['Email'] = username
br.form['Passwd'] = password
raw = br.submit().read()
-    if re.search(br'<title>.*?Account Settings</title>', raw) is None:
+    if re.search(br'(?i)<title>.*?Account Settings</title>', raw) is None:
         x = re.search(br'(?is)<title>.*?</title>', raw)
if x is not None:
print ('Title of post login page: %s'%x.group())
diff --git a/setup/iso_639/ca.po b/setup/iso_639/ca.po
index 1286dcebc2..63b910ff93 100644
--- a/setup/iso_639/ca.po
+++ b/setup/iso_639/ca.po
@@ -12,14 +12,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
-"PO-Revision-Date: 2011-12-14 19:48+0000\n"
-"Last-Translator: Ferran Rius \n"
+"PO-Revision-Date: 2012-04-12 09:56+0000\n"
+"Last-Translator: Dídac Rios \n"
"Language-Team: Catalan \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2011-12-15 05:18+0000\n"
-"X-Generator: Launchpad (build 14487)\n"
+"X-Launchpad-Export-Date: 2012-04-13 05:26+0000\n"
+"X-Generator: Launchpad (build 15070)\n"
"Language: ca\n"
#. name for aaa
@@ -9584,31 +9584,31 @@ msgstr ""
#. name for hoi
msgid "Holikachuk"
-msgstr ""
+msgstr "Holikachuk"
#. name for hoj
msgid "Hadothi"
-msgstr ""
+msgstr "Hadothi"
#. name for hol
msgid "Holu"
-msgstr ""
+msgstr "Holu"
#. name for hom
msgid "Homa"
-msgstr ""
+msgstr "Homa"
#. name for hoo
msgid "Holoholo"
-msgstr ""
+msgstr "Holoholo"
#. name for hop
msgid "Hopi"
-msgstr ""
+msgstr "Hopi"
#. name for hor
msgid "Horo"
-msgstr ""
+msgstr "Horo"
#. name for hos
msgid "Ho Chi Minh City Sign Language"
@@ -9616,15 +9616,15 @@ msgstr "Llenguatge de signes de la ciutat de Ho Chi Minh"
#. name for hot
msgid "Hote"
-msgstr ""
+msgstr "Hote"
#. name for hov
msgid "Hovongan"
-msgstr ""
+msgstr "Hovongan"
#. name for how
msgid "Honi"
-msgstr ""
+msgstr "Honi"
#. name for hoy
msgid "Holiya"
@@ -9636,7 +9636,7 @@ msgstr ""
#. name for hpo
msgid "Hpon"
-msgstr ""
+msgstr "Hpon"
#. name for hps
msgid "Hawai'i Pidgin Sign Language"
@@ -9644,35 +9644,35 @@ msgstr "Hawaià Pidgin; llenguatge de signes"
#. name for hra
msgid "Hrangkhol"
-msgstr ""
+msgstr "Hrangkhol"
#. name for hre
msgid "Hre"
-msgstr ""
+msgstr "Hre"
#. name for hrk
msgid "Haruku"
-msgstr ""
+msgstr "Haruku"
#. name for hrm
msgid "Miao; Horned"
-msgstr ""
+msgstr "Miao; Horned"
#. name for hro
msgid "Haroi"
-msgstr ""
+msgstr "Haroi"
#. name for hrr
msgid "Horuru"
-msgstr ""
+msgstr "Horuru"
#. name for hrt
msgid "Hértevin"
-msgstr ""
+msgstr "Hértevin"
#. name for hru
msgid "Hruso"
-msgstr ""
+msgstr "Hruso"
#. name for hrv
msgid "Croatian"
@@ -9680,11 +9680,11 @@ msgstr "Croat"
#. name for hrx
msgid "Hunsrik"
-msgstr ""
+msgstr "Hunsrik"
#. name for hrz
msgid "Harzani"
-msgstr ""
+msgstr "Harzani"
#. name for hsb
msgid "Sorbian; Upper"
@@ -9704,31 +9704,31 @@ msgstr "Xinès; Xiang"
#. name for hss
msgid "Harsusi"
-msgstr ""
+msgstr "Harsusi"
#. name for hti
msgid "Hoti"
-msgstr ""
+msgstr "Hoti"
#. name for hto
msgid "Huitoto; Minica"
-msgstr ""
+msgstr "Huitoto; Minica"
#. name for hts
msgid "Hadza"
-msgstr ""
+msgstr "Hadza"
#. name for htu
msgid "Hitu"
-msgstr ""
+msgstr "Hitu"
#. name for htx
msgid "Hittite; Middle"
-msgstr ""
+msgstr "Hittite; Middle"
#. name for hub
msgid "Huambisa"
-msgstr ""
+msgstr "Huambisa"
#. name for huc
msgid "=/Hua"
@@ -9736,27 +9736,27 @@ msgstr ""
#. name for hud
msgid "Huaulu"
-msgstr ""
+msgstr "Huaulu"
#. name for hue
msgid "Huave; San Francisco Del Mar"
-msgstr ""
+msgstr "Huave; San Francisco Del Mar"
#. name for huf
msgid "Humene"
-msgstr ""
+msgstr "Humene"
#. name for hug
msgid "Huachipaeri"
-msgstr ""
+msgstr "Huachipaeri"
#. name for huh
msgid "Huilliche"
-msgstr ""
+msgstr "Huilliche"
#. name for hui
msgid "Huli"
-msgstr ""
+msgstr "Huli"
#. name for huj
msgid "Miao; Northern Guiyang"
@@ -9764,15 +9764,15 @@ msgstr "Miao; Guiyang septentrional"
#. name for huk
msgid "Hulung"
-msgstr ""
+msgstr "Hulung"
#. name for hul
msgid "Hula"
-msgstr ""
+msgstr "Hula"
#. name for hum
msgid "Hungana"
-msgstr ""
+msgstr "Hungana"
#. name for hun
msgid "Hungarian"
@@ -9780,43 +9780,43 @@ msgstr "Hongarès"
#. name for huo
msgid "Hu"
-msgstr ""
+msgstr "Hu"
#. name for hup
msgid "Hupa"
-msgstr ""
+msgstr "Hupa"
#. name for huq
msgid "Tsat"
-msgstr ""
+msgstr "Tsat"
#. name for hur
msgid "Halkomelem"
-msgstr ""
+msgstr "Halkomelem"
#. name for hus
msgid "Huastec"
-msgstr ""
+msgstr "Huastec"
#. name for hut
msgid "Humla"
-msgstr ""
+msgstr "Humla"
#. name for huu
msgid "Huitoto; Murui"
-msgstr ""
+msgstr "Huitoto; Murui"
#. name for huv
msgid "Huave; San Mateo Del Mar"
-msgstr ""
+msgstr "Huave; San Mateo Del Mar"
#. name for huw
msgid "Hukumina"
-msgstr ""
+msgstr "Hukumina"
#. name for hux
msgid "Huitoto; Nüpode"
-msgstr ""
+msgstr "Huitoto; Nüpode"
#. name for huy
msgid "Hulaulá"
diff --git a/setup/iso_639/es.po b/setup/iso_639/es.po
index 0bd14a5857..8e0046ddf9 100644
--- a/setup/iso_639/es.po
+++ b/setup/iso_639/es.po
@@ -8,14 +8,14 @@ msgstr ""
"Project-Id-Version: calibre\n"
"Report-Msgid-Bugs-To: FULL NAME \n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
-"PO-Revision-Date: 2012-03-11 10:13+0000\n"
-"Last-Translator: Jellby \n"
+"PO-Revision-Date: 2012-04-18 20:56+0000\n"
+"Last-Translator: David de Obregon \n"
"Language-Team: Spanish \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2012-03-12 04:38+0000\n"
-"X-Generator: Launchpad (build 14933)\n"
+"X-Launchpad-Export-Date: 2012-04-19 04:37+0000\n"
+"X-Generator: Launchpad (build 15108)\n"
#. name for aaa
msgid "Ghotuo"
@@ -4931,7 +4931,7 @@ msgstr "Como karim"
#. name for cfm
msgid "Chin; Falam"
-msgstr ""
+msgstr "Chin; Falam"
#. name for cga
msgid "Changriwa"
@@ -5071,7 +5071,7 @@ msgstr "Chinali"
#. name for cik
msgid "Kinnauri; Chitkuli"
-msgstr ""
+msgstr "Kinnauri; Chitkuli"
#. name for cim
msgid "Cimbrian"
@@ -5147,7 +5147,7 @@ msgstr "Chino jin"
#. name for cka
msgid "Chin; Khumi Awa"
-msgstr ""
+msgstr "Chin; Khumi Awa"
#. name for ckb
msgid "Kurdish; Central"
@@ -5287,7 +5287,7 @@ msgstr "Mnong central"
#. name for cmr
msgid "Chin; Mro"
-msgstr ""
+msgstr "Chin; Mro"
#. name for cms
msgid "Messapic"
@@ -5303,7 +5303,7 @@ msgstr "Changthang"
#. name for cnb
msgid "Chin; Chinbon"
-msgstr ""
+msgstr "Chin; Chinbon"
#. name for cnc
msgid "Côông"
@@ -5315,7 +5315,7 @@ msgstr "Qiang septentrional"
#. name for cnh
msgid "Chin; Haka"
-msgstr ""
+msgstr "Chin; Haka"
#. name for cni
msgid "Asháninka"
@@ -5323,7 +5323,7 @@ msgstr "Asháninka"
#. name for cnk
msgid "Chin; Khumi"
-msgstr ""
+msgstr "Chin; Khumi"
#. name for cnl
msgid "Chinantec; Lalana"
@@ -5347,7 +5347,7 @@ msgstr "Chenoua"
#. name for cnw
msgid "Chin; Ngawn"
-msgstr ""
+msgstr "Chin; Ngawn"
#. name for cnx
msgid "Cornish; Middle"
@@ -5459,7 +5459,7 @@ msgstr "Chinanteco de Palantla"
#. name for cpb
msgid "Ashéninka; Ucayali-Yurúa"
-msgstr ""
+msgstr "Ashéninka; Ucayali-Yurúa"
#. name for cpc
msgid "Ajyíninka Apurucayali"
@@ -5483,7 +5483,7 @@ msgstr "Capiznon"
#. name for cpu
msgid "Ashéninka; Pichis"
-msgstr ""
+msgstr "Ashéninka; Pichis"
#. name for cpx
msgid "Chinese; Pu-Xian"
@@ -5491,11 +5491,11 @@ msgstr "Chino puxian"
#. name for cpy
msgid "Ashéninka; South Ucayali"
-msgstr ""
+msgstr "Ashéninka; South Ucayali"
#. name for cqd
msgid "Miao; Chuanqiandian Cluster"
-msgstr ""
+msgstr "Miao; Chuanqiandian Cluster"
#. name for cqu
msgid "Quechua; Chilean"
@@ -5507,7 +5507,7 @@ msgstr "Chara"
#. name for crb
msgid "Carib; Island"
-msgstr ""
+msgstr "Carib; Island"
#. name for crc
msgid "Lonwolwol"
@@ -5539,23 +5539,23 @@ msgstr "Forro"
#. name for crj
msgid "Cree; Southern East"
-msgstr ""
+msgstr "Cree; Southern East"
#. name for crk
msgid "Cree; Plains"
-msgstr ""
+msgstr "Cree; Plains"
#. name for crl
msgid "Cree; Northern East"
-msgstr ""
+msgstr "Cree; Northern East"
#. name for crm
msgid "Cree; Moose"
-msgstr ""
+msgstr "Cree; Moose"
#. name for crn
msgid "Cora; El Nayar"
-msgstr ""
+msgstr "Cora; El Nayar"
#. name for cro
msgid "Crow"
@@ -5563,11 +5563,11 @@ msgstr "Crow"
#. name for crq
msgid "Chorote; Iyo'wujwa"
-msgstr ""
+msgstr "Chorote; Iyo'wujwa"
#. name for crr
msgid "Algonquian; Carolina"
-msgstr ""
+msgstr "Algonquian; Carolina"
#. name for crs
msgid "Creole French; Seselwa"
@@ -5575,7 +5575,7 @@ msgstr "Francés criollo seychellense"
#. name for crt
msgid "Chorote; Iyojwa'ja"
-msgstr ""
+msgstr "Chorote; Iyojwa'ja"
#. name for crv
msgid "Chaura"
@@ -5627,11 +5627,11 @@ msgstr "Lengua de signos chilena"
#. name for csh
msgid "Chin; Asho"
-msgstr ""
+msgstr "Chin; Asho"
#. name for csi
msgid "Miwok; Coast"
-msgstr ""
+msgstr "Miwok; Coast"
#. name for csk
msgid "Jola-Kasa"
@@ -5643,7 +5643,7 @@ msgstr "Lengua de signos china"
#. name for csm
msgid "Miwok; Central Sierra"
-msgstr ""
+msgstr "Miwok; Central Sierra"
#. name for csn
msgid "Colombian Sign Language"
@@ -5671,11 +5671,11 @@ msgstr "Ohlone septentrional"
#. name for csw
msgid "Cree; Swampy"
-msgstr ""
+msgstr "Cree; Swampy"
#. name for csy
msgid "Chin; Siyin"
-msgstr ""
+msgstr "Chin; Siyin"
#. name for csz
msgid "Coos"
@@ -5691,7 +5691,7 @@ msgstr "Chetco"
#. name for ctd
msgid "Chin; Tedim"
-msgstr ""
+msgstr "Chin; Tedim"
#. name for cte
msgid "Chinantec; Tepinapa"
@@ -5727,7 +5727,7 @@ msgstr "Pandan"
#. name for ctt
msgid "Chetti; Wayanad"
-msgstr ""
+msgstr "Chetti; Wayanad"
#. name for ctu
msgid "Chol"
@@ -5767,7 +5767,7 @@ msgstr "Mashco piro"
#. name for cuk
msgid "Kuna; San Blas"
-msgstr ""
+msgstr "Kuna; San Blas"
#. name for cul
msgid "Culina"
@@ -5795,7 +5795,7 @@ msgstr "Chhulung"
#. name for cut
msgid "Cuicatec; Teutila"
-msgstr ""
+msgstr "Cuicatec; Teutila"
#. name for cuu
msgid "Tai Ya"
@@ -5811,7 +5811,7 @@ msgstr "Chukwa"
#. name for cux
msgid "Cuicatec; Tepeuxila"
-msgstr ""
+msgstr "Cuicatec; Tepeuxila"
#. name for cvg
msgid "Chug"
@@ -5831,7 +5831,7 @@ msgstr "Maindo"
#. name for cwd
msgid "Cree; Woods"
-msgstr ""
+msgstr "Cree; Woods"
#. name for cwe
msgid "Kwere"
@@ -5879,7 +5879,7 @@ msgstr "Chino minzhong"
#. name for czt
msgid "Chin; Zotung"
-msgstr ""
+msgstr "Chin; Zotung"
#. name for daa
msgid "Dangaléat"
@@ -5935,7 +5935,7 @@ msgstr "Danés"
#. name for dao
msgid "Chin; Daai"
-msgstr ""
+msgstr "Chin; Daai"
#. name for dap
msgid "Nisi (India)"
@@ -5943,7 +5943,7 @@ msgstr "Nisi (India)"
#. name for daq
msgid "Maria; Dandami"
-msgstr ""
+msgstr "Maria; Dandami"
#. name for dar
msgid "Dargwa"
@@ -5995,7 +5995,7 @@ msgstr "Edopi"
#. name for dbg
msgid "Dogon; Dogul Dom"
-msgstr ""
+msgstr "Dogon; Dogul Dom"
#. name for dbi
msgid "Doka"
@@ -6035,7 +6035,7 @@ msgstr "Dabarre"
#. name for dbu
msgid "Dogon; Bondum Dom"
-msgstr ""
+msgstr "Dogon; Bondum Dom"
#. name for dbv
msgid "Dungu"
@@ -6067,7 +6067,7 @@ msgstr "Fataluku"
#. name for ddi
msgid "Goodenough; West"
-msgstr ""
+msgstr "Goodenough; West"
#. name for ddj
msgid "Jaru"
@@ -6083,7 +6083,7 @@ msgstr "Dido"
#. name for dds
msgid "Dogon; Donno So"
-msgstr ""
+msgstr "Dogon; Donno So"
#. name for ddw
msgid "Dawera-Daweloor"
@@ -6135,7 +6135,7 @@ msgstr "Slave (atabascano)"
#. name for dep
msgid "Delaware; Pidgin"
-msgstr ""
+msgstr "Delaware; Pidgin"
#. name for deq
msgid "Dendi (Central African Republic)"
@@ -6167,11 +6167,11 @@ msgstr "Dagaare meridional"
#. name for dgb
msgid "Dogon; Bunoge"
-msgstr ""
+msgstr "Dogon; Bunoge"
#. name for dgc
msgid "Agta; Casiguran Dumagat"
-msgstr ""
+msgstr "Agta; Casiguran Dumagat"
#. name for dgd
msgid "Dagaari Dioula"
@@ -6283,7 +6283,7 @@ msgstr "Dinka centromeridional"
#. name for dic
msgid "Dida; Lakota"
-msgstr ""
+msgstr "Dida; Lakota"
#. name for did
msgid "Didinga"
@@ -6411,7 +6411,7 @@ msgstr "Djiwarli"
#. name for djm
msgid "Dogon; Jamsay"
-msgstr ""
+msgstr "Dogon; Jamsay"
#. name for djn
msgid "Djauan"
@@ -6471,7 +6471,7 @@ msgstr "Duma"
#. name for dmb
msgid "Dogon; Mombo"
-msgstr ""
+msgstr "Dogon; Mombo"
#. name for dmc
msgid "Dimir"
@@ -6483,7 +6483,7 @@ msgstr "Dugwor"
#. name for dmg
msgid "Kinabatangan; Upper"
-msgstr ""
+msgstr "Kinabatangan; Upper"
#. name for dmk
msgid "Domaaki"
@@ -6503,7 +6503,7 @@ msgstr "Kemezung"
#. name for dmr
msgid "Damar; East"
-msgstr ""
+msgstr "Damar; East"
#. name for dms
msgid "Dampelas"
@@ -6527,7 +6527,7 @@ msgstr "Demta"
#. name for dna
msgid "Dani; Upper Grand Valley"
-msgstr ""
+msgstr "Dani; Upper Grand Valley"
#. name for dnd
msgid "Daonda"
@@ -6543,7 +6543,7 @@ msgstr "Dungan"
#. name for dni
msgid "Dani; Lower Grand Valley"
-msgstr ""
+msgstr "Dani; Lower Grand Valley"
#. name for dnk
msgid "Dengka"
@@ -6559,7 +6559,7 @@ msgstr "Danaru"
#. name for dnt
msgid "Dani; Mid Grand Valley"
-msgstr ""
+msgstr "Dani; Mid Grand Valley"
#. name for dnu
msgid "Danau"
@@ -6695,7 +6695,7 @@ msgstr "Damar occidental"
#. name for dro
msgid "Melanau; Daro-Matu"
-msgstr ""
+msgstr "Melanau; Daro-Matu"
#. name for drq
msgid "Dura"
@@ -6723,7 +6723,7 @@ msgstr "Darai"
#. name for dsb
msgid "Sorbian; Lower"
-msgstr ""
+msgstr "Sorbian; Lower"
#. name for dse
msgid "Dutch Sign Language"
@@ -6759,7 +6759,7 @@ msgstr "Daur"
#. name for dtb
msgid "Kadazan; Labuk-Kinabatangan"
-msgstr ""
+msgstr "Kadazan; Labuk-Kinabatangan"
#. name for dtd
msgid "Ditidaht"
@@ -6767,15 +6767,15 @@ msgstr "Ditidaht"
#. name for dti
msgid "Dogon; Ana Tinga"
-msgstr ""
+msgstr "Dogon; Ana Tinga"
#. name for dtk
msgid "Dogon; Tene Kan"
-msgstr ""
+msgstr "Dogon; Tene Kan"
#. name for dtm
msgid "Dogon; Tomo Kan"
-msgstr ""
+msgstr "Dogon; Tomo Kan"
#. name for dtp
msgid "Dusun; Central"
@@ -6787,15 +6787,15 @@ msgstr "Lotud"
#. name for dts
msgid "Dogon; Toro So"
-msgstr ""
+msgstr "Dogon; Toro So"
#. name for dtt
msgid "Dogon; Toro Tegu"
-msgstr ""
+msgstr "Dogon; Toro Tegu"
#. name for dtu
msgid "Dogon; Tebul Ure"
-msgstr ""
+msgstr "Dogon; Tebul Ure"
#. name for dua
msgid "Duala"
@@ -6815,7 +6815,7 @@ msgstr "Hun-saare"
#. name for due
msgid "Agta; Umiray Dumaget"
-msgstr ""
+msgstr "Agta; Umiray Dumaget"
#. name for duf
msgid "Dumbea"
@@ -6843,7 +6843,7 @@ msgstr "Uyajitaya"
#. name for dul
msgid "Agta; Alabat Island"
-msgstr ""
+msgstr "Agta; Alabat Island"
#. name for dum
msgid "Dutch; Middle (ca. 1050-1350)"
@@ -6855,7 +6855,7 @@ msgstr "Dusun deyah"
#. name for duo
msgid "Agta; Dupaninan"
-msgstr ""
+msgstr "Agta; Dupaninan"
#. name for dup
msgid "Duano"
@@ -6891,7 +6891,7 @@ msgstr "Duungooma"
#. name for duy
msgid "Agta; Dicamay"
-msgstr ""
+msgstr "Agta; Dicamay"
#. name for duz
msgid "Duli"
@@ -6907,7 +6907,7 @@ msgstr "Diri"
#. name for dwl
msgid "Dogon; Walo Kumbe"
-msgstr ""
+msgstr "Dogon; Walo Kumbe"
#. name for dwr
msgid "Dawro"
@@ -6935,15 +6935,15 @@ msgstr "Dyugun"
#. name for dyg
msgid "Agta; Villa Viciosa"
-msgstr ""
+msgstr "Agta; Villa Viciosa"
#. name for dyi
msgid "Senoufo; Djimini"
-msgstr ""
+msgstr "Senoufo; Djimini"
#. name for dym
msgid "Dogon; Yanda Dom"
-msgstr ""
+msgstr "Dogon; Yanda Dom"
#. name for dyn
msgid "Dyangadi"
@@ -7095,19 +7095,19 @@ msgstr "Kol"
#. name for ekm
msgid "Elip"
-msgstr ""
+msgstr "Elip"
#. name for eko
msgid "Koti"
-msgstr ""
+msgstr "Koti"
#. name for ekp
msgid "Ekpeye"
-msgstr ""
+msgstr "Ekpeye"
#. name for ekr
msgid "Yace"
-msgstr ""
+msgstr "Yace"
#. name for eky
msgid "Kayah; Eastern"
@@ -7115,19 +7115,19 @@ msgstr "Kayah oriental"
#. name for ele
msgid "Elepi"
-msgstr ""
+msgstr "Elepi"
#. name for elh
msgid "El Hugeirat"
-msgstr ""
+msgstr "El Hugeirat"
#. name for eli
msgid "Nding"
-msgstr ""
+msgstr "Nding"
#. name for elk
msgid "Elkei"
-msgstr ""
+msgstr "Elkei"
#. name for ell
msgid "Greek; Modern (1453-)"
@@ -7135,19 +7135,19 @@ msgstr "Griego moderno (1453-)"
#. name for elm
msgid "Eleme"
-msgstr ""
+msgstr "Eleme"
#. name for elo
msgid "El Molo"
-msgstr ""
+msgstr "El Molo"
#. name for elp
msgid "Elpaputih"
-msgstr ""
+msgstr "Elpaputih"
#. name for elu
msgid "Elu"
-msgstr ""
+msgstr "Elu"
#. name for elx
msgid "Elamite"
@@ -7155,15 +7155,15 @@ msgstr "Elamita"
#. name for ema
msgid "Emai-Iuleha-Ora"
-msgstr ""
+msgstr "Emai-Iuleha-Ora"
#. name for emb
msgid "Embaloh"
-msgstr ""
+msgstr "Embaloh"
#. name for eme
msgid "Emerillon"
-msgstr ""
+msgstr "Emerillon"
#. name for emg
msgid "Meohang; Eastern"
@@ -7171,7 +7171,7 @@ msgstr "Meohang oriental"
#. name for emi
msgid "Mussau-Emira"
-msgstr ""
+msgstr "Mussau-Emira"
#. name for emk
msgid "Maninkakan; Eastern"
@@ -7179,15 +7179,15 @@ msgstr "Maninkakan oriental"
#. name for emm
msgid "Mamulique"
-msgstr ""
+msgstr "Mamulique"
#. name for emn
msgid "Eman"
-msgstr ""
+msgstr "Eman"
#. name for emo
msgid "Emok"
-msgstr ""
+msgstr "Emok"
#. name for emp
msgid "Emberá; Northern"
@@ -7203,11 +7203,11 @@ msgstr "Muria oriental"
#. name for emw
msgid "Emplawas"
-msgstr ""
+msgstr "Emplawas"
#. name for emx
msgid "Erromintxela"
-msgstr ""
+msgstr "Erromintxela"
#. name for emy
msgid "Mayan; Epigraphic"
diff --git a/setup/iso_639/eu.po b/setup/iso_639/eu.po
index bcae01cb23..a262c93085 100644
--- a/setup/iso_639/eu.po
+++ b/setup/iso_639/eu.po
@@ -9,14 +9,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team \n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
-"PO-Revision-Date: 2012-03-06 13:55+0000\n"
+"PO-Revision-Date: 2012-04-18 13:08+0000\n"
"Last-Translator: Asier Iturralde Sarasola \n"
"Language-Team: Euskara \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2012-03-07 05:12+0000\n"
-"X-Generator: Launchpad (build 14907)\n"
+"X-Launchpad-Export-Date: 2012-04-19 04:36+0000\n"
+"X-Generator: Launchpad (build 15108)\n"
"Language: eu\n"
#. name for aaa
@@ -27125,7 +27125,7 @@ msgstr ""
#. name for vie
msgid "Vietnamese"
-msgstr "Mahastiak"
+msgstr "Vietnamera"
#. name for vif
msgid "Vili"
diff --git a/src/calibre/constants.py b/src/calibre/constants.py
index 402fef4c67..1db9c90466 100644
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
-numeric_version = (0, 8, 47)
+numeric_version = (0, 8, 48)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index d91fc97a1d..af5590cc53 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -259,7 +259,7 @@ class LRXMetadataReader(MetadataReaderPlugin):
class MOBIMetadataReader(MetadataReaderPlugin):
name = 'Read MOBI metadata'
- file_types = set(['mobi', 'prc', 'azw', 'azw4', 'pobi'])
+ file_types = set(['mobi', 'prc', 'azw', 'azw3', 'azw4', 'pobi'])
description = _('Read metadata from %s files')%'MOBI'
def get_metadata(self, stream, ftype):
diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py
index 571ceee55d..07be4e42c1 100644
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@@ -40,6 +40,7 @@ class ANDROID(USBMS):
0xcac : [0x100, 0x0227, 0x0226, 0x222],
0xccf : [0x100, 0x0227, 0x0226, 0x222],
0x2910 : [0x222],
+ 0xff9 : [0x9999],
},
# Eken
@@ -174,7 +175,7 @@ class ANDROID(USBMS):
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON',
'VIZIO', 'GOOGLE', 'FREESCAL', 'KOBO_INC', 'LENOVO', 'ROCKCHIP',
- 'POCKET', 'ONDA_MID', 'ZENITHIN', 'INGENIC']
+ 'POCKET', 'ONDA_MID', 'ZENITHIN', 'INGENIC', 'PMID701C']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID',
@@ -189,7 +190,8 @@ class ANDROID(USBMS):
'UMS', '.K080', 'P990', 'LTE', 'MB853', 'GT-S5660_CARD', 'A107',
'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855',
'XT910', 'BOOK_A10', 'USB_2.0_DRIVER', 'I9100T', 'P999DW',
- 'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD']
+ 'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD', 'USB_2.0_DRIVER',
+ 'GT-S5830L_CARD']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
@@ -197,7 +199,7 @@ class ANDROID(USBMS):
'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853',
'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD',
'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC',
- 'FILE-CD_GADGET', 'GT-I9001_CARD']
+ 'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER']
OSX_MAIN_MEM = 'Android Device Main Memory'
diff --git a/src/calibre/devices/kindle/driver.py b/src/calibre/devices/kindle/driver.py
index c71eb67985..8154b7d3a0 100644
--- a/src/calibre/devices/kindle/driver.py
+++ b/src/calibre/devices/kindle/driver.py
@@ -325,6 +325,10 @@ class KINDLE2(KINDLE):
OPT_APNX_ACCURATE = 1
OPT_APNX_CUST_COL = 2
+ def formats_to_scan_for(self):
+ ans = USBMS.formats_to_scan_for(self) | {'azw3'}
+ return ans
+
def books(self, oncard=None, end_session=True):
bl = USBMS.books(self, oncard=oncard, end_session=end_session)
# Read collections information
@@ -423,6 +427,8 @@ class KINDLE_FIRE(KINDLE2):
name = 'Kindle Fire Device Interface'
description = _('Communicate with the Kindle Fire')
gui_name = 'Fire'
+ FORMATS = list(KINDLE2.FORMATS)
+ FORMATS.insert(0, 'azw3')
PRODUCT_ID = [0x0006]
BCD = [0x216, 0x100]
diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py
index f68ea8feff..1384ec0810 100644
--- a/src/calibre/devices/kobo/driver.py
+++ b/src/calibre/devices/kobo/driver.py
@@ -298,7 +298,7 @@ class KOBO(USBMS):
changed = False
for i, row in enumerate(cursor):
# self.report_progress((i+1) / float(numrows), _('Getting list of books on device...'))
- if row[3].startswith("file:///usr/local/Kobo/help/"):
+ if not hasattr(row[3], 'startswith') or row[3].startswith("file:///usr/local/Kobo/help/"):
# These are internal to the Kobo device and do not exist
continue
path = self.path_from_contentid(row[3], row[5], row[4], oncard)
diff --git a/src/calibre/devices/prst1/driver.py b/src/calibre/devices/prst1/driver.py
index 12867e0859..c2b04f11f7 100644
--- a/src/calibre/devices/prst1/driver.py
+++ b/src/calibre/devices/prst1/driver.py
@@ -307,11 +307,21 @@ class PRST1(USBMS):
# Work-around for Sony Bug (SD Card DB not using right SQLite sequence)
if source_id == 1:
+ # Update any existing sequence numbers in the table that aren't in the required range
sdcard_sequence_start = '4294967296'
query = 'UPDATE sqlite_sequence SET seq = ? WHERE seq < ?'
t = (sdcard_sequence_start, sdcard_sequence_start,)
cursor.execute(query, t)
+ # Insert sequence numbers for tables we will be manipulating, if they don't already exist
+ query = ('INSERT INTO sqlite_sequence (name, seq) '
+ 'SELECT ?, ? '
+             'WHERE NOT EXISTS (SELECT 1 FROM sqlite_sequence WHERE name = ?)')
+ cursor.execute(query, ('books',sdcard_sequence_start,'books',))
+ cursor.execute(query, ('collection',sdcard_sequence_start,'collection',))
+ cursor.execute(query, ('collections',sdcard_sequence_start,'collections',))
+
+
for book in booklist:
# Run through plugboard if needed
if plugboard is not None:
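The INSERT ... SELECT ... WHERE NOT EXISTS guard used above is a standard "insert only if missing" idiom. A self-contained sketch against a throwaway table (sqlite_sequence itself cannot be created by hand):

    # Demonstrate the conditional-insert idiom used for the Sony SD card DB
    import sqlite3
    db = sqlite3.connect(':memory:')
    db.execute('CREATE TABLE seq_demo (name TEXT, seq INTEGER)')
    query = ('INSERT INTO seq_demo (name, seq) SELECT ?, ? '
             'WHERE NOT EXISTS (SELECT 1 FROM seq_demo WHERE name = ?)')
    db.execute(query, ('books', 4294967296, 'books'))
    db.execute(query, ('books', 4294967296, 'books'))  # no-op: row already exists
    print db.execute('SELECT COUNT(*) FROM seq_demo').fetchone()[0]  # -> 1
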
diff --git a/src/calibre/devices/usbms/driver.py b/src/calibre/devices/usbms/driver.py
index a5ab0bd15c..dc0299b46e 100644
--- a/src/calibre/devices/usbms/driver.py
+++ b/src/calibre/devices/usbms/driver.py
@@ -128,6 +128,9 @@ class USBMS(CLI, Device):
elif location_code == 'B':
self._update_driveinfo_file(self._card_b_prefix, location_code, name)
+ def formats_to_scan_for(self):
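+        # Hook point: device subclasses extend this set (e.g. KINDLE2 adds 'azw3')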
+ return set(self.settings().format_map) | set(self.FORMATS)
+
def books(self, oncard=None, end_session=True):
from calibre.ebooks.metadata.meta import path_to_ext
@@ -166,7 +169,7 @@ class USBMS(CLI, Device):
for idx,b in enumerate(bl):
bl_cache[b.lpath] = idx
- all_formats = set(self.settings().format_map) | set(self.FORMATS)
+ all_formats = self.formats_to_scan_for()
def update_booklist(filename, path, prefix):
changed = False
diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py
index 82e8c6f925..09cc2fbaaf 100644
--- a/src/calibre/ebooks/__init__.py
+++ b/src/calibre/ebooks/__init__.py
@@ -31,7 +31,7 @@ BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'ht
'epub', 'fb2', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb',
'xps', 'oxps', 'azw4', 'book', 'zbf', 'pobi', 'docx', 'md',
- 'textile', 'markdown', 'ibook', 'iba']
+ 'textile', 'markdown', 'ibook', 'iba', 'azw3']
class HTMLRenderer(object):
diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py
index 559402ca1c..877b15c24a 100644
--- a/src/calibre/ebooks/conversion/cli.py
+++ b/src/calibre/ebooks/conversion/cli.py
@@ -156,9 +156,10 @@ def add_pipeline_options(parser, plumber):
'SEARCH AND REPLACE' : (
_('Modify the document text and structure using user defined patterns.'),
[
- 'sr1_search', 'sr1_replace',
- 'sr2_search', 'sr2_replace',
- 'sr3_search', 'sr3_replace',
+ 'sr1_search', 'sr1_replace',
+ 'sr2_search', 'sr2_replace',
+ 'sr3_search', 'sr3_replace',
+ 'search_replace',
]
),
@@ -211,6 +212,7 @@ def add_pipeline_options(parser, plumber):
if rec.level < rec.HIGH:
option_recommendation_to_cli_option(add_option, rec)
+
def option_parser():
parser = OptionParser(usage=USAGE)
parser.add_option('--list-recipes', default=False, action='store_true',
@@ -271,6 +273,34 @@ def abspath(x):
return x
return os.path.abspath(os.path.expanduser(x))
+def read_sr_patterns(path, log=None):
+ import json, re, codecs
+ pats = []
+ with codecs.open(path, 'r', 'utf-8') as f:
+ pat = None
+ for line in f.readlines():
+ if line.endswith(u'\n'):
+ line = line[:-1]
+
+ if pat is None:
+ if not line.strip():
+ continue
+ try:
+ re.compile(line)
+ except:
+ msg = u'Invalid regular expression: %r from file: %r'%(
+ line, path)
+ if log is not None:
+ log.error(msg)
+ raise SystemExit(1)
+ else:
+ raise ValueError(msg)
+ pat = line
+ else:
+ pats.append((pat, line))
+ pat = None
+ return json.dumps(pats)
+
def main(args=sys.argv):
log = Log()
parser, plumber = create_option_parser(args, log)
@@ -278,6 +308,9 @@ def main(args=sys.argv):
for x in ('read_metadata_from_opf', 'cover'):
if getattr(opts, x, None) is not None:
setattr(opts, x, abspath(getattr(opts, x)))
+ if opts.search_replace:
+ opts.search_replace = read_sr_patterns(opts.search_replace, log)
+
recommendations = [(n.dest, getattr(opts, n.dest),
OptionRecommendation.HIGH) \
for n in parser.options_iter()
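read_sr_patterns() expects a UTF-8 file of alternating lines: a regular expression, then its replacement. A hypothetical patterns file and parse (the file name is made up):

    # Build a sample patterns file and feed it to the new parser
    import codecs
    with codecs.open('sr.txt', 'w', 'utf-8') as f:
        f.write('(?i)free\\s+ebook\n')  # line 1: a regular expression
        f.write('ebook\n')              # line 2: its replacement
        f.write('-{10,}\n')             # line 3: another expression
        f.write('\n')                   # line 4: empty replacement deletes matches

    from calibre.ebooks.conversion.cli import read_sr_patterns
    print read_sr_patterns('sr.txt')
    # -> [["(?i)free\\s+ebook", "ebook"], ["-{10,}", ""]]
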
diff --git a/src/calibre/ebooks/conversion/plugins/mobi_input.py b/src/calibre/ebooks/conversion/plugins/mobi_input.py
index 0e12dd5db7..3817a7bda9 100644
--- a/src/calibre/ebooks/conversion/plugins/mobi_input.py
+++ b/src/calibre/ebooks/conversion/plugins/mobi_input.py
@@ -28,7 +28,7 @@ class MOBIInput(InputFormatPlugin):
name = 'MOBI Input'
author = 'Kovid Goyal'
description = 'Convert MOBI files (.mobi, .prc, .azw) to HTML'
- file_types = set(['mobi', 'prc', 'azw'])
+ file_types = set(['mobi', 'prc', 'azw', 'azw3'])
def convert(self, stream, options, file_ext, log,
accelerators):
diff --git a/src/calibre/ebooks/conversion/plugins/mobi_output.py b/src/calibre/ebooks/conversion/plugins/mobi_output.py
index 72314b4237..89ab91f8eb 100644
--- a/src/calibre/ebooks/conversion/plugins/mobi_output.py
+++ b/src/calibre/ebooks/conversion/plugins/mobi_output.py
@@ -6,8 +6,6 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
-from cStringIO import StringIO
-
from calibre.customize.conversion import OutputFormatPlugin
from calibre.customize.conversion import OptionRecommendation
@@ -79,18 +77,9 @@ class MOBIOutput(OutputFormatPlugin):
def check_for_masthead(self):
found = 'masthead' in self.oeb.guide
if not found:
+ from calibre.ebooks import generate_masthead
self.oeb.log.debug('No masthead found in manifest, generating default mastheadImage...')
- try:
- from PIL import Image as PILImage
- PILImage
- except ImportError:
- import Image as PILImage
-
- raw = open(P('content_server/calibre_banner.png'), 'rb')
- im = PILImage.open(raw)
- of = StringIO()
- im.save(of, 'GIF')
- raw = of.getvalue()
+ raw = generate_masthead(unicode(self.oeb.metadata['title'][0]))
id, href = self.oeb.manifest.generate('masthead', 'masthead')
self.oeb.manifest.add(id, href, 'image/gif', data=raw)
self.oeb.guide.add('masthead', 'Masthead Image', href)
@@ -151,17 +140,46 @@ class MOBIOutput(OutputFormatPlugin):
# Fix up the periodical href to point to first section href
toc.nodes[0].href = toc.nodes[0].nodes[0].href
+ def remove_html_cover(self):
+ from calibre.ebooks.oeb.base import OEB_DOCS
+
+ oeb = self.oeb
+ if not oeb.metadata.cover \
+ or 'cover' not in oeb.guide:
+ return
+ href = oeb.guide['cover'].href
+ del oeb.guide['cover']
+ item = oeb.manifest.hrefs[href]
+ if item.spine_position is not None:
+ self.log.warn('Found an HTML cover: ', item.href, 'removing it.',
+ 'If you find some content missing from the output MOBI, it '
+ 'is because you misidentified the HTML cover in the input '
+ 'document')
+ oeb.spine.remove(item)
+ if item.media_type in OEB_DOCS:
+ self.oeb.manifest.remove(item)
+
def convert(self, oeb, output_path, input_plugin, opts, log):
+ from calibre.utils.config import tweaks
+ from calibre.ebooks.mobi.writer2.resources import Resources
self.log, self.opts, self.oeb = log, opts, oeb
- kf8 = self.create_kf8()
- self.write_mobi(input_plugin, output_path, kf8)
+ create_kf8 = tweaks.get('create_kf8', False)
- def create_kf8(self):
+ self.remove_html_cover()
+ resources = Resources(oeb, opts, self.is_periodical,
+ add_fonts=create_kf8)
+
+ kf8 = self.create_kf8(resources) if create_kf8 else None
+
+ self.log('Creating MOBI 6 output')
+ self.write_mobi(input_plugin, output_path, kf8, resources)
+
+ def create_kf8(self, resources):
from calibre.ebooks.mobi.writer8.main import KF8Writer
- return KF8Writer(self.oeb, self.opts)
+ return KF8Writer(self.oeb, self.opts, resources)
- def write_mobi(self, input_plugin, output_path, kf8):
+ def write_mobi(self, input_plugin, output_path, kf8, resources):
from calibre.ebooks.mobi.mobiml import MobiMLizer
from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
@@ -180,12 +198,15 @@ class MOBIOutput(OutputFormatPlugin):
rasterizer(oeb, opts)
except Unavailable:
self.log.warn('SVG rasterizer unavailable, SVG will not be converted')
+ else:
+ # Add rasterized SVG images
+ resources.add_extra_images()
mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
mobimlizer(oeb, opts)
self.check_for_periodical()
write_page_breaks_after_item = input_plugin is not plugin_for_input_format('cbz')
from calibre.ebooks.mobi.writer2.main import MobiWriter
- writer = MobiWriter(opts,
+ writer = MobiWriter(opts, resources, kf8,
write_page_breaks_after_item=write_page_breaks_after_item)
writer(oeb, output_path)
diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index 8bb4fdd891..dbba38e987 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -626,6 +626,14 @@ OptionRecommendation(name='sr3_search',
OptionRecommendation(name='sr3_replace',
recommended_value='', level=OptionRecommendation.LOW,
help=_('Replacement to replace the text found with sr3-search.')),
+
+OptionRecommendation(name='search_replace',
+ recommended_value=None, level=OptionRecommendation.LOW, help=_(
+ 'Path to a file containing search and replace regular expressions. '
+        'The file must contain alternating lines: a regular expression, '
+        'followed by its replacement pattern (which can be an empty line). '
+        'The regular expressions must use Python regex syntax and '
+        'the file must be UTF-8 encoded.')),
]
# }}}
diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index 617de18555..c526cba8a9 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -5,7 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
-import functools, re
+import functools, re, json
from calibre import entity_to_unicode, as_unicode
@@ -515,18 +515,31 @@ class HTMLPreProcessor(object):
if not getattr(self.extra_opts, 'keep_ligatures', False):
html = _ligpat.sub(lambda m:LIGATURES[m.group()], html)
- for search, replace in [['sr3_search', 'sr3_replace'], ['sr2_search', 'sr2_replace'], ['sr1_search', 'sr1_replace']]:
+ # Function for processing search and replace
+ def do_search_replace(search_pattern, replace_txt):
+ try:
+ search_re = re.compile(search_pattern)
+ if not replace_txt:
+ replace_txt = ''
+ rules.insert(0, (search_re, replace_txt))
+ except Exception as e:
+ self.log.error('Failed to parse %r regexp because %s' %
+                    (search_pattern, as_unicode(e)))
+
+ # search / replace using the sr?_search / sr?_replace options
+ for i in range(1, 4):
+ search, replace = 'sr%d_search'%i, 'sr%d_replace'%i
search_pattern = getattr(self.extra_opts, search, '')
+ replace_txt = getattr(self.extra_opts, replace, '')
if search_pattern:
- try:
- search_re = re.compile(search_pattern)
- replace_txt = getattr(self.extra_opts, replace, '')
- if not replace_txt:
- replace_txt = ''
- rules.insert(0, (search_re, replace_txt))
- except Exception as e:
- self.log.error('Failed to parse %r regexp because %s' %
- (search, as_unicode(e)))
+ do_search_replace(search_pattern, replace_txt)
+
+ # multi-search / replace using the search_replace option
+ search_replace = getattr(self.extra_opts, 'search_replace', None)
+ if search_replace:
+ search_replace = json.loads(search_replace)
+ for search_pattern, replace_txt in search_replace:
+ do_search_replace(search_pattern, replace_txt)
end_rules = []
# delete soft hyphens - moved here so it's executed after header/footer removal
diff --git a/src/calibre/ebooks/mobi/debug/headers.py b/src/calibre/ebooks/mobi/debug/headers.py
index 034c714d31..07a3fa91b9 100644
--- a/src/calibre/ebooks/mobi/debug/headers.py
+++ b/src/calibre/ebooks/mobi/debug/headers.py
@@ -308,8 +308,10 @@ class MOBIHeader(object): # {{{
self.extra_data_flags = 0
if self.has_extra_data_flags:
self.unknown4 = self.raw[180:192]
- self.fdst_idx, self.fdst_count = struct.unpack_from(b'>II',
+ self.fdst_idx, self.fdst_count = struct.unpack_from(b'>LL',
self.raw, 192)
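+            # An FDST count of 0 or 1 means there is no usable FDST record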
+ if self.fdst_count <= 1:
+ self.fdst_idx = NULL_INDEX
(self.fcis_number, self.fcis_count, self.flis_number,
self.flis_count) = struct.unpack(b'>IIII',
self.raw[200:216])
@@ -342,7 +344,7 @@ class MOBIHeader(object): # {{{
'first_non_book_record', 'datp_record_offset', 'fcis_number',
'flis_number', 'primary_index_record', 'fdst_idx',
'first_image_index'):
- if hasattr(self, x):
+ if hasattr(self, x) and getattr(self, x) != NULL_INDEX:
setattr(self, x, self.header_offset+getattr(self, x))
if self.has_exth:
diff --git a/src/calibre/ebooks/mobi/debug/mobi8.py b/src/calibre/ebooks/mobi/debug/mobi8.py
index 9dcc298742..1c61690d42 100644
--- a/src/calibre/ebooks/mobi/debug/mobi8.py
+++ b/src/calibre/ebooks/mobi/debug/mobi8.py
@@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
import sys, os, imghdr, struct
from itertools import izip
+from calibre import CurrentDir
from calibre.ebooks.mobi.debug.headers import TextRecord
from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex, NCXIndex)
from calibre.ebooks.mobi.utils import read_font_record
@@ -43,6 +44,24 @@ class FDST(object):
return '\n'.join(ans)
+class File(object):
+
+ def __init__(self, skel, skeleton, text, first_aid, sections):
+ self.name = 'part%04d'%skel.file_number
+ self.skeleton, self.text, self.first_aid = skeleton, text, first_aid
+ self.sections = sections
+
+ def dump(self, ddir):
+ with open(os.path.join(ddir, self.name + '.html'), 'wb') as f:
+ f.write(self.text)
+ base = os.path.join(ddir, self.name + '-parts')
+ os.mkdir(base)
+ with CurrentDir(base):
+ with open('skeleton.html', 'wb') as f:
+ f.write(self.skeleton)
+ for i, text in enumerate(self.sections):
+ with open('sect-%04d.html'%i, 'wb') as f:
+ f.write(text)
class MOBIFile(object):
@@ -67,6 +86,7 @@ class MOBIFile(object):
self.extract_resources()
self.read_fdst()
self.read_indices()
+ self.build_files()
def print_header(self, f=sys.stdout):
print (str(self.mf.palmdb).encode('utf-8'), file=f)
@@ -95,6 +115,26 @@ class MOBIFile(object):
self.ncx_index = NCXIndex(self.header.primary_index_record,
self.mf.records, self.header.encoding)
+ def build_files(self):
+ text = self.raw_text
+ self.files = []
+ for skel in self.skel_index.records:
+ sects = [x for x in self.sect_index.records if x.file_number
+ == skel.file_number]
+ skeleton = text[skel.start_position:skel.start_position+skel.length]
+ ftext = skeleton
+ first_aid = sects[0].toc_text
+ sections = []
+
+ for sect in sects:
+ start_pos = skel.start_position + skel.length + sect.start_pos
+ sect_text = text[start_pos:start_pos+sect.length]
+ insert_pos = sect.insert_pos - skel.start_position
+ ftext = ftext[:insert_pos] + sect_text + ftext[insert_pos:]
+ sections.append(sect_text)
+
+ self.files.append(File(skel, skeleton, ftext, first_aid, sections))
+
def extract_resources(self):
self.resource_map = []
known_types = {b'FLIS', b'FCIS', b'SRCS',
@@ -141,7 +181,7 @@ def inspect_mobi(mobi_file, ddir):
with open(alltext, 'wb') as of:
of.write(f.raw_text)
- for x in ('text_records', 'images', 'fonts', 'binary'):
+ for x in ('text_records', 'images', 'fonts', 'binary', 'files'):
os.mkdir(os.path.join(ddir, x))
for rec in f.text_records:
@@ -164,3 +204,6 @@ def inspect_mobi(mobi_file, ddir):
with open(os.path.join(ddir, 'ncx.record'), 'wb') as fo:
fo.write(str(f.ncx_index).encode('utf-8'))
+ for part in f.files:
+ part.dump(os.path.join(ddir, 'files'))
+
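build_files() reassembles each KF8 "file" by splicing section text into its skeleton at the recorded insert positions. A toy illustration with made-up strings and offsets:

    # Splice a section into a skeleton, as build_files() does per section
    skeleton = '<html><body></body></html>'
    sect_text = '<p>Hello</p>'
    insert_pos = len('<html><body>')  # offset relative to the skeleton start
    ftext = skeleton[:insert_pos] + sect_text + skeleton[insert_pos:]
    print ftext  # -> <html><body><p>Hello</p></body></html>
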
diff --git a/src/calibre/ebooks/mobi/mobiml.py b/src/calibre/ebooks/mobi/mobiml.py
index 7cda4b0a57..d276689224 100644
--- a/src/calibre/ebooks/mobi/mobiml.py
+++ b/src/calibre/ebooks/mobi/mobiml.py
@@ -10,7 +10,7 @@ import copy
import re
from lxml import etree
from calibre.ebooks.oeb.base import namespace, barename
-from calibre.ebooks.oeb.base import XHTML, XHTML_NS, OEB_DOCS, urlnormalize
+from calibre.ebooks.oeb.base import XHTML, XHTML_NS, urlnormalize
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.transforms.flatcss import KeyMapper
from calibre.utils.magick.draw import identify_data
@@ -109,26 +109,8 @@ class MobiMLizer(object):
self.profile = profile = context.dest
self.fnums = fnums = dict((v, k) for k, v in profile.fnums.items())
self.fmap = KeyMapper(profile.fbase, profile.fbase, fnums.keys())
- self.remove_html_cover()
self.mobimlize_spine()
- def remove_html_cover(self):
- oeb = self.oeb
- if not oeb.metadata.cover \
- or 'cover' not in oeb.guide:
- return
- href = oeb.guide['cover'].href
- del oeb.guide['cover']
- item = oeb.manifest.hrefs[href]
- if item.spine_position is not None:
- self.log.warn('Found an HTML cover,', item.href, 'removing it.',
- 'If you find some content missing from the output MOBI, it '
- 'is because you misidentified the HTML cover in the input '
- 'document')
- oeb.spine.remove(item)
- if item.media_type in OEB_DOCS:
- self.oeb.manifest.remove(item)
-
def mobimlize_spine(self):
'Iterate over the spine and convert it to MOBIML'
for item in self.oeb.spine:
@@ -473,7 +455,7 @@ class MobiMLizer(object):
if tag in TABLE_TAGS and self.ignore_tables:
tag = 'span' if tag == 'td' else 'div'
- if tag == 'table':
+ if tag in ('table', 'td', 'tr'):
col = style.backgroundColor
if col:
elem.set('bgcolor', col)
diff --git a/src/calibre/ebooks/mobi/reader/index.py b/src/calibre/ebooks/mobi/reader/index.py
index f5add94eac..c732d8862e 100644
--- a/src/calibre/ebooks/mobi/reader/index.py
+++ b/src/calibre/ebooks/mobi/reader/index.py
@@ -114,6 +114,7 @@ class CNCX(object): # {{{
def __bool__(self):
return bool(self.records)
+ __nonzero__ = __bool__
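+    # Python 2 uses __nonzero__ (not __bool__) for truth testing; the alias keeps bool() working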
def iteritems(self):
return self.records.iteritems()
diff --git a/src/calibre/ebooks/mobi/utils.py b/src/calibre/ebooks/mobi/utils.py
index 3530736ba0..0ae992f438 100644
--- a/src/calibre/ebooks/mobi/utils.py
+++ b/src/calibre/ebooks/mobi/utils.py
@@ -7,7 +7,7 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
-import struct, string, imghdr, zlib
+import struct, string, imghdr, zlib, os
from collections import OrderedDict
from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail
@@ -364,7 +364,7 @@ def count_set_bits(num):
num >>= 1
return ans
-def to_base(num, base=32):
+def to_base(num, base=32, min_num_digits=None):
digits = string.digits + string.ascii_uppercase
sign = 1 if num >= 0 else -1
if num == 0: return '0'
@@ -373,6 +373,8 @@ def to_base(num, base=32):
while num:
ans.append(digits[(num % base)])
num //= base
+ if min_num_digits is not None and len(ans) < min_num_digits:
+ ans.extend('0'*(min_num_digits - len(ans)))
if sign < 0:
ans.append('-')
ans.reverse()
@@ -388,27 +390,8 @@ def mobify_image(data):
data = im.export('gif')
return data
-def read_zlib_header(header):
- header = bytearray(header)
- # See sec 2.2 of RFC 1950 for the zlib stream format
- # http://www.ietf.org/rfc/rfc1950.txt
- if (header[0]*256 + header[1])%31 != 0:
- return None, 'Bad zlib header, FCHECK failed'
-
- cmf = header[0] & 0b1111
- cinfo = header[0] >> 4
- if cmf != 8:
- return None, 'Unknown zlib compression method: %d'%cmf
- if cinfo > 7:
- return None, 'Invalid CINFO field in zlib header: %d'%cinfo
- fdict = (header[1]&0b10000)>>5
- if fdict != 0:
- return None, 'FDICT based zlib compression not supported'
- wbits = cinfo + 8
- return wbits, None
-
-
-def read_font_record(data, extent=1040): # {{{
+# Font records {{{
+def read_font_record(data, extent=1040):
'''
Return the font encoded in the MOBI FONT record represented by data.
The return value in a dict with fields raw_data, font_data, err, ext,
@@ -466,15 +449,8 @@ def read_font_record(data, extent=1040): # {{{
if flags & 0b1:
# ZLIB compressed data
- wbits, err = read_zlib_header(font_data[:2])
- if err is not None:
- ans['err'] = err
- return ans
- adler32, = struct.unpack_from(b'>I', font_data, len(font_data) - 4)
try:
- # remove two bytes of zlib header and 4 bytes of trailing checksum
- # negative wbits indicates no standard gzip header
- font_data = zlib.decompress(font_data[2:-4], -wbits, usize)
+ font_data = zlib.decompress(font_data)
except Exception as e:
ans['err'] = 'Failed to zlib decompress font data (%s)'%e
return ans
@@ -483,23 +459,42 @@ def read_font_record(data, extent=1040): # {{{
ans['err'] = 'Uncompressed font size mismatch'
return ans
- if False:
- # For some reason these almost never match, probably Amazon has a
- # buggy Adler32 implementation
- sig = (zlib.adler32(font_data) & 0xffffffff)
- if sig != adler32:
- ans['err'] = ('Adler checksum did not match. Stored: %d '
- 'Calculated: %d')%(adler32, sig)
- return ans
-
ans['font_data'] = font_data
sig = font_data[:4]
ans['ext'] = ('ttf' if sig in {b'\0\1\0\0', b'true', b'ttcf'}
else 'otf' if sig == b'OTTO' else 'dat')
return ans
+
+def write_font_record(data, obfuscate=True, compress=True):
+ '''
+ Write the ttf/otf font represented by data into a font record. See
+ read_font_record() for details on the format of the record.
+ '''
+
+ flags = 0
+ key_len = 20
+ usize = len(data)
+ xor_key = b''
+ if compress:
+ flags |= 0b1
+ data = zlib.compress(data, 9)
+    if obfuscate and len(data) >= 1040:  # skip tiny blobs so the 1040-byte XOR loop cannot run past the data
+ flags |= 0b10
+ xor_key = os.urandom(key_len)
+ key = bytearray(xor_key)
+ data = bytearray(data)
+ for i in xrange(1040):
+ data[i] ^= key[i%key_len]
+ data = bytes(data)
+
+ key_start = struct.calcsize(b'>5L') + 4
+ data_start = key_start + len(xor_key)
+
+ header = b'FONT' + struct.pack(b'>5L', usize, flags, data_start,
+ len(xor_key), key_start)
+
+ return header + xor_key + data
+
# }}}
-
-
-
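write_font_record() is the inverse of read_font_record(). A hedged round-trip sketch (the "font" below is fake bytes carrying a TTF signature, not a real font):

    # Round-trip a fake font through the new record writer and existing reader
    import os
    from calibre.ebooks.mobi.utils import read_font_record, write_font_record
    data = b'\x00\x01\x00\x00' + os.urandom(4000)  # TTF sfnt signature + noise
    rec = write_font_record(data)  # zlib-compressed and XOR-obfuscated by default
    ans = read_font_record(rec)
    assert ans['err'] is None and ans['font_data'] == data and ans['ext'] == 'ttf'
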
diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py
index 99321fab12..b7a0d76424 100644
--- a/src/calibre/ebooks/mobi/writer2/main.py
+++ b/src/calibre/ebooks/mobi/writer2/main.py
@@ -11,17 +11,15 @@ import re, random, time
from cStringIO import StringIO
from struct import pack
-from calibre.ebooks import normalize, generate_masthead
-from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
+from calibre.ebooks import normalize
from calibre.ebooks.mobi.writer2.serializer import Serializer
from calibre.ebooks.compression.palmdoc import compress_doc
from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.utils.filenames import ascii_filename
from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED, RECORD_SIZE)
-from calibre.ebooks.mobi.utils import (rescale_image, encint, mobify_image,
- encode_trailing_data, align_block, detect_periodical)
+from calibre.ebooks.mobi.utils import (encint, encode_trailing_data,
+ align_block, detect_periodical)
from calibre.ebooks.mobi.writer2.indexer import Indexer
-from calibre.ebooks.mobi import MAX_THUMB_DIMEN, MAX_THUMB_SIZE
EXTH_CODES = {
'creator': 100,
@@ -50,8 +48,10 @@ WRITE_UNCROSSABLE_BREAKS = False
class MobiWriter(object):
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
- def __init__(self, opts, write_page_breaks_after_item=True):
+ def __init__(self, opts, resources, kf8, write_page_breaks_after_item=True):
self.opts = opts
+ self.resources = resources
+ self.kf8 = kf8
self.write_page_breaks_after_item = write_page_breaks_after_item
self.compression = UNCOMPRESSED if opts.dont_compress else PALMDOC
self.prefer_author_sort = opts.prefer_author_sort
@@ -151,66 +151,14 @@ class MobiWriter(object):
# Images {{{
def generate_images(self):
- oeb = self.oeb
- oeb.logger.info('Serializing images...')
- self.image_records = []
- self.image_map = {}
- self.masthead_offset = 0
- index = 1
+ resources = self.resources
+ image_records = resources.records
+ self.image_map = resources.item_map
+ self.masthead_offset = resources.masthead_offset
+ self.cover_offset = resources.cover_offset
+ self.thumbnail_offset = resources.thumbnail_offset
- mh_href = None
- if 'masthead' in oeb.guide and oeb.guide['masthead'].href:
- mh_href = oeb.guide['masthead'].href
- self.image_records.append(None)
- index += 1
- elif self.is_periodical:
- # Generate a default masthead
- data = generate_masthead(unicode(self.oeb.metadata['title'][0]))
- self.image_records.append(data)
- index += 1
-
- cover_href = self.cover_offset = self.thumbnail_offset = None
- if (oeb.metadata.cover and
- unicode(oeb.metadata.cover[0]) in oeb.manifest.ids):
- cover_id = unicode(oeb.metadata.cover[0])
- item = oeb.manifest.ids[cover_id]
- cover_href = item.href
-
- for item in self.oeb.manifest.values():
- if item.media_type not in OEB_RASTER_IMAGES: continue
- try:
- data = item.data
- if self.opts.mobi_keep_original_images:
- data = mobify_image(data)
- else:
- data = rescale_image(data)
- except:
- oeb.logger.warn('Bad image file %r' % item.href)
- continue
- else:
- if mh_href and item.href == mh_href:
- self.image_records[0] = data
- continue
-
- self.image_records.append(data)
- self.image_map[item.href] = index
- index += 1
-
- if cover_href and item.href == cover_href:
- self.cover_offset = self.image_map[item.href] - 1
- try:
- data = rescale_image(item.data, dimen=MAX_THUMB_DIMEN,
- maxsizeb=MAX_THUMB_SIZE)
- except:
- oeb.logger.warn('Failed to generate thumbnail')
- else:
- self.image_records.append(data)
- self.thumbnail_offset = index - 1
- index += 1
- finally:
- item.unload_data_from_memory()
-
- if self.image_records and self.image_records[0] is None:
+ if image_records and image_records[0] is None:
raise ValueError('Failed to find masthead image in manifest')
# }}}
@@ -317,9 +265,12 @@ class MobiWriter(object):
exth = self.build_exth(bt)
first_image_record = None
- if self.image_records:
+ if self.resources:
+ used_images = self.serializer.used_images
+ if self.kf8 is not None:
+ used_images |= self.kf8.used_images
first_image_record = len(self.records)
- self.records.extend(self.image_records)
+ self.resources.serialize(self.records, used_images)
last_content_record = len(self.records) - 1
# FCIS/FLIS (Seems to serve no purpose)
diff --git a/src/calibre/ebooks/mobi/writer2/resources.py b/src/calibre/ebooks/mobi/writer2/resources.py
new file mode 100644
index 0000000000..2fcb93790c
--- /dev/null
+++ b/src/calibre/ebooks/mobi/writer2/resources.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+ print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import imghdr
+
+from calibre.ebooks.mobi import MAX_THUMB_DIMEN, MAX_THUMB_SIZE
+from calibre.ebooks.mobi.utils import (rescale_image, mobify_image,
+ write_font_record)
+from calibre.ebooks import generate_masthead
+from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
+
+PLACEHOLDER_GIF = b'GIF89a\x01\x00\x01\x00\x80\x00\x00\x00\x00\x00\xff\xff\xff!\xf9\x04\x01\x00\x00\x00\x00,\x00\x00\x00\x00\x01\x00\x01\x00@\x02\x01D\x00;'
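+# (a 1x1 transparent GIF: serialize() swaps it in for image records that no
+# generated format actually references, to keep the output small)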
+
+class Resources(object):
+
+ def __init__(self, oeb, opts, is_periodical, add_fonts=False):
+ self.oeb, self.log, self.opts = oeb, oeb.log, opts
+ self.is_periodical = is_periodical
+
+ self.item_map = {}
+ self.records = []
+ self.mime_map = {}
+ self.masthead_offset = 0
+ self.used_image_indices = set()
+ self.image_indices = set()
+ self.cover_offset = self.thumbnail_offset = None
+
+ self.add_resources(add_fonts)
+
+ def process_image(self, data):
+ return (mobify_image(data) if self.opts.mobi_keep_original_images else
+ rescale_image(data))
+
+ def add_resources(self, add_fonts):
+ oeb = self.oeb
+ oeb.logger.info('Serializing resources...')
+ index = 1
+
+ mh_href = None
+ if 'masthead' in oeb.guide and oeb.guide['masthead'].href:
+ mh_href = oeb.guide['masthead'].href
+ self.records.append(None)
+ index += 1
+ self.used_image_indices.add(0)
+ self.image_indices.add(0)
+ elif self.is_periodical:
+ # Generate a default masthead
+ data = generate_masthead(unicode(self.oeb.metadata['title'][0]))
+ self.records.append(data)
+ self.used_image_indices.add(0)
+ self.image_indices.add(0)
+ index += 1
+
+ cover_href = self.cover_offset = self.thumbnail_offset = None
+ if (oeb.metadata.cover and
+ unicode(oeb.metadata.cover[0]) in oeb.manifest.ids):
+ cover_id = unicode(oeb.metadata.cover[0])
+ item = oeb.manifest.ids[cover_id]
+ cover_href = item.href
+
+ for item in self.oeb.manifest.values():
+ if item.media_type not in OEB_RASTER_IMAGES: continue
+ try:
+ data = self.process_image(item.data)
+ except:
+ self.log.warn('Bad image file %r' % item.href)
+ continue
+ else:
+ if mh_href and item.href == mh_href:
+ self.records[0] = data
+ continue
+
+ self.image_indices.add(len(self.records))
+ self.records.append(data)
+ self.item_map[item.href] = index
+ self.mime_map[item.href] = 'image/%s'%imghdr.what(None, data)
+ index += 1
+
+ if cover_href and item.href == cover_href:
+ self.cover_offset = self.item_map[item.href] - 1
+ self.used_image_indices.add(self.cover_offset)
+ try:
+ data = rescale_image(item.data, dimen=MAX_THUMB_DIMEN,
+ maxsizeb=MAX_THUMB_SIZE)
+ except:
+ self.log.warn('Failed to generate thumbnail')
+ else:
+ self.image_indices.add(len(self.records))
+ self.records.append(data)
+ self.thumbnail_offset = index - 1
+ self.used_image_indices.add(self.thumbnail_offset)
+ index += 1
+ finally:
+ item.unload_data_from_memory()
+
+ if add_fonts:
+ for item in self.oeb.manifest.values():
+ if item.href and item.href.rpartition('.')[-1].lower() in {
+ 'ttf', 'otf'} and isinstance(item.data, bytes):
+ self.records.append(write_font_record(item.data))
+ self.item_map[item.href] = len(self.records)
+
+ def add_extra_images(self):
+ '''
+ Add any images that were created after the call to add_resources()
+ '''
+ for item in self.oeb.manifest.values():
+ if (item.media_type not in OEB_RASTER_IMAGES or item.href in
+ self.item_map): continue
+ try:
+ data = self.process_image(item.data)
+ except:
+ self.log.warn('Bad image file %r' % item.href)
+ else:
+ self.records.append(data)
+ self.item_map[item.href] = len(self.records)
+ finally:
+ item.unload_data_from_memory()
+
+ def serialize(self, records, used_images):
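+        # Any image record never referenced from the text is replaced by the
+        # placeholder GIF: record numbers (and hence every recindex and
+        # kindle:embed reference) stay valid, while unused data is dropped.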
+ used_image_indices = self.used_image_indices | {
+ v-1 for k, v in self.item_map.iteritems() if k in used_images}
+ for i in self.image_indices-used_image_indices:
+ self.records[i] = PLACEHOLDER_GIF
+ records.extend(self.records)
+
+ def __bool__(self):
+ return bool(self.records)
+ __nonzero__ = __bool__
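+
+# A minimal usage sketch (hypothetical oeb/opts objects; used_images would
+# come from whatever serialized the text, e.g. Serializer.used_images):
+#
+#   resources = Resources(oeb, opts, is_periodical=False, add_fonts=True)
+#   records = []
+#   resources.serialize(records, used_images)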
+
diff --git a/src/calibre/ebooks/mobi/writer2/serializer.py b/src/calibre/ebooks/mobi/writer2/serializer.py
index b35f33439b..d8d63bcff4 100644
--- a/src/calibre/ebooks/mobi/writer2/serializer.py
+++ b/src/calibre/ebooks/mobi/writer2/serializer.py
@@ -39,6 +39,7 @@ class Serializer(object):
self.oeb = oeb
# Map of image hrefs to image index in the MOBI file
self.images = images
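+        # hrefs of images actually referenced from the text; only these need
+        # to be kept at full fidelity when the resource records are written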
+ self.used_images = set()
self.logger = oeb.logger
self.is_periodical = is_periodical
self.write_page_breaks_after_item = write_page_breaks_after_item
@@ -329,6 +330,7 @@ class Serializer(object):
href = urlnormalize(item.abshref(val))
if href in self.images:
index = self.images[href]
+ self.used_images.add(href)
buf.write(b'recindex="%05d"' % index)
continue
buf.write(attr.encode('utf-8'))
diff --git a/src/calibre/ebooks/mobi/writer8/main.py b/src/calibre/ebooks/mobi/writer8/main.py
index fc4234eb10..79ff7c3d96 100644
--- a/src/calibre/ebooks/mobi/writer8/main.py
+++ b/src/calibre/ebooks/mobi/writer8/main.py
@@ -7,9 +7,199 @@ __license__ = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
+import copy
+from functools import partial
+from collections import defaultdict
+
+import cssutils
+from lxml import etree
+
+from calibre import isbytestring, force_unicode
+from calibre.ebooks.mobi.utils import to_base
+from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath,
+ extract, XHTML, urlnormalize)
+from calibre.ebooks.oeb.parse_utils import barename
+from calibre.ebooks.mobi.writer8.skeleton import Chunker, aid_able_tags
+
+XML_DOCS = OEB_DOCS | {SVG_MIME}
+
+# References to record numbers in KF8 are stored as base-32 encoded integers,
+# with 4 digits
+to_ref = partial(to_base, base=32, min_num_digits=4)
+# References in links are stored with 10 digits
+to_href = partial(to_base, base=32, min_num_digits=10)
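+# For example, assuming to_base uses the digits 0-9 followed by A-V: record
+# number 100 (= 3*32 + 4) encodes as to_ref -> '0034', to_href -> '0000000034'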
class KF8Writer(object):
- def __init__(self, oeb, opts):
+ def __init__(self, oeb, opts, resources):
self.oeb, self.opts, self.log = oeb, opts, oeb.log
+ self.log.info('Creating KF8 output')
+ self.used_images = set()
+ self.resources = resources
+ self.dup_data()
+ self.flows = [None] # First flow item is reserved for the text
+
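+        # The KF8 transformation pipeline: rewrite resource links as
+        # kindle:embed pointers, pull CSS and SVG out of the markup into
+        # separate flows, replace internal links with placeholders (resolved
+        # once chunking fixes final positions), add aid attributes to
+        # linkable tags, then chunk the text into skeleton records.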
+ self.replace_resource_links()
+ self.extract_css_into_flows()
+ self.extract_svg_into_flows()
+ self.replace_internal_links_with_placeholders()
+ self.insert_aid_attributes()
+ self.chunk_it_up()
+
+ def dup_data(self):
+ ''' Duplicate data so that any changes we make to markup/CSS only
+ affect KF8 output and not MOBI 6 output '''
+ self._data_cache = {}
+ for item in self.oeb.manifest:
+ if item.media_type in XML_DOCS:
+ self._data_cache[item.href] = copy.deepcopy(item.data)
+ elif item.media_type in OEB_STYLES:
+                # There is no obvious way to make an efficient copy of the
+                # in-memory CSSStylesheet (deepcopy raises an exception), so
+                # re-parse its serialized cssText instead
+ self._data_cache[item.href] = cssutils.parseString(
+ item.data.cssText)
+
+ def data(self, item):
+ return self._data_cache.get(item.href, item.data)
+
+ def replace_resource_links(self):
+ ''' Replace links to resources (raster images/fonts) with pointers to
+ the MOBI record containing the resource. The pointers are of the form:
+        kindle:embed:XXXX?mime=image/*. The ?mime= query is apparently
+        optional and is not used for fonts. '''
+
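+        # A sketch: an <img src="../images/cover.jpg"> whose data sits in
+        # resource record 1 would be rewritten to
+        # src="kindle:embed:0001?mime=image/jpeg", while a font reference
+        # would become just kindle:embed:0002 (hypothetical record numbers).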
+ def pointer(item, oref):
+ ref = item.abshref(oref)
+ idx = self.resources.item_map.get(ref, None)
+ if idx is not None:
+ is_image = self.resources.records[idx-1][:4] not in {b'FONT'}
+ idx = to_ref(idx)
+ if is_image:
+ self.used_images.add(ref)
+ return 'kindle:embed:%s?mime=%s'%(idx,
+ self.resources.mime_map[ref])
+ else:
+ return 'kindle:embed:%s'%idx
+ return oref
+
+ for item in self.oeb.manifest:
+
+ if item.media_type in XML_DOCS:
+ root = self.data(item)
+ for tag in XPath('//h:img|//svg:image')(root):
+ for attr, ref in tag.attrib.iteritems():
+ if attr.split('}')[-1].lower() in {'src', 'href'}:
+ tag.attrib[attr] = pointer(item, ref)
+
+ for tag in XPath('//h:style')(root):
+ if tag.text:
+ sheet = cssutils.parseString(tag.text)
+ replacer = partial(pointer, item)
+ cssutils.replaceUrls(sheet, replacer,
+ ignoreImportRules=True)
+ repl = sheet.cssText
+ if isbytestring(repl):
+ repl = repl.decode('utf-8')
+ tag.text = '\n'+ repl + '\n'
+
+ elif item.media_type in OEB_STYLES:
+ sheet = self.data(item)
+ replacer = partial(pointer, item)
+ cssutils.replaceUrls(sheet, replacer, ignoreImportRules=True)
+
+ def extract_css_into_flows(self):
+        inlines = defaultdict(list) # Ensure identical inline styles are not repeated