commit 00cd06b5f1
Sync to trunk.
@@ -19,6 +19,51 @@
# new recipes:
#  - title:

- version: 0.8.47
  date: 2012-04-13

  new features:
    - title: "Conversion pipeline: Add support for all the named entities in the HTML 5 spec."
      tickets: [976056]

    - title: "Support for viewing and converting the Haodoo PDB ebook format"
      tickets: [976478]

    - title: "Device driver for Laser EB720"

  bug fixes:
    - title: "Fix regression in automatic adding in 0.8.46 that broke automatic adding if adding of duplicates is enabled and auto convert is also enabled"
      tickets: [976336]

    - title: 'Fix "Tags" field in advanced search does not obey regex setting'
      tickets: [980221]

    - title: "EPUB Input: Automatically extract cover image from simple HTML title page that consists of only a single <img> tag, instead of rendering the page"

    - title: "Prevent errors when both author and author_sort are used in a template for reading metadata from filenames for files on a device"

    - title: "Amazon metadata download: Handle books whose titles start with a bracket."
      tickets: [976365]

    - title: "Get Books: Fix downloading of purchased books from Baen"
      tickets: [975929]

  improved recipes:
    - Forbes
    - Caros Amigos
    - Trouw
    - Sun UK
    - Metro
    - Daily Mirror

  new recipes:
    - title: "Melbourne Herald Sun"
      author: Ray Hartley

    - title: "Editoriali and Zerocalcare"
      author: faber1971

- version: 0.8.46
  date: 2012-04-06

@@ -9,6 +9,7 @@ class Adventure_zone(BasicNewsRecipe):
    no_stylesheets = True
    oldest_article = 20
    max_articles_per_feed = 100
    index='http://www.adventure-zone.info/fusion/'
    use_embedded_content=False
    preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: '')]
    remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
@@ -45,6 +46,19 @@ class Adventure_zone(BasicNewsRecipe):
        skip_tag = skip_tag.findAll(name='a')
        for r in skip_tag:
            if r.strong:
                word=r.strong.string
                if word and (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word)):
                word=r.strong.string.lower()
                if word and (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word) or ('poradnik' in word)):
                    return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True)

    def preprocess_html(self, soup):
        footer=soup.find(attrs={'class':'news-footer middle-border'})
        if footer and len(footer('a'))>=2:
            footer('a')[1].extract()
        for item in soup.findAll(style=True):
            del item['style']
        for a in soup('a'):
            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
                a['href']=self.index + a['href']
        return soup

@@ -68,4 +68,7 @@ class Benchmark_pl(BasicNewsRecipe):
            self.image_article(soup, soup.body)
        else:
            self.append_page(soup, soup.body)
        for a in soup('a'):
            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
                a['href']=self.INDEX + a['href']
        return soup

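Several recipes in this commit (Adventure_zone, Benchmark_pl, and most of the Polish recipes below) gain the same preprocess_html addition: rewrite relative hrefs against the site index so links still resolve in the generated ebook. A minimal standalone sketch of that pattern, assuming the BeautifulSoup 3 API (Tag.has_key) that calibre recipes of this era use; the base URL is a placeholder:

from calibre.ebooks.BeautifulSoup import BeautifulSoup

INDEX = 'http://www.example.com'  # placeholder base URL

def absolutize_links(soup):
    # Prefix every relative href with the site index.
    for a in soup('a'):
        if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
            a['href'] = INDEX + a['href']
    return soup

print(absolutize_links(BeautifulSoup('<a href="/news/1">story</a>')))
# <a href="http://www.example.com/news/1">story</a>
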
@@ -1,220 +1,35 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'

'''
www.canada.com
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup


class CanWestPaper(BasicNewsRecipe):

    # un-comment the following four lines for the Victoria Times Colonist
##    title = u'Victoria Times Colonist'
##    url_prefix = 'http://www.timescolonist.com'
##    description = u'News from Victoria, BC'
##    fp_tag = 'CAN_TC'

    # un-comment the following four lines for the Vancouver Province
##    title = u'Vancouver Province'
##    url_prefix = 'http://www.theprovince.com'
##    description = u'News from Vancouver, BC'
##    fp_tag = 'CAN_VP'

    # un-comment the following four lines for the Vancouver Sun
##    title = u'Vancouver Sun'
##    url_prefix = 'http://www.vancouversun.com'
##    description = u'News from Vancouver, BC'
##    fp_tag = 'CAN_VS'

    # un-comment the following four lines for the Edmonton Journal
##    title = u'Edmonton Journal'
##    url_prefix = 'http://www.edmontonjournal.com'
##    description = u'News from Edmonton, AB'
##    fp_tag = 'CAN_EJ'

    # un-comment the following four lines for the Calgary Herald
class CalgaryHerald(BasicNewsRecipe):
    title = u'Calgary Herald'
    url_prefix = 'http://www.calgaryherald.com'
    description = u'News from Calgary, AB'
    fp_tag = 'CAN_CH'
    oldest_article = 3
    max_articles_per_feed = 100

    # un-comment the following four lines for the Regina Leader-Post
##    title = u'Regina Leader-Post'
##    url_prefix = 'http://www.leaderpost.com'
##    description = u'News from Regina, SK'
##    fp_tag = ''

    # un-comment the following four lines for the Saskatoon Star-Phoenix
##    title = u'Saskatoon Star-Phoenix'
##    url_prefix = 'http://www.thestarphoenix.com'
##    description = u'News from Saskatoon, SK'
##    fp_tag = ''

    # un-comment the following four lines for the Windsor Star
##    title = u'Windsor Star'
##    url_prefix = 'http://www.windsorstar.com'
##    description = u'News from Windsor, ON'
##    fp_tag = 'CAN_'

    # un-comment the following four lines for the Ottawa Citizen
##    title = u'Ottawa Citizen'
##    url_prefix = 'http://www.ottawacitizen.com'
##    description = u'News from Ottawa, ON'
##    fp_tag = 'CAN_OC'

    # un-comment the following four lines for the Montreal Gazette
##    title = u'Montreal Gazette'
##    url_prefix = 'http://www.montrealgazette.com'
##    description = u'News from Montreal, QC'
##    fp_tag = 'CAN_MG'

    feeds = [
        (u'News', u'http://rss.canada.com/get/?F233'),
        (u'Calgary', u'http://www.calgaryherald.com/scripts/sp6query.aspx?catalog=cahr&tags=keyword|calgary&output=rss?link=http%3a%2f%2fwww.calgaryherald'),
        (u'Alberta', u'http://www.calgaryherald.com/scripts/Sp6Query.aspx?catalog=CAHR&tags=Keyword|Alberta&output=rss?link=http%3A%2F%2Fwww.calgaryherald.com%2Fnews%2Falberta%2Findex.html'),
        (u'Politics', u'http://rss.canada.com/get/?F7551'),
        (u'National', u'http://rss.canada.com/get/?F7552'),
        (u'World', u'http://rss.canada.com/get/?F7553'),
        ]
    __author__ = 'rty'
    publisher = 'Calgary Herald'
    description = 'Calgary, Alberta, Canada'
    category = 'News, Calgary, Alberta, Canada'

    language = 'en_CA'
    __author__ = 'Nick Redding'
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    timefmt = ' [%b %d]'
    extra_css = '''
        .timestamp { font-size:xx-small; display: block; }
        #storyheader { font-size: medium; }
        #storyheader h1 { font-size: x-large; }
        #storyheader h2 { font-size: large; font-style: italic; }
        .byline { font-size:xx-small; }
        #photocaption { font-size: small; font-style: italic }
        #photocredit { font-size: xx-small; }'''
    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
    remove_tags = [{'class':'comments'},
        dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
        dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
        dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
        dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
        dict(name='div', attrs={'class':'rule_grey_solid'}),
        dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
    language = 'en_CA'
    encoding = 'utf-8'
    conversion_options = {'linearize_tables':True}
##    masthead_url = 'http://www.calgaryherald.com/index.html'
    keep_only_tags = [
        dict(name='div', attrs={'id':'storyheader'}),
        dict(name='div', attrs={'id':'storycontent'})

    def get_cover_url(self):
        from datetime import timedelta, date
        if self.fp_tag=='':
            return None
        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
        br = BasicNewsRecipe.get_browser()
        daysback=1
        try:
            br.open(cover)
        except:
            while daysback<7:
                cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg'
                br = BasicNewsRecipe.get_browser()
                try:
                    br.open(cover)
                except:
                    daysback = daysback+1
                    continue
                break
        if daysback==7:
            self.log("\nCover unavailable")
            cover = None
        return cover
        ]
    remove_tags_after = {'class':"story_tool_hr"}

    def fixChars(self,string):
        # Replace lsquo (\x91)
        fixed = re.sub("\x91","‘",string)
        # Replace rsquo (\x92)
        fixed = re.sub("\x92","’",fixed)
        # Replace ldquo (\x93)
        fixed = re.sub("\x93","“",fixed)
        # Replace rdquo (\x94)
        fixed = re.sub("\x94","”",fixed)
        # Replace ndash (\x96)
        fixed = re.sub("\x96","–",fixed)
        # Replace mdash (\x97)
        fixed = re.sub("\x97","—",fixed)
        fixed = re.sub("&#x2019;","’",fixed)
        return fixed

    def massageNCXText(self, description):
        # Kindle TOC descriptions won't render certain characters
        if description:
            massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
            # Replace '&' with '&amp;'
            massaged = re.sub("&","&amp;", massaged)
            return self.fixChars(massaged)
        else:
            return description

    def populate_article_metadata(self, article, soup, first):
        if first:
            picdiv = soup.find('body').find('img')
            if picdiv is not None:
                self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src']))
        xtitle = article.text_summary.strip()
        if len(xtitle) == 0:
            desc = soup.find('meta',attrs={'property':'og:description'})
            if desc is not None:
                article.summary = article.text_summary = desc['content']

    def strip_anchors(self,soup):
        paras = soup.findAll(True)
        for para in paras:
            aTags = para.findAll('a')
            for a in aTags:
                if a.img is None:
                    a.replaceWith(a.renderContents().decode('cp1252','replace'))
        return soup

    def preprocess_html(self, soup):
        return self.strip_anchors(soup)

    def parse_index(self):
        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')

        articles = {}
        key = 'News'
        ans = ['News']

        # Find each instance of class="sectiontitle", class="featurecontent"
        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
            #self.log(" div class = %s" % divtag['class'])
            if divtag['class'].startswith('section_title'):
                # div contains section title
                if not divtag.h3:
                    continue
                key = self.tag_to_string(divtag.h3,False)
                ans.append(key)
                self.log("Section name %s" % key)
                continue
            # div contains article data
            h1tag = divtag.find('h1')
            if not h1tag:
                continue
            atag = h1tag.find('a',href=True)
            if not atag:
                continue
            url = self.url_prefix+'/news/todays-paper/'+atag['href']
            #self.log("Section %s" % key)
            #self.log("url %s" % url)
            title = self.tag_to_string(atag,False)
            #self.log("title %s" % title)
            pubdate = ''
            description = ''
            ptag = divtag.find('p');
            if ptag:
                description = self.tag_to_string(ptag,False)
                #self.log("description %s" % description)
            author = ''
            autag = divtag.find('h4')
            if autag:
                author = self.tag_to_string(autag,False)
                #self.log("author %s" % author)
            if not articles.has_key(key):
                articles[key] = []
            articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))

        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
        return ans

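The get_cover_url above probes the Newseum front-page image for today's date and, on failure, walks back up to a week before giving up. A compact sketch of the same walk-back, with a hypothetical fetch callable standing in for br.open:

from datetime import date, timedelta

def probe_cover(fp_tag, fetch, max_days=7):
    # Try today's front page, then step back one day at a time.
    for daysback in range(max_days):
        day = (date.today() - timedelta(days=daysback)).day
        url = ('http://webmedia.newseum.org/newseum-multimedia/dfp/jpg%d/lg/%s.jpg'
               % (day, fp_tag))
        try:
            fetch(url)  # raises on HTTP error, like br.open
            return url
        except Exception:
            continue
    return None  # cover unavailable
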
recipes/camera_di_commercio_di_bari.recipe (new file, 17 lines)
@@ -0,0 +1,17 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1331729727(BasicNewsRecipe):
    title = u'Camera di Commercio di Bari'
    oldest_article = 7
    __author__ = 'faber1971'
    description = 'News from the Chamber of Commerce of Bari'
    language = 'it'
    max_articles_per_feed = 100
    auto_cleanup = True
    masthead_url = 'http://www.ba.camcom.it/grafica/layout-bordo/logo_camcom_bari.png'
    feeds = [(u'Camera di Commercio di Bari', u'http://feed43.com/4715147488845101.xml')]

__license__ = 'GPL v3'
__copyright__ = '2012, faber1971'
__version__ = 'v1.00'
__date__ = '17, April 2012'

@@ -6,6 +6,7 @@ class CD_Action(BasicNewsRecipe):
    description = 'cdaction.pl - polish games magazine site'
    category = 'games'
    language = 'pl'
    index='http://www.cdaction.pl'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets= True
@@ -18,3 +19,9 @@ class CD_Action(BasicNewsRecipe):
        soup = self.index_to_soup('http://www.cdaction.pl/magazyn/')
        self.cover_url='http://www.cdaction.pl'+ soup.find(id='wspolnik').div.a['href']
        return getattr(self, 'cover_url', self.cover_url)

    def preprocess_html(self, soup):
        for a in soup('a'):
            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
                a['href']=self.index + a['href']
        return soup
@@ -1,11 +1,12 @@
from calibre import browser
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1325006965(BasicNewsRecipe):
    title = u'Countryfile.com'
    cover_url = 'http://www.buysubscriptions.com/static_content/the-immediate/en/images/covers/CFIL_maxi.jpg'
    #cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'
    __author__ = 'Dave Asbury'
    description = 'The official website of Countryfile Magazine'
    # last updated 29/1/12
    # last updated 15/4/12
    language = 'en_GB'
    oldest_article = 30
    max_articles_per_feed = 25
@@ -13,7 +14,23 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
    no_stylesheets = True
    auto_cleanup = True
    #articles_are_obfuscated = True
    def get_cover_url(self):
        soup = self.index_to_soup('http://www.countryfile.com/')
        cov = soup.find(attrs={'class' : 'imagecache imagecache-160px_wide imagecache-linked imagecache-160px_wide_linked'})
        #print '******** ',cov,' ***'
        cov2 = str(cov)
        cov2=cov2[124:-90]
        #print '******** ',cov2,' ***'

        # try to get the scraped cover - if it can't be fetched, use the known cover
        br = browser()
        br.set_handle_redirect(False)
        try:
            br.open_novisit(cov2)
            cover_url = cov2
        except:
            cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'
        return cover_url
    remove_tags = [
        # dict(attrs={'class' : ['player']}),

@@ -1,20 +1,21 @@

from calibre.web.feeds.news import BasicNewsRecipe
import re
import mechanize
class AdvancedUserRecipe1306061239(BasicNewsRecipe):
    title = u'The Daily Mirror'
    description = 'News as provided by The Daily Mirror - UK'

    __author__ = 'Dave Asbury'
    # last updated 11/2/12
    # last updated 7/4/12
    language = 'en_GB'

    cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
    #cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'

    masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'


    oldest_article = 1
    max_articles_per_feed = 5
    max_articles_per_feed = 10
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True
@@ -75,3 +76,28 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
        img { display:block}
        '''

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.politicshome.com/uk/latest_frontpage.html')
        # look for the block containing the mirror button and url
        cov = soup.find(attrs={'style' : 'background-image: url(http://www.politicshome.com/images/sources/source_frontpage_button_92.gif);'})
        cov2 = str(cov)
        cov2='http://www.politicshome.com'+cov2[9:-142]
        #cov2 now contains url of the page containing pic
        soup = self.index_to_soup(cov2)
        cov = soup.find(attrs={'id' : 'large'})
        cov2 = str(cov)
        cov2=cov2[27:-18]
        #cov2 now is pic url, now go back to original function
        br = mechanize.Browser()
        br.set_handle_redirect(False)
        try:
            br.open_novisit(cov2)
            cover_url = cov2
        except:
            cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'

        #cover_url = cov2
        #cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png'
        return cover_url

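Both the Countryfile and Daily Mirror changes above scrape a candidate cover URL and then verify it before trusting it: with redirects disabled, a failed open means the image is gone, so a known-good fallback is used instead. A minimal sketch of that probe, assuming mechanize as imported by the Daily Mirror recipe; both URLs are placeholders:

import mechanize

def checked_cover(candidate, fallback):
    br = mechanize.Browser()
    br.set_handle_redirect(False)  # treat a redirect as "not found"
    try:
        br.open_novisit(candidate)  # probe without recording browser history
        return candidate
    except Exception:
        return fallback

cover = checked_cover('http://example.com/scraped_cover.jpg',
                      'http://example.com/known_cover.jpg')
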
@@ -11,6 +11,7 @@ class Dobreprogramy_pl(BasicNewsRecipe):
    cover_url = 'http://userlogos.org/files/logos/Karmody/dobreprogramy_01.png'
    description = u'Aktualności i blogi z dobreprogramy.pl'
    encoding = 'utf-8'
    index='http://www.dobreprogramy.pl/'
    no_stylesheets = True
    language = 'pl'
    extra_css = '.title {font-size:22px;}'
@@ -22,3 +23,10 @@ class Dobreprogramy_pl(BasicNewsRecipe):
    #remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]
    feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'),
        ('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')]

    def preprocess_html(self, soup):
        for a in soup('a'):
            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
                a['href']=self.index + a['href']
        return soup
@@ -7,6 +7,7 @@ class Dzieje(BasicNewsRecipe):
    cover_url = 'http://www.dzieje.pl/sites/default/files/dzieje_logo.png'
    category = 'history'
    language = 'pl'
    index='http://dzieje.pl'
    oldest_article = 8
    max_articles_per_feed = 100
    remove_javascript=True
@@ -15,3 +16,10 @@ class Dzieje(BasicNewsRecipe):
    remove_tags_after= dict(id='dogory')
    remove_tags=[dict(id='dogory')]
    feeds = [(u'Dzieje', u'http://dzieje.pl/rss.xml')]

    def preprocess_html(self, soup):
        for a in soup('a'):
            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
                a['href']=self.index + a['href']
        return soup
@@ -21,3 +21,8 @@ class eioba(BasicNewsRecipe):
        (u'Rozrywka', u'http://www.eioba.pl/feed/categories/10.xml'),
        (u'Rożne', u'http://www.eioba.pl/feed/categories/9.xml')
        ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup

@@ -7,6 +7,7 @@ class eMuzyka(BasicNewsRecipe):
    description = u'Emuzyka to największa i najpopularniejsza strona o muzyce w Polsce'
    category = 'music'
    language = 'pl'
    index='http://www.emuzyka.pl'
    cover_url='http://s.emuzyka.pl/img/emuzyka_invert_small.jpg'
    no_stylesheets = True
    oldest_article = 7
@@ -14,3 +15,9 @@ class eMuzyka(BasicNewsRecipe):
    keep_only_tags=[dict(name='div', attrs={'id':'news_container'}), dict(name='h3'), dict(name='div', attrs={'class':'review_text'})]
    remove_tags=[dict(name='span', attrs={'id':'date'})]
    feeds = [(u'Aktualno\u015bci', u'http://www.emuzyka.pl/rss.php?f=1'), (u'Recenzje', u'http://www.emuzyka.pl/rss.php?f=2')]

    def preprocess_html(self, soup):
        for a in soup('a'):
            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
                a['href']=self.index + a['href']
        return soup
@@ -7,7 +7,7 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
    # cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/373529_38324934806_64930243_n.jpg'
    masthead_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'
    __author__ = 'Dave Asbury'
    # last updated 17/3/12
    # last updated 14/4/12
    language = 'en_GB'
    oldest_article = 28
    max_articles_per_feed = 12
@@ -28,7 +28,8 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):

    #]
    feeds = [
        (u'From the Homepage',u'http://feed43.com/8053226782885416.xml'),
        (u'From the Homepage',u'http://feed43.com/0032328550253453.xml'),
        #http://feed43.com/8053226782885416.xml'),
        (u'Funny - The Very Best Of The Internet',u'http://feed43.com/4538510106331565.xml'),
        (u'Upgrade',u'http://feed43.com/0877305847443234.xml'),
        #(u'The Final Countdown', u'http://feed43.com/3576106158530118.xml'),

@@ -7,6 +7,7 @@ class Filmweb_pl(BasicNewsRecipe):
    cover_url = 'http://userlogos.org/files/logos/crudus/filmweb.png'
    category = 'movies'
    language = 'pl'
    index='http://www.filmweb.pl'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets= True
@@ -39,3 +40,9 @@ class Filmweb_pl(BasicNewsRecipe):
            self.log.warn(skip_tag)
            return self.index_to_soup(skip_tag['href'], raw=True)

    def preprocess_html(self, soup):
        for a in soup('a'):
            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
                a['href']=self.index + a['href']
        return soup
@@ -1,39 +1,49 @@
from calibre.ebooks.BeautifulSoup import BeautifulSoup
import re
from calibre.web.feeds.news import BasicNewsRecipe

class Forbes(BasicNewsRecipe):
    title = u'Forbes'
    description = 'Business and Financial News'
    __author__ = 'Darko Miletic'
    __author__ = 'Kovid Goyal'
    oldest_article = 30
    max_articles_per_feed = 100
    max_articles_per_feed = 20
    language = 'en'
    encoding = 'utf-8'
    recursions = 1

    no_stylesheets = True
    html2lrf_options = ['--base-font-size', '10']

    cover_url = u'http://www.forbes.com/media/current_covers/forbes_120_160.gif'

    feeds = [(u'Latest', u'http://www.forbes.com/news/index.xml'),
        (u'Most Popular', u'http://www.forbes.com/feeds/popstories.xml'),
        (u'Most Emailed', u'http://www.forbes.com/feeds/mostemailed.xml'),
        (u'Faces', u'http://www.forbes.com/facesscan/index.xml'),
        (u'Technology', u'http://www.forbes.com/technology/index.xml'),
        (u'Personal Tech', u'http://www.forbes.com/personaltech/index.xml'),
        (u'Wireless', u'http://www.forbes.com/wireless/index.xml'),
        (u'Business', u'http://www.forbes.com/business/index.xml'),
        (u'Sports Money', u'http://www.forbes.com/sportsmoney/index.xml'),
        (u'Sports', u'http://www.forbes.com/forbeslife/sports/index.xml'),
        (u'Vehicles', u'http://www.forbes.com/forbeslife/vehicles/index.xml'),
        (u'Leadership', u'http://www.forbes.com/leadership/index.xml'),
        (u'Careers', u'http://www.forbes.com/leadership/careers/index.xml'),
        (u'Compensation', u'http://www.forbes.com/leadership/compensation/index.xml'),
        (u'Managing', u'http://www.forbes.com/leadership/managing/index.xml')]
        (u'Leadership', u'http://www.forbes.com/leadership/index.xml'),]

    keep_only_tags = \
        {'class':lambda x: x and (set(x.split()) & {'body', 'pagination',
            'articleHead', 'article_head'})}
    remove_tags_before = {'name':'h1'}
    remove_tags = [
        {'class':['comment_bug', 'engagement_block',
            'video_promo_block', 'article_actions']},
        {'id':'comments'}
        ]

    def is_link_wanted(self, url, tag):
        ans = re.match(r'http://.*/[2-9]/', url) is not None
        if ans:
            self.log('Following multipage link: %s'%url)
        return ans

    def postprocess_html(self, soup, first_fetch):
        for pag in soup.findAll(True, 'pagination'):
            pag.extract()
        if not first_fetch:
            h1 = soup.find('h1')
            if h1 is not None:
                h1.extract()
        return soup

    def print_version(self, url):
        raw = self.browser.open(url).read()
        soup = BeautifulSoup(raw.decode('latin1', 'replace'))
        print_link = soup.find('a', {'onclick':"s_linkTrackVars='prop18';s_linkType='o';s_linkName='Print';if(typeof(globalPageName)!='undefined')s_prop18=globalPageName;s_lnk=s_co(this);s_gs(s_account);"})
        if print_link is None:
            return ''
        return 'http://www.forbes.com' + print_link['href']
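The rewritten Forbes recipe stitches multi-page articles together: recursions = 1 makes calibre offer each fetched page's links to is_link_wanted, and the regex accepts only same-article pagination URLs (pages 2 through 9); postprocess_html then strips the pagination blocks and repeated headlines. The matching logic in isolation, using the regex from the recipe:

import re

def is_pagination_link(url):
    # Accept only .../2/ through .../9/ style continuation pages.
    return re.match(r'http://.*/[2-9]/', url) is not None

print(is_pagination_link('http://www.forbes.com/some-story/2/'))  # True
print(is_pagination_link('http://www.forbes.com/some-story/'))    # False
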
recipes/fotoblogia_pl.recipe (new file, 16 lines)
@@ -0,0 +1,16 @@
from calibre.web.feeds.news import BasicNewsRecipe

class Fotoblogia_pl(BasicNewsRecipe):
    title = u'Fotoblogia.pl'
    __author__ = 'fenuks'
    category = 'photography'
    language = 'pl'
    masthead_url = 'http://img.interia.pl/komputery/nimg/u/0/fotoblogia21.jpg'
    cover_url= 'http://fotoblogia.pl/images/2009/03/fotoblogia2.jpg'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    keep_only_tags=[dict(name='div', attrs={'class':'post-view post-standard'})]
    remove_tags=[dict(attrs={'class':['external fotoblogia', 'categories', 'tags']})]
    feeds = [(u'Wszystko', u'http://fotoblogia.pl/feed/rss2')]
@@ -6,12 +6,14 @@ class Gameplay_pl(BasicNewsRecipe):
    description = u'gameplay.pl - serwis o naszych zainteresowaniach, grach, filmach, książkach, muzyce, fotografii i konsolach.'
    category = 'games, movies, books, music'
    language = 'pl'
    index='http://gameplay.pl'
    masthead_url= 'http://gameplay.pl/img/gpy_top_logo.png'
    cover_url= 'http://gameplay.pl/img/gpy_top_logo.png'
    max_articles_per_feed = 100
    remove_javascript= True
    no_stylesheets= True
    keep_only_tags=[dict(name='div', attrs={'class':['news_endpage_tit', 'news']})]
    remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im']})]
    remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im', 'news_list', 'news_list_autor', 'stop_bot', 'tagi']}), dict(attrs={'usemap':'#map'})]
    feeds = [(u'Wiadomo\u015bci', u'http://gameplay.pl/rss/')]

    def image_url_processor(self, baseurl, url):
@@ -19,3 +21,9 @@ class Gameplay_pl(BasicNewsRecipe):
            return 'http://gameplay.pl'+ url[2:]
        else:
            return url

    def preprocess_html(self, soup):
        for a in soup('a'):
            if a.has_key('href') and '../' in a['href']:
                a['href']=self.index + a['href'][2:]
        return soup
@@ -9,6 +9,7 @@ class Gildia(BasicNewsRecipe):
    language = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    remove_empty_feeds=True
    no_stylesheets=True
    remove_tags=[dict(name='div', attrs={'class':'backlink'}), dict(name='div', attrs={'class':'im_img'}), dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'})]
    keep_only_tags=dict(name='div', attrs={'class':'widetext'})
@@ -24,3 +25,16 @@ class Gildia(BasicNewsRecipe):
            self.log.warn('odnosnik')
            self.log.warn(link['href'])
            return self.index_to_soup(link['href'], raw=True)

    def preprocess_html(self, soup):
        for a in soup('a'):
            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
                if '/gry/' in a['href']:
                    a['href']='http://www.gry.gildia.pl' + a['href']
                elif u'książk' in soup.title.string.lower() or u'komiks' in soup.title.string.lower():
                    a['href']='http://www.literatura.gildia.pl' + a['href']
                elif u'komiks' in soup.title.string.lower():
                    a['href']='http://www.literatura.gildia.pl' + a['href']
                else:
                    a['href']='http://www.gildia.pl' + a['href']
        return soup

@@ -7,6 +7,7 @@ class Gram_pl(BasicNewsRecipe):
    category = 'games'
    language = 'pl'
    oldest_article = 8
    index='http://www.gram.pl'
    max_articles_per_feed = 100
    no_stylesheets= True
    extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}'
@@ -52,4 +53,7 @@ class Gram_pl(BasicNewsRecipe):
        tag=soup.findAll(name='div', attrs={'class':'picbox'})
        for t in tag:
            t['style']='float: left;'
        for a in soup('a'):
            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
                a['href']=self.index + a['href']
        return soup
@@ -59,6 +59,7 @@ class heiseDe(BasicNewsRecipe):
        dict(name='span', attrs={'class':'rsaquo'}),
        dict(name='div', attrs={'class':'news_logo'}),
        dict(name='div', attrs={'class':'bcadv ISI_IGNORE'}),
        dict(name='div', attrs={'class':'navi_top_container'}),
        dict(name='p', attrs={'class':'news_option'}),
        dict(name='p', attrs={'class':'news_navi'}),
        dict(name='div', attrs={'class':'news_foren'})]
@@ -69,3 +70,5 @@ class heiseDe(BasicNewsRecipe):


recipes/historia_news.recipe (new file, 20 lines)
@@ -0,0 +1,20 @@
from calibre.web.feeds.news import BasicNewsRecipe

class historia_news(BasicNewsRecipe):
    title = u'historia-news'
    __author__ = 'fenuks'
    description = u'Historia-news to portal dla ludzi kochających historię. Najnowsze newsy z historii bliższej i dalszej, archeologii, paleontologii oraz ciekawostki i podcasty z historii kultury, sportu, motoryzacji i inne.'
    masthead_url = 'http://historia-news.pl/templates/hajak4/images/header.jpg'
    cover_url= 'http://www.historia-news.pl/templates/hajak4/images/header.jpg'
    category = 'history'
    language = 'pl'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    remove_tags=[dict(name='form'), dict(name='img', attrs={'alt':'Print'}), dict(attrs={'class':['commbutt', 'cpr']}), dict(id=['plusone', 'facebook'])]
    feeds = [(u'Wiadomo\u015bci', u'http://historia-news.pl/wiadomoci.feed?type=rss'), (u'Artyku\u0142y', u'http://historia-news.pl/artykuy.feed?type=rss')]

    def print_version(self, url):
        return url + '?tmpl=component&print=1&layout=default&page='

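print_version above swaps each article URL for its printer-friendly Joomla view, so calibre downloads a page without site chrome. The same transformation stands alone; the article URL is a placeholder:

def print_version(url):
    # Request the bare Joomla component with print styling.
    return url + '?tmpl=component&print=1&layout=default&page='

print(print_version('http://historia-news.pl/some-article'))
# http://historia-news.pl/some-article?tmpl=component&print=1&layout=default&page=
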
recipes/icons/fotoblogia_pl.png (new binary file, 379 B; not shown)
recipes/icons/historia_news.png (new binary file, 833 B; not shown)
recipes/icons/swiat_obrazu.png (new binary file, 1006 B; not shown)
@@ -8,6 +8,7 @@ class in4(BasicNewsRecipe):
    description = u'Serwis Informacyjny - Aktualnosci, recenzje'
    category = 'IT'
    language = 'pl'
    index='http://www.in4.pl/'
    #cover_url= 'http://www.in4.pl/recenzje/337/in4pl.jpg'
    no_stylesheets = True
    remove_empty_feeds = True
@@ -39,6 +40,7 @@ class in4(BasicNewsRecipe):

    def preprocess_html(self, soup):
        self.append_page(soup, soup.body)
        for a in soup('a'):
            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
                a['href']=self.index + a['href']
        return soup

@@ -8,6 +8,7 @@ class INFRA(BasicNewsRecipe):
    description = u'Serwis Informacyjny INFRA - UFO, Zjawiska Paranormalne, Duchy, Tajemnice świata.'
    cover_url = 'http://npn.nazwa.pl/templates/ja_teline_ii/images/logo.jpg'
    category = 'UFO'
    index='http://infra.org.pl'
    language = 'pl'
    max_articles_per_feed = 100
    no_stylesheets=True
@@ -15,3 +16,11 @@ class INFRA(BasicNewsRecipe):
    remove_tags_after=dict(attrs={'class':'pagenav'})
    remove_tags=[dict(attrs={'class':'pagenav'})]
    feeds = [(u'Najnowsze wiadomo\u015bci', u'http://www.infra.org.pl/index.php?option=com_rd_rss&id=1')]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for a in soup('a'):
            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
                a['href']=self.index + a['href']
        return soup
recipes/jakarta_globe.recipe (new file, 34 lines)
@@ -0,0 +1,34 @@
from calibre.web.feeds.news import BasicNewsRecipe

class JakartaGlobe(BasicNewsRecipe):
    title = u'Jakarta Globe'
    oldest_article = 3
    max_articles_per_feed = 100

    feeds = [
        (u'News', u'http://www.thejakartaglobe.com/pages/getrss/getrss-news.php'),
        (u'Business', u'http://www.thejakartaglobe.com/pages/getrss/getrss-business.php'),
        (u'Technology', u'http://www.thejakartaglobe.com/pages/getrss/getrss-tech.php'),
        (u'My Jakarta', u'http://www.thejakartaglobe.com/pages/getrss/getrss-myjakarta.php'),
        (u'International', u'http://www.thejakartaglobe.com/pages/getrss/getrss-international.php'),
        (u'Life and Times', u'http://www.thejakartaglobe.com/pages/getrss/getrss-lifeandtimes.php'),
    ]
    __author__ = 'rty'
    publisher = 'JakartaGlobe.com'
    description = 'JakartaGlobe, Indonesia, Newspaper'
    category = 'News, Indonesia'

    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    language = 'en_ID'
    encoding = 'utf-8'
    conversion_options = {'linearize_tables':True}
    masthead_url = 'http://www.thejakartaglobe.com/pages/2010/images/jak-globe-logo.jpg'
    keep_only_tags = [
        dict(name='div', attrs={'class':'story'}),
        dict(name='span', attrs={'class':'headline'}),
        dict(name='div', attrs={'class':'story'}),
        dict(name='p', attrs={'id':'bodytext'})
    ]
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup

class Konflikty(BasicNewsRecipe):
    title = u'Konflikty Zbrojne'
@@ -10,6 +11,23 @@ class Konflikty(BasicNewsRecipe):
    category='military, history'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    no_stylesheets = True
    keep_only_tags=[dict(attrs={'class':['title1', 'image']}), dict(id='body')]

    feeds = [(u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'), (u'Artyku\u0142y', u'http://www.konflikty.pl/rss_artykuly_10.xml'), (u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'), (u'Recenzje', u'http://www.konflikty.pl/rss_recenzje_10.xml')]
    feeds = [(u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'),
        (u'Artyku\u0142y', u'http://www.konflikty.pl/rss_artykuly_10.xml'),
        (u'Historia', u'http://www.konflikty.pl/rss_historia_10.xml'),
        (u'Militaria', u'http://www.konflikty.pl/rss_militaria_10.xml'),
        (u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'),
        (u'Recenzje', u'http://www.konflikty.pl/rss_recenzje_10.xml'),
        (u'Teksty źródłowe', u'http://www.konflikty.pl/rss_tekstyzrodlowe_10.xml')]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for image in soup.findAll(name='a', attrs={'class':'image'}):
            if image.img and image.img.has_key('alt'):
                image.name='div'
                pos = len(image.contents)
                image.insert(pos, BeautifulSoup('<p style="font-style:italic;">'+image.img['alt']+'</p>'))
        return soup

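Konflikty's new preprocess_html keeps image captions by converting each linked image into a plain div and appending the img alt text as an italic paragraph. A self-contained sketch of the idea, assuming the BeautifulSoup 3 build bundled with calibre:

from calibre.ebooks.BeautifulSoup import BeautifulSoup

def caption_images(soup):
    # Turn <a class="image"><img alt="..."/></a> into a captioned <div>.
    for image in soup.findAll(name='a', attrs={'class': 'image'}):
        if image.img and image.img.has_key('alt'):
            image.name = 'div'
            image.insert(len(image.contents),
                BeautifulSoup('<p style="font-style:italic;">' + image.img['alt'] + '</p>'))
    return soup

print(caption_images(BeautifulSoup('<a class="image"><img src="t.jpg" alt="A T-34 tank"/></a>')))
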
recipes/liberatorio_politico.recipe (new file, 12 lines)
@@ -0,0 +1,12 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1334649829(BasicNewsRecipe):
    title = u'Liberatorio Politico'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    masthead_url = 'http://liberatorio.altervista.org/wp-content/uploads/2012/01/Testata-LIBERATORIO-Altervista1.jpg'
    feeds = [(u'Liberatorio Politico', u'http://liberatorio.altervista.org/feed/')]
    __author__ = 'faber1971'
    description = 'Inquiry journalism - a blog on Molfetta, Land of Bari, Apulia and Italy - v1.00 (07, April 2012)'
    language = 'it'
recipes/limes.recipe (new file, 50 lines)
@@ -0,0 +1,50 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2012, faber1971'
__version__ = 'v1.00'
__date__ = '16, April 2012'
__description__ = 'Geopolitical Italian magazine'


from calibre.web.feeds.news import BasicNewsRecipe

class Limes(BasicNewsRecipe):
    description = 'Italian weekly magazine'
    __author__ = 'faber1971'

    cover_url = 'http://temi.repubblica.it/UserFiles/limes/Image/Loghi/logo-limes.gif'
    title = 'Limes'
    category = 'Geopolitical news'

    language = 'it'
#    encoding = 'cp1252'
    timefmt = '[%a, %d %b, %Y]'

    oldest_article = 16
    max_articles_per_feed = 100
    use_embedded_content = False
    recursion = 10

    remove_javascript = True
    no_stylesheets = True
    masthead_url = 'http://temi.repubblica.it/UserFiles/limes/Image/Loghi/logo-limes.gif'

    feeds = [
        (u'Limes', u'http://temi.repubblica.it/limes/feed/')
    ]

    keep_only_tags = [
        dict(name='div', attrs={'class':['testo','copertina','occhiello','firma','didascalia','content-second-right','detail-articles','titolo-local','generic-articles']}),
        dict(name='div', attrs={'class':['generic-articles','summary','detail-articles']}),
        dict(name='div', attrs={'id':['content-second-right','content2']})
    ]

    remove_tags = [
        dict(name='div',attrs={'class':['servizi','aggiungi','label-web','bottom-mobile','box-abbonamenti','box-cerca','big','little','stampaweb']}),
        dict(name='div',attrs={'id':['topheader','header','navigation-new','navigation','content-second-left','menutext']}),
        dict(name='ul',attrs={'id':'user-utility'}),
        dict(name=['script','noscript','iframe'])
    ]

@@ -3,25 +3,6 @@ from calibre.web.feeds.news import BasicNewsRecipe
import re
from calibre.utils.magick import Image
from BeautifulSoup import BeautifulSoup
try:
    from calibre_plugins.drMerry.debug import debuglogger as mlog
    print 'drMerry debuglogger found, debug options can be used'
    from calibre_plugins.drMerry.stats import statslogger as mstat
    print 'drMerry stats tracker found, stat can be tracked'
    mlog.setLoglevel(1) #-1 == no log; 0 for normal output
    mstat.calculateStats(False) #track stats (to track stats loglevel must be > 0
    KEEPSTATS = mstat.keepmystats()
    SHOWDEBUG0 = mlog.showdebuglevel(0)
    SHOWDEBUG1 = mlog.showdebuglevel(1)
    SHOWDEBUG2 = mlog.showdebuglevel(2)
except:
    #print 'drMerry debuglogger not found, skipping debug options'
    SHOWDEBUG0 = False
    SHOWDEBUG1 = False
    SHOWDEBUG2 = False
    KEEPSTATS = False

#print ('level0: %s\nlevel1: %s\nlevel2: %s' % (SHOWDEBUG0,SHOWDEBUG1,SHOWDEBUG2))

''' Version 1.2, updated cover image to match the changed website.
    added info date on title
@@ -43,6 +24,9 @@ except:
    extended timeout from 2 to 10
    changed oldest article from 10 to 1.2
    changed max articles from 15 to 25
    Version 1.9.1 18-04-2012
    removed some debug settings
    updated code to match new metro-layout
'''

class AdvancedUserRecipe1306097511(BasicNewsRecipe):
@@ -70,34 +54,40 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
        'author_sort' : 'Metro Nederland & calibre & DrMerry',
        'publisher' : 'DrMerry/Metro Nederland'
    }
    extra_css = 'body {padding:5px 0px; background:#fff;font-size: 13px;}\
        #date, div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name {clear: both;margin-bottom: 10px;font-size:0.5em; color: #616262;}\
        .article-box-fact.module-title {clear:both;padding: 8px 0;color: #24763b;font-family: arial, sans-serif;font-size: 14px;font-weight: bold;}\
        h1.title {color: #000000;font-size: 44px;padding-bottom: 10px;font-weight: 300;} h2.subtitle {font-size: 13px;font-weight: 700;padding-bottom: 10px;}\
        .article-body p{padding-bottom:10px;}div.column-1-3{margin-left: 19px;padding-right: 9px;}\
        div.column-1-2 {display: inline;padding-right: 7px;}\
        p.article-image-caption {font-size: 12px;font-weight: 300;color: #616262;margin-top: 5px;} \
        p.article-image-caption .credits {font-style: italic;font-size: 10px;}\
        div.article-image-caption {width: 246px;margin-bottom: 5px;margin-left: 10px;}\
        div.article-image-caption-2column {margin-bottom: 10px;width: 373px;} div.article-image-caption-3column {}\
        img {border:0px; padding:2px;} hr.merryhr {width:30%; border-width:0px; color:green; margin-left:5px; background-color: green} div.column-3 {background-color:#eee; width:50%; margin:2px; float:right; padding:2px;} div.column-3 module-title {border: 1px solid #aaa} div.article-box-fact div.subtitle {font-weight:bold; color:green;}'
    extra_css = 'body {padding:5px 0; background-color:#fff;font-size: 1em}\
        #date, div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name {margin-bottom: 10px}\
        #date, div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name, p.article-image-caption .credits {font-size:0.5em}\
        .article-box-fact.module-title, #date, div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name {clear:both}\
        .article-box-fact.module-title {padding: 8px 0}\
        h1.title {color: #000;font-size: 1.4em}\
        .article-box-fact.module-title, h2.subtitle {font-size: 1.2em}\
        h1.title, h2.subtitle, .article-body p{padding-bottom:10px}\
        h1.title, p.article-image-caption {font-weight: 300}\
        div.column-1-3{margin-left: 19px;padding-right: 9px}\
        div.column-1-2 {display: inline;padding-right: 7px}\
        p.article-image-caption {font-size: 0.6em;margin-top: 5px}\
        p.article-image-caption, #date, div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name {color: #616262}\
        p.article-image-caption .credits {font-style: italic}\
        div.article-image-caption {width: 246px;margin: 5px}\
        div.article-image-caption-2column {width: 373px}\
        div.article-image-caption-2column, div.article-image-caption-3column {margin-bottom: 5px}\
        img {border:0}\
        img, div.column-3 {padding:2px}\
        hr.merryhr {width:30%; border-width:0; margin-left:5px; background-color: #24763b}\
        div.column-3 {background-color:#eee; width:50%; margin:2px; float:right}\
        div.column-3 module-title {border: 1px solid #aaa}\
        div.article-box-fact div.subtitle, .article-box-fact.module-title, h2.subtitle {font-weight:bold}\
        div.article-box-fact div.subtitle, hr.merryhr, .article-box-fact.module-title {color: #24763b}'


    preprocess_regexps = [
        (re.compile(r'<img[^>]+top-line[^>]+>', re.DOTALL|re.IGNORECASE),
            lambda match: '<hr class="merryhr" />'),
        (re.compile(r'(<img[^>]+metronieuws\.nl/[^>]+/templates/[^>]+jpe?g[^>]+>|metronieuws\.nl/internal\-roxen\-unit\.gif)', re.DOTALL|re.IGNORECASE),
        (re.compile(r'<img[^>]+(metronieuws\.nl/[^>]+/templates/[^>]+jpe?g|metronieuws\.nl/internal\-roxen\-unit\.gif)[^>]+>', re.DOTALL|re.IGNORECASE),
            lambda match: ''),
    ]

    def preprocess_html(self, soup):
        if SHOWDEBUG0 == True:
            mlog.setdefaults()
            mlog.addTextAndTag(['Show debug = on with level'], [str(mlog.debuglevel)])
            if KEEPSTATS == True:
                mlog.addDebug('Stats will be calculated')
            else:
                mlog.addTextAndTag(['Stats won\'t be calculated\nTo be enabled, stats must be true, currently','and debug level must be 1 or higher, currently'],[mstat.dokeepmystats, mlog.debuglevel])
            mlog.showDebug()
        myProcess = MerryProcess()
        myProcess.removeUnwantedTags(soup)
        return soup
@@ -105,18 +95,6 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    def postprocess_html(self, soup, first):
        myProcess = MerryProcess()
        myProcess.optimizeLayout(soup)
        if SHOWDEBUG0 == True:
            if KEEPSTATS == True:
                statinfo = 'generated stats:'
                statinfo += str(mstat.stats(mstat.statslist))
                print statinfo
                statinfo = 'generated stats (for removed tags):'
                statinfo += str(mstat.stats(mstat.removedtagslist))
                print statinfo
            #show all Debug info we forgot to report
            #Using print to be sure that this text will not be added at the end of the log.
            print '\n!!!!!unreported messages:\n(should be empty)\n'
            mlog.showDebug()
        return soup

    feeds = [
@@ -142,44 +120,24 @@ class MerryPreProcess():
        return soup

    def optimizePicture(self,soup):
        if SHOWDEBUG0 == True:
            mlog.addDebug('start image optimize')
        for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
            iurl = tag['src']
            img = Image()
            img.open(iurl)
            img.trim(0)
            img.save(iurl)
        if SHOWDEBUG0 == True:
            mlog.addDebug('Images optimized')
            mlog.showDebug()
        return soup

class MerryExtract():
    def safeRemovePart(self, killingSoup, soupIsArray):
        if killingSoup and not killingSoup == None:
            if SHOWDEBUG2 == True:
                mlog.addTextAndTag(['items to remove'],[killingSoup])
            try:
                if soupIsArray == True:
                    for killer in killingSoup:
                        killer.extract()
                else:
                    killingSoup.extract()
                if SHOWDEBUG1 == True:
                    mlog.addDebug('tag extracted')
                    mlog.showDebug()
                if KEEPSTATS == True:
                    try:
                        mstat.addstat(mstat.removedtagslist,str(killingSoup.name))
                    except:
                        mstat.addstat(mstat.removedtagslist,'unknown')
            except:
                if SHOWDEBUG1 == True:
                    mlog.addDebug('tag extraction failed')
                    mlog.showDebug()
                if KEEPSTATS == True:
                    mstat.addstat(mstat.removedtagslist,'exception')
                return False
        else:
            return False
@@ -230,60 +188,26 @@ class MerryProcess(BeautifulSoup):

    def optimizeLayout(self,soup):
        self.myPrepare.optimizePicture(soup)
        if SHOWDEBUG0 == True:
            mlog.addDebug('End of Optimize Layout')
            mlog.showDebug()
        return soup

    def insertFacts(self, soup):
        allfacts = soup.findAll('div', {'class':re.compile('^article-box-fact.*$')})
        if SHOWDEBUG0 == True:
            mlog.addTextAndTag(['allfacts'],[allfacts])
            mlog.showDebug()
        if allfacts and not allfacts == None:
            allfactsparent = soup.find('div', {'class':re.compile('^article-box-fact.*$')}).parent
            if SHOWDEBUG0 == True:
                mlog.addTextAndTag(['allfactsparent'],[allfactsparent])
                mlog.showDebug()
            for part in allfactsparent:
                if not part in allfacts:
                    if SHOWDEBUG0 == True:
                        mlog.addTextAndTag(['FOUND A non-fact'],[part])
                        mlog.showDebug()
                    self.myKiller.safeRemovePart(part, True)
            if SHOWDEBUG1 == True:
                mlog.addTextAndTag(['New All Facts'],[allfacts])
                mlog.showDebug()
            articlefacts = soup.find('div', {'class':'article-box-fact column'})
            errorOccured=False
            if (articlefacts and not articlefacts==None):
                try:
                    contenttag = soup.find('div', {'class':'article-body'})
                    if SHOWDEBUG0 == True:
                        mlog.addTextAndTag(['curcontag'],[contenttag])
                        mlog.showDebug()
                    foundrighttag = False
                    if contenttag and not contenttag == None:
                        foundrighttag = True
                        if SHOWDEBUG0 == True:
                            if errorOccured == False:
                                mlog.addTextAndTag(['type','curcontag (in while)'],[type(contenttag),contenttag])
                            else:
                                mlog.addDebug('Could not find right parent tag. Error Occured')
                            mlog.showDebug()
                    if foundrighttag == True:
                        contenttag.insert(0, allfactsparent)
                        if SHOWDEBUG2 == True:
                            mlog.addTextAndTag(['added parent'],[soup.prettify()])
                            mlog.showDebug()
                except:
                    errorOccured=True
                    mlog.addTrace()
            else:
                errorOccured=True
            if SHOWDEBUG0 == True and errorOccured == True:
                mlog.addTextAndTag(['no articlefacts'],[articlefacts])
                mlog.showDebug()
            pass
        return soup

    def previousNextSibRemover(self, soup, previous=True, soupIsArray=False):
@@ -300,71 +224,38 @@ class MerryProcess(BeautifulSoup):
            sibs = findsibsof.nextSiblingGenerator()
            for sib in sibs:
                self.myKiller.safeRemovePart(sib, True)
        else:
            if SHOWDEBUG1 == True:
                mlog.addDebug('Not any sib found')
        return

    def removeUnwantedTags(self,soup):
        if SHOWDEBUG1 == True:
            mlog.addTextAndTag(['Len of Soup before RemoveTagsByName'],[len(str(soup))])
            mlog.showDebug()
        self.removeTagsByName(soup)
        if SHOWDEBUG1 == True:
            mlog.addDebug('Len of Soup before firstandlastpart: %s' % len(str(soup)))
            mlog.showDebug()
        self.insertFacts(soup)
        self.removeFirstAndLastPart(soup)
        if SHOWDEBUG1 == True:
            mlog.addDebug('Len of Soup before unwantedpart: %s' % len(str(soup)))
            mlog.showDebug()
        self.removeUnwantedParts(soup)
        if SHOWDEBUG1 == True:
            mlog.addDebug('Len of Soup before EmptyParts: %s' % len(str(soup)))
            mlog.showDebug()
        self.removeEmptyTags(soup)
        if SHOWDEBUG1 == True:
            mlog.addDebug('Len of Soup after EmptyParts: %s' % len(str(soup)))
            mlog.showDebug()
        self.myReplacer.replaceATag(soup)
        return soup

    def removeUnwantedParts(self, soup):
        if SHOWDEBUG1 == True:
            mlog.addDebug('Len of Soup before UnwantedID: %s' % len(str(soup)))
            mlog.showDebug()
        self.removeUnwantedTagsByID(soup)
        if SHOWDEBUG1 == True:
            mlog.addDebug('Len of Soup before Class: %s' % len(str(soup)))
            mlog.showDebug()
        self.removeUnwantedTagsByClass(soup)
        if SHOWDEBUG1 == True:
            mlog.addDebug('Len of Soup before Style: %s' % len(str(soup)))
            mlog.showDebug()
        self.removeUnwantedTagsByStyle(soup)
        return soup

    def removeUnwantedTagsByStyle(self,soup):
        self.removeArrayOfTags(soup.findAll(attrs={'style' : re.compile("^(.*(display\s?:\s?none|img-mask|white)\s?;?.*)$")}))
        if SHOWDEBUG0 == True:
            mlog.addDebug('end remove by style')
        self.removeArrayOfTags(soup.findAll(attrs={'style':re.compile("^(.*(display\s?:\s?none|img-mask|white)\s?;?.*)$")}))
        self.removeArrayOfTags(soup.findAll(attrs={'title':'volledig scherm'}))
        return soup

    def removeArrayOfTags(self,souparray):
        return self.myKiller.safeRemovePart(souparray, True)

    def removeUnwantedTagsByClass(self,soup):
        if SHOWDEBUG0 == True:
            mlog.addDebug('start remove by class')
        self.removeArrayOfTags(soup.findAll("div", { "class" :re.compile('^(promo.*?|article-tools-below-title|metroCommentFormWrap|ad|share-tools|tools|header-links|related-links|padding-top-15)$')}))
        self.removeArrayOfTags(soup.findAll("div", { "class" :re.compile('^(promo.*?|share-tools-top|share-tools-bottom|article-tools-below-title|metroCommentFormWrap|ad|share-tools|tools|header-links|related-links|padding-top-15|footer-[a-zA-Z0-9]+)$')}))
        return soup

    def removeUnwantedTagsByID(self,soup):
        defaultids = ['footer-extra',re.compile('^ad(\d+|adcomp.*?)?$'),'column-4-5','navigation','header',re.compile('^column-1-5-(top|bottom)$'),'footer','hidden_div','sidebar',re.compile('^article-\d$'),'comments','footer']
        defaultids = ['footer-extra',re.compile('^ad(\d+|adcomp.*?)?$'),'column-4-5','navigation','header',re.compile('^column-1-5-(top|bottom)$'),'footer','hidden_div','sidebar',re.compile('^article-\d$'),'comments','footer','gallery-1']
        for removeid in defaultids:
            if SHOWDEBUG1 == True:
                mlog.addDebug('RemoveTagByID, tag: %s, Len of Soup: %s' % (str(removeid), len(str(soup))))
                mlog.showDebug()
            self.removeArrayOfTags(soup.findAll(id=removeid))
        return soup

@@ -380,33 +271,12 @@ class MerryProcess(BeautifulSoup):
        return soup

    def removeEmptyTags(self,soup,run=0):
        if SHOWDEBUG0 == True:
            mlog.addDebug('starting removeEmptyTags')
        if SHOWDEBUG1 == True:
            run += 1
            mlog.addDebug(run)
            if SHOWDEBUG2 == True:
                mlog.addDebug(str(soup.prettify()))
            mlog.showDebug()
        emptymatches = re.compile('^( |\s|\n|\r|\t)*$')
        emptytags = soup.findAll(lambda tag: tag.find(True) is None and (tag.string is None or tag.string.strip()=="" or tag.string.strip()==emptymatches) and not tag.isSelfClosing)
        if emptytags and not (emptytags == None or emptytags == []):
            if SHOWDEBUG1 == True:
                mlog.addDebug('tags found')
                mlog.addDebug(str(emptytags))
            self.removeArrayOfTags(emptytags)
            #recursive in case removing empty tag creates new empty tag
            self.removeEmptyTags(soup, run=run)
        else:
            if SHOWDEBUG1 == True:
                mlog.addDebug('no empty tags found')
                mlog.showDebug()
        if SHOWDEBUG0 == True:
            if SHOWDEBUG2 == True:
                mlog.addDebug('new soup:')
                mlog.addDebug(str(soup.prettify()))
            mlog.addDebug('RemoveEmptyTags Completed')
            mlog.showDebug()
        return soup

    def removeFirstAndLastPart(self,soup):

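removeEmptyTags above is recursive on purpose: extracting an empty tag can leave its parent empty, so the pass repeats until nothing matches. The core of that loop as a standalone sketch, assuming the BeautifulSoup 3 API (findAll, isSelfClosing) used by the recipe:

from calibre.ebooks.BeautifulSoup import BeautifulSoup

def remove_empty_tags(soup):
    # A tag is empty if it has no child tags and no non-whitespace text.
    empties = soup.findAll(lambda tag: tag.find(True) is None
                           and (tag.string is None or tag.string.strip() == '')
                           and not tag.isSelfClosing)
    if empties:
        for tag in empties:
            tag.extract()
        remove_empty_tags(soup)  # removal may have emptied a parent
    return soup

print(remove_empty_tags(BeautifulSoup('<div><p><span></span></p>text</div>')))
# <div>text</div>
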
@@ -1,52 +1,30 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    title = u'Metro UK'
    description = 'News as provided by The Metro - UK'

    #timefmt = ''
    __author__ = 'Dave Asbury'
    #last update 3/12/11
    cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'
    no_stylesheets = True
    #no_stylesheets = True
    oldest_article = 1
    max_articles_per_feed = 20
    max_articles_per_feed = 10
    remove_empty_feeds = True
    remove_javascript = True
    auto_cleanup = True

    #preprocess_regexps = [(re.compile(r'Tweet'), lambda a : '')]
    preprocess_regexps = [
        (re.compile(r'<span class="img-cap legend">', re.IGNORECASE | re.DOTALL), lambda match: '<p></p><span class="img-cap legend"> ')]
    preprocess_regexps = [
        (re.compile(r'tweet', re.IGNORECASE | re.DOTALL), lambda match: '')]

    language = 'en_GB'

    masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'

    keep_only_tags = [
        dict(name='h1'),dict(name='h2', attrs={'class':'h2'}),
        dict(attrs={'class':['img-cnt figure']}),
        dict(attrs={'class':['art-img']}),
        dict(name='div', attrs={'class':'art-lft'}),
        dict(name='p')
    ]
    remove_tags = [
        dict(name = 'div',attrs={'id' : ['comments-news','formSubmission']}),
        dict(name='div', attrs={'class':[ 'news m12 clrd clr-b p5t shareBtm', 'commentForm', 'metroCommentInnerWrap',
            'art-rgt','pluck-app pluck-comm','news m12 clrd clr-l p5t', 'flt-r','username','clrd' ]}),
        dict(attrs={'class':['username', 'metroCommentFormWrap','commentText','commentsNav','avatar','submDateAndTime','addYourComment','displayName']})
        ,dict(name='div', attrs={'class' : 'clrd art-fd fd-gr1-b'})
    ]

    feeds = [
        (u'News', u'http://www.metro.co.uk/rss/news/'), (u'Money', u'http://www.metro.co.uk/rss/money/'), (u'Sport', u'http://www.metro.co.uk/rss/sport/'), (u'Film', u'http://www.metro.co.uk/rss/metrolife/film/'), (u'Music', u'http://www.metro.co.uk/rss/metrolife/music/'), (u'TV', u'http://www.metro.co.uk/rss/tv/'), (u'Showbiz', u'http://www.metro.co.uk/rss/showbiz/'), (u'Weird News', u'http://www.metro.co.uk/rss/weird/'), (u'Travel', u'http://www.metro.co.uk/rss/travel/'), (u'Lifestyle', u'http://www.metro.co.uk/rss/lifestyle/'), (u'Books', u'http://www.metro.co.uk/rss/lifestyle/books/'), (u'Food', u'http://www.metro.co.uk/rss/lifestyle/restaurants/')]

    extra_css = '''
        body {font: sans-serif medium;}
        h1 {text-align : center; font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;}
        h2 {text-align : center;color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; }
        span{ font-size:9.5px; font-weight:bold;font-style:italic}
        p { text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}

        body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
    '''

@ -9,8 +9,9 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
class recipeMagic(BasicNewsRecipe):
    title = 'National Geographic PL'
    __author__ = 'Marcin Urban 2011'
    __modified_by__ = 'fenuks'
    description = 'legenda wśród magazynów z historią sięgającą 120 lat'
    cover_url = 'http://www.guj.pl/var/guj/storage/images/media/nasze_magazyny/national_geographic/logo/ng_logo/2606-1-pol-PL/ng_logo.jpg'
    #cover_url = 'http://www.guj.pl/var/guj/storage/images/media/nasze_magazyny/national_geographic/logo/ng_logo/2606-1-pol-PL/ng_logo.jpg'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
@ -42,11 +43,43 @@ class recipeMagic(BasicNewsRecipe):
    ]

    remove_attributes = ['width','height']
    feeds=[]

    feeds = [
        ('National Geographic PL', 'http://www.national-geographic.pl/rss/'),
    ]
    def find_articles(self, url):
        articles = []
        soup=self.index_to_soup(url)
        tag=soup.find(attrs={'class':'arl'})
        art=tag.ul.findAll('li')
        for i in art:
            title=i.a['title']
            url=i.a['href']
            #date=soup.find(id='footer').ul.li.string[41:-1]
            desc=i.div.p.string
            articles.append({'title' : title,
                'url' : url,
                'date' : '',
                'description' : desc
            })
        return articles

    def parse_index(self):
        feeds = []
        feeds.append((u"Aktualności", self.find_articles('http://www.national-geographic.pl/aktualnosci/')))
        feeds.append((u"Artykuły", self.find_articles('http://www.national-geographic.pl/artykuly/')))

        return feeds

    def print_version(self, url):
        return url.replace('artykuly0Cpokaz', 'drukuj-artykul')
        if 'artykuly' in url:
            return url.replace('artykuly/pokaz', 'drukuj-artykul')
        elif 'aktualnosci' in url:
            return url.replace('aktualnosci/pokaz', 'drukuj-artykul')
        else:
            return url

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.national-geographic.pl/biezace-wydania/')
        tag=soup.find(attrs={'class':'txt jus'})
        self.cover_url=tag.img['src']
        return getattr(self, 'cover_url', self.cover_url)

@ -81,5 +81,7 @@ class Nowa_Fantastyka(BasicNewsRecipe):
        title=soup.find(attrs={'class':'tytul'})
        if title:
            title['style']='font-size: 20px; font-weight: bold;'
        self.log.warn(soup)
        for a in soup('a'):
            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
                a['href']=self.INDEX + a['href']
        return soup
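
The same relative-link fix-up recurs in several recipes below (PC Arena, tanuki, webhosting.pl). A standalone sketch of the idiom using urljoin, which also copes with already-absolute and scheme-relative links; the helper name is hypothetical, not part of any recipe:

    from urlparse import urljoin  # Python 2, matching the recipes above

    def absolutize_links(soup, base):
        # Rewrite every <a href> relative to the site root given in base.
        for a in soup('a'):
            if a.has_key('href'):
                a['href'] = urljoin(base, a['href'])
        return soup
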
@ -1,3 +1,4 @@
import urllib, re
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1279258912(BasicNewsRecipe):
@ -27,12 +28,30 @@ class AdvancedUserRecipe1279258912(BasicNewsRecipe):
    encoding = 'utf-8'
    conversion_options = {'linearize_tables':True}
    masthead_url = 'http://www.orlandosentinel.com/media/graphic/2009-07/46844851.gif'
    keep_only_tags = [
        dict(name='div', attrs={'class':'story'})
    ]
    remove_tags = [
        dict(name='div', attrs={'class':['articlerail','tools','comment-group','clearfix']}),
    ]
    remove_tags_after = [
        dict(name='p', attrs={'class':'copyright'}),
    ]

    auto_cleanup = True

    def get_article_url(self, article):
        ans = None
        try:
            s = article.summary
            ans = urllib.unquote(
                re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
        except:
            pass
        if ans is None:
            link = article.get('feedburner_origlink', None)
            if link and link.split('/')[-1]=="story01.htm":
                link=link.split('/')[-2]
                encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
                            '0D': '?', '0E': '-', '0N': '.com', '0L': 'http:',
                            '0S':'//'}
                for k, v in encoding.iteritems():
                    link = link.replace(k, v)
                ans = link
            elif link:
                ans = link
        if ans is not None:
            return ans.replace('?track=rss', '')
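
The mapping above undoes FeedBurner's escaping of the original story URL. A minimal worked sketch; the sample link is hypothetical, built to exercise the common substitutions:

    encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
                '0D': '?', '0E': '-', '0N': '.com', '0L': 'http:',
                '0S': '//'}
    link = '0L0Swww0Borlandosentinel0N0Cnews0Clocal0Cstory10Bhtm'
    for k, v in encoding.items():  # iteritems() in the Python 2 code above
        link = link.replace(k, v)
    print(link)  # -> http://www.orlandosentinel.com/news/local/story1.htm
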
@ -7,6 +7,7 @@ class PC_Arena(BasicNewsRecipe):
    description = u'Najnowsze informacje z branży IT - testy, recenzje, aktualności, rankingi, wywiady. Twoje źródło informacji o sprzęcie komputerowym.'
    category = 'IT'
    language = 'pl'
    index='http://pcarena.pl'
    masthead_url='http://pcarena.pl/pcarena/img/logo.png'
    cover_url= 'http://pcarena.pl/pcarena/img/logo.png'
    no_stylesheets = True
@ -23,3 +24,9 @@ class PC_Arena(BasicNewsRecipe):
            return 'http://pcarena.pl' + url
        else:
            return url

    def preprocess_html(self, soup):
        for a in soup('a'):
            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
                a['href']=self.index + a['href']
        return soup
@ -1,5 +1,5 @@
"""
readitlaterlist.com
Pocket Calibre Recipe v1.0
"""
__license__ = 'GPL v3'
__copyright__ = '''
@ -12,22 +12,23 @@ from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe


class Readitlater(BasicNewsRecipe):
    title = 'ReadItLater'
class Pocket(BasicNewsRecipe):
    title = 'Pocket'
    __author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan'
    description = '''Personalized news feeds. Go to readitlaterlist.com to setup \
                  up your news. This version displays pages of articles from \
    description = '''Personalized news feeds. Go to getpocket.com to set up \
                  your news. This version displays pages of articles from \
                  oldest to newest, with max & minimum counts, and marks articles \
                  read after downloading.'''
    publisher = 'readitlaterlist.com'
    publisher = 'getpocket.com'
    category = 'news, custom'
    oldest_article = 7
    max_articles_per_feed = 50
    minimum_articles = 1
    minimum_articles = 10
    mark_as_read_after_dl = True
    no_stylesheets = True
    use_embedded_content = False
    needs_subscription = True
    INDEX = u'http://readitlaterlist.com'
    INDEX = u'http://getpocket.com'
    LOGIN = INDEX + u'/l'
    readList = []

@ -100,9 +101,31 @@ class Readitlater(BasicNewsRecipe):
        br = self.get_browser()
        for link in markList:
            url = self.INDEX + link
            print 'Marking read: ', url
            response = br.open(url)
            response
            print response.info()

    def cleanup(self):
        if self.mark_as_read_after_dl:
            self.mark_as_read(self.readList)
        else:
            pass

    def default_cover(self, cover_file):
        '''
        Create a generic cover for recipes that don't have a cover
        This override adds time to the cover
        '''
        try:
            from calibre.ebooks import calibre_cover
            title = self.title if isinstance(self.title, unicode) else \
                    self.title.decode('utf-8', 'replace')
            date = strftime(self.timefmt)
            time = strftime('[%I:%M %p]')
            img_data = calibre_cover(title, date, time)
            cover_file.write(img_data)
            cover_file.flush()
        except:
            self.log.exception('Failed to generate default cover')
            return False
        return True

25
recipes/swiat_obrazu.recipe
Normal file
@ -0,0 +1,25 @@
from calibre.web.feeds.news import BasicNewsRecipe

class Swiat_Obrazu(BasicNewsRecipe):
    title = u'Swiat Obrazu'
    __author__ = 'fenuks'
    description = u'Internetowy Dziennik o Fotografii i Wideo www.SwiatObrazu.pl to źródło informacji o technice fotografii i wideo, o sprzęcie najbardziej znanych i uznanych firm: Canon, Nikon, Sony, Hasselblad i wielu innych. Znajdziecie tu programy do obróbki zdjęć, forum foto i forum wideo i galerie zdjęć. Codziennie najświeższe informacje: aktualności, testy, poradniki, wywiady, felietony. Swiatobrazu.pl stale organizuje konkursy oraz warsztaty fotograficzne i wideo.'
    category = 'photography'
    masthead_url = 'http://www.swiatobrazu.pl/img/logo.jpg'
    cover_url = 'http://www.swiatobrazu.pl/img/logo.jpg'
    language = 'pl'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript= True
    use_embedded_content = False
    feeds = [(u'Wszystko', u'http://www.swiatobrazu.pl/rss')]

    def print_version(self, url):
        return url + ',drukuj'

    def image_url_processor(self, baseurl, url):
        # 'and', not 'or': only rewrite URLs that carry no scheme at all
        if 'http://' not in url and 'https://' not in url:
            return 'http://www.swiatobrazu.pl' + url[5:]
        else:
            return url
@ -34,4 +34,12 @@ class tanuki(BasicNewsRecipe):

    def preprocess_html(self, soup):
        self.append_page(soup, soup.body)
        for a in soup('a'):
            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
                if 'tanuki-anime' in soup.title.string.lower():
                    a['href']='http://anime.tanuki.pl' + a['href']
                elif 'tanuki-manga' in soup.title.string.lower():
                    a['href']='http://manga.tanuki.pl' + a['href']
                elif 'tanuki-czytelnia' in soup.title.string.lower():
                    a['href']='http://czytelnia.tanuki.pl' + a['href']
        return soup
@ -1,9 +1,8 @@
import re
import re, mechanize
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1325006965(BasicNewsRecipe):

    title = u'The Sun UK'
    cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png'

    description = 'A Recipe for The Sun tabloid UK'
    __author__ = 'Dave Asbury'
@ -49,12 +48,44 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):

    feeds = [
        #(u'News', u'http://www.thesun.co.uk/sol/homepage/news/rss'),
        (u'News','http://feed43.com/2517447382644748.xml'),
        (u'Sport', u'http://feed43.com/4283846255668687.xml'),
        (u'Bizarre', u'http://feed43.com/0233840304242011.xml'),
        (u'Film',u'http://feed43.com/1307545221226200.xml'),
        (u'Music',u'http://feed43.com/1701513435064132.xml'),
        (u'Sun Woman',u'http://feed43.com/0022626854226453.xml'),
    ]
    ]

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.politicshome.com/uk/latest_frontpage.html')
        # look for the block containing the sun button and url
        cov = soup.find(attrs={'style' : 'background-image: url(http://www.politicshome.com/images/sources/source_frontpage_button_84.gif);'})

        #cov = soup.find(attrs={'id' : 'large'})
        cov2 = str(cov)
        cov2='http://www.politicshome.com'+cov2[9:-133]
        #cov2 now contains url of the page containing pic
        soup = self.index_to_soup(cov2)
        cov = soup.find(attrs={'id' : 'large'})
        cov2 = str(cov)
        cov2=cov2[27:-18]
        #cov2 now is pic url, now go back to original function
        br = mechanize.Browser()
        br.set_handle_redirect(False)
        try:
            br.open_novisit(cov2)
            cover_url = cov2
        except:
            cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png'

        #cover_url = cov2
        #cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png'
        return cover_url

@ -8,6 +8,7 @@ class webhosting_pl(BasicNewsRecipe):
    cover_url='http://webhosting.pl/images/logo.png'
    masthead_url='http://webhosting.pl/images/logo.png'
    oldest_article = 7
    index='http://webhosting.pl'
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
@ -37,3 +38,9 @@ class webhosting_pl(BasicNewsRecipe):

    def print_version(self, url):
        return url.replace('webhosting.pl', 'webhosting.pl/print')

    def preprocess_html(self, soup):
        for a in soup('a'):
            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
                a['href']=self.index + a['href']
        return soup
@ -21,7 +21,7 @@ class XkcdCom(BasicNewsRecipe):

    use_embedded_content = False
    oldest_article = 60
    keep_only_tags = [dict(id='middleContent')]
    keep_only_tags = [dict(id='middleContainer')]
    remove_tags = [dict(name='ul'), dict(name='h3'), dict(name='br')]
    no_stylesheets = True
    # turn image bubblehelp into a paragraph
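
A minimal sketch of what such a transform can look like (an assumed helper, not the recipe's actual code): copy each comic image's title text into a paragraph appended to the image's parent, so the hover text survives conversion.

    from calibre.ebooks.BeautifulSoup import Tag

    def bubblehelp_to_paragraph(soup):
        for img in soup.findAll('img', title=True):
            p = Tag(soup, 'p')
            p.insert(0, img['title'])
            # append the paragraph at the end of the image's parent tag
            img.parent.insert(len(img.parent.contents), p)
        return soup
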
@ -26,7 +26,7 @@ def login_to_google(username, password):
    br.form['Email'] = username
    br.form['Passwd'] = password
    raw = br.submit().read()
    if re.search(br'<title>.*?Account Settings</title>', raw) is None:
    if re.search(br'(?i)<title>.*?Account Settings</title>', raw) is None:
        x = re.search(br'(?is)<title>.*?</title>', raw)
        if x is not None:
            print ('Title of post login page: %s'%x.group())

@ -12,14 +12,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2011-12-14 19:48+0000\n"
"Last-Translator: Ferran Rius <frius64@hotmail.com>\n"
"PO-Revision-Date: 2012-04-12 09:56+0000\n"
"Last-Translator: Dídac Rios <didac@niorcs.com>\n"
"Language-Team: Catalan <linux@softcatala.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2011-12-15 05:18+0000\n"
"X-Generator: Launchpad (build 14487)\n"
"X-Launchpad-Export-Date: 2012-04-13 05:26+0000\n"
"X-Generator: Launchpad (build 15070)\n"
"Language: ca\n"

#. name for aaa
|
||||
@ -9584,31 +9584,31 @@ msgstr ""
|
||||
|
||||
#. name for hoi
|
||||
msgid "Holikachuk"
|
||||
msgstr ""
|
||||
msgstr "Holikachuk"
|
||||
|
||||
#. name for hoj
|
||||
msgid "Hadothi"
|
||||
msgstr ""
|
||||
msgstr "Hadothi"
|
||||
|
||||
#. name for hol
|
||||
msgid "Holu"
|
||||
msgstr ""
|
||||
msgstr "Holu"
|
||||
|
||||
#. name for hom
|
||||
msgid "Homa"
|
||||
msgstr ""
|
||||
msgstr "Homa"
|
||||
|
||||
#. name for hoo
|
||||
msgid "Holoholo"
|
||||
msgstr ""
|
||||
msgstr "Holoholo"
|
||||
|
||||
#. name for hop
|
||||
msgid "Hopi"
|
||||
msgstr ""
|
||||
msgstr "Hopi"
|
||||
|
||||
#. name for hor
|
||||
msgid "Horo"
|
||||
msgstr ""
|
||||
msgstr "Horo"
|
||||
|
||||
#. name for hos
|
||||
msgid "Ho Chi Minh City Sign Language"
|
||||
@ -9616,15 +9616,15 @@ msgstr "Llenguatge de signes de la ciutat de Ho Chi Minh"
|
||||
|
||||
#. name for hot
|
||||
msgid "Hote"
|
||||
msgstr ""
|
||||
msgstr "Hote"
|
||||
|
||||
#. name for hov
|
||||
msgid "Hovongan"
|
||||
msgstr ""
|
||||
msgstr "Hovongan"
|
||||
|
||||
#. name for how
|
||||
msgid "Honi"
|
||||
msgstr ""
|
||||
msgstr "Honi"
|
||||
|
||||
#. name for hoy
|
||||
msgid "Holiya"
|
||||
@ -9636,7 +9636,7 @@ msgstr ""
|
||||
|
||||
#. name for hpo
|
||||
msgid "Hpon"
|
||||
msgstr ""
|
||||
msgstr "Hpon"
|
||||
|
||||
#. name for hps
|
||||
msgid "Hawai'i Pidgin Sign Language"
|
||||
@ -9644,35 +9644,35 @@ msgstr "Hawaià Pidgin; llenguatge de signes"
|
||||
|
||||
#. name for hra
|
||||
msgid "Hrangkhol"
|
||||
msgstr ""
|
||||
msgstr "Hrangkhol"
|
||||
|
||||
#. name for hre
|
||||
msgid "Hre"
|
||||
msgstr ""
|
||||
msgstr "Hre"
|
||||
|
||||
#. name for hrk
|
||||
msgid "Haruku"
|
||||
msgstr ""
|
||||
msgstr "Haruku"
|
||||
|
||||
#. name for hrm
|
||||
msgid "Miao; Horned"
|
||||
msgstr ""
|
||||
msgstr "Miao; Horned"
|
||||
|
||||
#. name for hro
|
||||
msgid "Haroi"
|
||||
msgstr ""
|
||||
msgstr "Haroi"
|
||||
|
||||
#. name for hrr
|
||||
msgid "Horuru"
|
||||
msgstr ""
|
||||
msgstr "Horuru"
|
||||
|
||||
#. name for hrt
|
||||
msgid "Hértevin"
|
||||
msgstr ""
|
||||
msgstr "Hértevin"
|
||||
|
||||
#. name for hru
|
||||
msgid "Hruso"
|
||||
msgstr ""
|
||||
msgstr "Hruso"
|
||||
|
||||
#. name for hrv
|
||||
msgid "Croatian"
|
||||
@ -9680,11 +9680,11 @@ msgstr "Croat"
|
||||
|
||||
#. name for hrx
|
||||
msgid "Hunsrik"
|
||||
msgstr ""
|
||||
msgstr "Hunsrik"
|
||||
|
||||
#. name for hrz
|
||||
msgid "Harzani"
|
||||
msgstr ""
|
||||
msgstr "Harzani"
|
||||
|
||||
#. name for hsb
|
||||
msgid "Sorbian; Upper"
|
||||
@ -9704,31 +9704,31 @@ msgstr "Xinès; Xiang"
|
||||
|
||||
#. name for hss
|
||||
msgid "Harsusi"
|
||||
msgstr ""
|
||||
msgstr "Harsusi"
|
||||
|
||||
#. name for hti
|
||||
msgid "Hoti"
|
||||
msgstr ""
|
||||
msgstr "Hoti"
|
||||
|
||||
#. name for hto
|
||||
msgid "Huitoto; Minica"
|
||||
msgstr ""
|
||||
msgstr "Huitoto; Minica"
|
||||
|
||||
#. name for hts
|
||||
msgid "Hadza"
|
||||
msgstr ""
|
||||
msgstr "Hadza"
|
||||
|
||||
#. name for htu
|
||||
msgid "Hitu"
|
||||
msgstr ""
|
||||
msgstr "Hitu"
|
||||
|
||||
#. name for htx
|
||||
msgid "Hittite; Middle"
|
||||
msgstr ""
|
||||
msgstr "Hittite; Middle"
|
||||
|
||||
#. name for hub
|
||||
msgid "Huambisa"
|
||||
msgstr ""
|
||||
msgstr "Huambisa"
|
||||
|
||||
#. name for huc
|
||||
msgid "=/Hua"
|
||||
@ -9736,27 +9736,27 @@ msgstr ""
|
||||
|
||||
#. name for hud
|
||||
msgid "Huaulu"
|
||||
msgstr ""
|
||||
msgstr "Huaulu"
|
||||
|
||||
#. name for hue
|
||||
msgid "Huave; San Francisco Del Mar"
|
||||
msgstr ""
|
||||
msgstr "Huave; San Francisco Del Mar"
|
||||
|
||||
#. name for huf
|
||||
msgid "Humene"
|
||||
msgstr ""
|
||||
msgstr "Humene"
|
||||
|
||||
#. name for hug
|
||||
msgid "Huachipaeri"
|
||||
msgstr ""
|
||||
msgstr "Huachipaeri"
|
||||
|
||||
#. name for huh
|
||||
msgid "Huilliche"
|
||||
msgstr ""
|
||||
msgstr "Huilliche"
|
||||
|
||||
#. name for hui
|
||||
msgid "Huli"
|
||||
msgstr ""
|
||||
msgstr "Huli"
|
||||
|
||||
#. name for huj
|
||||
msgid "Miao; Northern Guiyang"
|
||||
@ -9764,15 +9764,15 @@ msgstr "Miao; Guiyang septentrional"
|
||||
|
||||
#. name for huk
|
||||
msgid "Hulung"
|
||||
msgstr ""
|
||||
msgstr "Hulung"
|
||||
|
||||
#. name for hul
|
||||
msgid "Hula"
|
||||
msgstr ""
|
||||
msgstr "Hula"
|
||||
|
||||
#. name for hum
|
||||
msgid "Hungana"
|
||||
msgstr ""
|
||||
msgstr "Hungana"
|
||||
|
||||
#. name for hun
|
||||
msgid "Hungarian"
|
||||
@ -9780,43 +9780,43 @@ msgstr "Hongarès"
|
||||
|
||||
#. name for huo
|
||||
msgid "Hu"
|
||||
msgstr ""
|
||||
msgstr "Hu"
|
||||
|
||||
#. name for hup
|
||||
msgid "Hupa"
|
||||
msgstr ""
|
||||
msgstr "Hupa"
|
||||
|
||||
#. name for huq
|
||||
msgid "Tsat"
|
||||
msgstr ""
|
||||
msgstr "Tsat"
|
||||
|
||||
#. name for hur
|
||||
msgid "Halkomelem"
|
||||
msgstr ""
|
||||
msgstr "Halkomelem"
|
||||
|
||||
#. name for hus
|
||||
msgid "Huastec"
|
||||
msgstr ""
|
||||
msgstr "Huastec"
|
||||
|
||||
#. name for hut
|
||||
msgid "Humla"
|
||||
msgstr ""
|
||||
msgstr "Humla"
|
||||
|
||||
#. name for huu
|
||||
msgid "Huitoto; Murui"
|
||||
msgstr ""
|
||||
msgstr "Huitoto; Murui"
|
||||
|
||||
#. name for huv
|
||||
msgid "Huave; San Mateo Del Mar"
|
||||
msgstr ""
|
||||
msgstr "Huave; San Mateo Del Mar"
|
||||
|
||||
#. name for huw
|
||||
msgid "Hukumina"
|
||||
msgstr ""
|
||||
msgstr "Hukumina"
|
||||
|
||||
#. name for hux
|
||||
msgid "Huitoto; Nüpode"
|
||||
msgstr ""
|
||||
msgstr "Huitoto; Nüpode"
|
||||
|
||||
#. name for huy
|
||||
msgid "Hulaulá"
|
||||
|
@ -8,14 +8,14 @@ msgstr ""
"Project-Id-Version: calibre\n"
"Report-Msgid-Bugs-To: FULL NAME <EMAIL@ADDRESS>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2012-03-11 10:13+0000\n"
"Last-Translator: Jellby <Unknown>\n"
"PO-Revision-Date: 2012-04-18 20:56+0000\n"
"Last-Translator: David de Obregon <Unknown>\n"
"Language-Team: Spanish <es@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2012-03-12 04:38+0000\n"
"X-Generator: Launchpad (build 14933)\n"
"X-Launchpad-Export-Date: 2012-04-19 04:37+0000\n"
"X-Generator: Launchpad (build 15108)\n"

#. name for aaa
|
||||
msgid "Ghotuo"
|
||||
@ -4931,7 +4931,7 @@ msgstr "Como karim"
|
||||
|
||||
#. name for cfm
|
||||
msgid "Chin; Falam"
|
||||
msgstr ""
|
||||
msgstr "Chin; Falam"
|
||||
|
||||
#. name for cga
|
||||
msgid "Changriwa"
|
||||
@ -5071,7 +5071,7 @@ msgstr "Chinali"
|
||||
|
||||
#. name for cik
|
||||
msgid "Kinnauri; Chitkuli"
|
||||
msgstr ""
|
||||
msgstr "Kinnauri; Chitkuli"
|
||||
|
||||
#. name for cim
|
||||
msgid "Cimbrian"
|
||||
@ -5147,7 +5147,7 @@ msgstr "Chino jin"
|
||||
|
||||
#. name for cka
|
||||
msgid "Chin; Khumi Awa"
|
||||
msgstr ""
|
||||
msgstr "Chin; Khumi Awa"
|
||||
|
||||
#. name for ckb
|
||||
msgid "Kurdish; Central"
|
||||
@ -5287,7 +5287,7 @@ msgstr "Mnong central"
|
||||
|
||||
#. name for cmr
|
||||
msgid "Chin; Mro"
|
||||
msgstr ""
|
||||
msgstr "Chin; Mro"
|
||||
|
||||
#. name for cms
|
||||
msgid "Messapic"
|
||||
@ -5303,7 +5303,7 @@ msgstr "Changthang"
|
||||
|
||||
#. name for cnb
|
||||
msgid "Chin; Chinbon"
|
||||
msgstr ""
|
||||
msgstr "Chin; Chinbon"
|
||||
|
||||
#. name for cnc
|
||||
msgid "Côông"
|
||||
@ -5315,7 +5315,7 @@ msgstr "Qiang septentrional"
|
||||
|
||||
#. name for cnh
|
||||
msgid "Chin; Haka"
|
||||
msgstr ""
|
||||
msgstr "Chin; Haka"
|
||||
|
||||
#. name for cni
|
||||
msgid "Asháninka"
|
||||
@ -5323,7 +5323,7 @@ msgstr "Asháninka"
|
||||
|
||||
#. name for cnk
|
||||
msgid "Chin; Khumi"
|
||||
msgstr ""
|
||||
msgstr "Chin; Khumi"
|
||||
|
||||
#. name for cnl
|
||||
msgid "Chinantec; Lalana"
|
||||
@ -5347,7 +5347,7 @@ msgstr "Chenoua"
|
||||
|
||||
#. name for cnw
|
||||
msgid "Chin; Ngawn"
|
||||
msgstr ""
|
||||
msgstr "Chin; Ngawn"
|
||||
|
||||
#. name for cnx
|
||||
msgid "Cornish; Middle"
|
||||
@ -5459,7 +5459,7 @@ msgstr "Chinanteco de Palantla"
|
||||
|
||||
#. name for cpb
|
||||
msgid "Ashéninka; Ucayali-Yurúa"
|
||||
msgstr ""
|
||||
msgstr "Ashéninka; Ucayali-Yurúa"
|
||||
|
||||
#. name for cpc
|
||||
msgid "Ajyíninka Apurucayali"
|
||||
@ -5483,7 +5483,7 @@ msgstr "Capiznon"
|
||||
|
||||
#. name for cpu
|
||||
msgid "Ashéninka; Pichis"
|
||||
msgstr ""
|
||||
msgstr "Ashéninka; Pichis"
|
||||
|
||||
#. name for cpx
|
||||
msgid "Chinese; Pu-Xian"
|
||||
@ -5491,11 +5491,11 @@ msgstr "Chino puxian"
|
||||
|
||||
#. name for cpy
|
||||
msgid "Ashéninka; South Ucayali"
|
||||
msgstr ""
|
||||
msgstr "Ashéninka; South Ucayali"
|
||||
|
||||
#. name for cqd
|
||||
msgid "Miao; Chuanqiandian Cluster"
|
||||
msgstr ""
|
||||
msgstr "Miao; Chuanqiandian Cluster"
|
||||
|
||||
#. name for cqu
|
||||
msgid "Quechua; Chilean"
|
||||
@ -5507,7 +5507,7 @@ msgstr "Chara"
|
||||
|
||||
#. name for crb
|
||||
msgid "Carib; Island"
|
||||
msgstr ""
|
||||
msgstr "Carib; Island"
|
||||
|
||||
#. name for crc
|
||||
msgid "Lonwolwol"
|
||||
@ -5539,23 +5539,23 @@ msgstr "Forro"
|
||||
|
||||
#. name for crj
|
||||
msgid "Cree; Southern East"
|
||||
msgstr ""
|
||||
msgstr "Cree; Southern East"
|
||||
|
||||
#. name for crk
|
||||
msgid "Cree; Plains"
|
||||
msgstr ""
|
||||
msgstr "Cree; Plains"
|
||||
|
||||
#. name for crl
|
||||
msgid "Cree; Northern East"
|
||||
msgstr ""
|
||||
msgstr "Cree; Northern East"
|
||||
|
||||
#. name for crm
|
||||
msgid "Cree; Moose"
|
||||
msgstr ""
|
||||
msgstr "Cree; Moose"
|
||||
|
||||
#. name for crn
|
||||
msgid "Cora; El Nayar"
|
||||
msgstr ""
|
||||
msgstr "Cora; El Nayar"
|
||||
|
||||
#. name for cro
|
||||
msgid "Crow"
|
||||
@ -5563,11 +5563,11 @@ msgstr "Crow"
|
||||
|
||||
#. name for crq
|
||||
msgid "Chorote; Iyo'wujwa"
|
||||
msgstr ""
|
||||
msgstr "Chorote; Iyo'wujwa"
|
||||
|
||||
#. name for crr
|
||||
msgid "Algonquian; Carolina"
|
||||
msgstr ""
|
||||
msgstr "Algonquian; Carolina"
|
||||
|
||||
#. name for crs
|
||||
msgid "Creole French; Seselwa"
|
||||
@ -5575,7 +5575,7 @@ msgstr "Francés criollo seychellense"
|
||||
|
||||
#. name for crt
|
||||
msgid "Chorote; Iyojwa'ja"
|
||||
msgstr ""
|
||||
msgstr "Chorote; Iyojwa'ja"
|
||||
|
||||
#. name for crv
|
||||
msgid "Chaura"
|
||||
@ -5627,11 +5627,11 @@ msgstr "Lengua de signos chilena"
|
||||
|
||||
#. name for csh
|
||||
msgid "Chin; Asho"
|
||||
msgstr ""
|
||||
msgstr "Chin; Asho"
|
||||
|
||||
#. name for csi
|
||||
msgid "Miwok; Coast"
|
||||
msgstr ""
|
||||
msgstr "Miwok; Coast"
|
||||
|
||||
#. name for csk
|
||||
msgid "Jola-Kasa"
|
||||
@ -5643,7 +5643,7 @@ msgstr "Lengua de signos china"
|
||||
|
||||
#. name for csm
|
||||
msgid "Miwok; Central Sierra"
|
||||
msgstr ""
|
||||
msgstr "Miwok; Central Sierra"
|
||||
|
||||
#. name for csn
|
||||
msgid "Colombian Sign Language"
|
||||
@ -5671,11 +5671,11 @@ msgstr "Ohlone septentrional"
|
||||
|
||||
#. name for csw
|
||||
msgid "Cree; Swampy"
|
||||
msgstr ""
|
||||
msgstr "Cree; Swampy"
|
||||
|
||||
#. name for csy
|
||||
msgid "Chin; Siyin"
|
||||
msgstr ""
|
||||
msgstr "Chin; Siyin"
|
||||
|
||||
#. name for csz
|
||||
msgid "Coos"
|
||||
@ -5691,7 +5691,7 @@ msgstr "Chetco"
|
||||
|
||||
#. name for ctd
|
||||
msgid "Chin; Tedim"
|
||||
msgstr ""
|
||||
msgstr "Chin; Tedim"
|
||||
|
||||
#. name for cte
|
||||
msgid "Chinantec; Tepinapa"
|
||||
@ -5727,7 +5727,7 @@ msgstr "Pandan"
|
||||
|
||||
#. name for ctt
|
||||
msgid "Chetti; Wayanad"
|
||||
msgstr ""
|
||||
msgstr "Chetti; Wayanad"
|
||||
|
||||
#. name for ctu
|
||||
msgid "Chol"
|
||||
@ -5767,7 +5767,7 @@ msgstr "Mashco piro"
|
||||
|
||||
#. name for cuk
|
||||
msgid "Kuna; San Blas"
|
||||
msgstr ""
|
||||
msgstr "Kuna; San Blas"
|
||||
|
||||
#. name for cul
|
||||
msgid "Culina"
|
||||
@ -5795,7 +5795,7 @@ msgstr "Chhulung"
|
||||
|
||||
#. name for cut
|
||||
msgid "Cuicatec; Teutila"
|
||||
msgstr ""
|
||||
msgstr "Cuicatec; Teutila"
|
||||
|
||||
#. name for cuu
|
||||
msgid "Tai Ya"
|
||||
@ -5811,7 +5811,7 @@ msgstr "Chukwa"
|
||||
|
||||
#. name for cux
|
||||
msgid "Cuicatec; Tepeuxila"
|
||||
msgstr ""
|
||||
msgstr "Cuicatec; Tepeuxila"
|
||||
|
||||
#. name for cvg
|
||||
msgid "Chug"
|
||||
@ -5831,7 +5831,7 @@ msgstr "Maindo"
|
||||
|
||||
#. name for cwd
|
||||
msgid "Cree; Woods"
|
||||
msgstr ""
|
||||
msgstr "Cree; Woods"
|
||||
|
||||
#. name for cwe
|
||||
msgid "Kwere"
|
||||
@ -5879,7 +5879,7 @@ msgstr "Chino minzhong"
|
||||
|
||||
#. name for czt
|
||||
msgid "Chin; Zotung"
|
||||
msgstr ""
|
||||
msgstr "Chin; Zotung"
|
||||
|
||||
#. name for daa
|
||||
msgid "Dangaléat"
|
||||
@ -5935,7 +5935,7 @@ msgstr "Danés"
|
||||
|
||||
#. name for dao
|
||||
msgid "Chin; Daai"
|
||||
msgstr ""
|
||||
msgstr "Chin; Daai"
|
||||
|
||||
#. name for dap
|
||||
msgid "Nisi (India)"
|
||||
@ -5943,7 +5943,7 @@ msgstr "Nisi (India)"
|
||||
|
||||
#. name for daq
|
||||
msgid "Maria; Dandami"
|
||||
msgstr ""
|
||||
msgstr "Maria; Dandami"
|
||||
|
||||
#. name for dar
|
||||
msgid "Dargwa"
|
||||
@ -5995,7 +5995,7 @@ msgstr "Edopi"
|
||||
|
||||
#. name for dbg
|
||||
msgid "Dogon; Dogul Dom"
|
||||
msgstr ""
|
||||
msgstr "Dogon; Dogul Dom"
|
||||
|
||||
#. name for dbi
|
||||
msgid "Doka"
|
||||
@ -6035,7 +6035,7 @@ msgstr "Dabarre"
|
||||
|
||||
#. name for dbu
|
||||
msgid "Dogon; Bondum Dom"
|
||||
msgstr ""
|
||||
msgstr "Dogon; Bondum Dom"
|
||||
|
||||
#. name for dbv
|
||||
msgid "Dungu"
|
||||
@ -6067,7 +6067,7 @@ msgstr "Fataluku"
|
||||
|
||||
#. name for ddi
|
||||
msgid "Goodenough; West"
|
||||
msgstr ""
|
||||
msgstr "Goodenough; West"
|
||||
|
||||
#. name for ddj
|
||||
msgid "Jaru"
|
||||
@ -6083,7 +6083,7 @@ msgstr "Dido"
|
||||
|
||||
#. name for dds
|
||||
msgid "Dogon; Donno So"
|
||||
msgstr ""
|
||||
msgstr "Dogon; Donno So"
|
||||
|
||||
#. name for ddw
|
||||
msgid "Dawera-Daweloor"
|
||||
@ -6135,7 +6135,7 @@ msgstr "Slave (atabascano)"
|
||||
|
||||
#. name for dep
|
||||
msgid "Delaware; Pidgin"
|
||||
msgstr ""
|
||||
msgstr "Delaware; Pidgin"
|
||||
|
||||
#. name for deq
|
||||
msgid "Dendi (Central African Republic)"
|
||||
@ -6167,11 +6167,11 @@ msgstr "Dagaare meridional"
|
||||
|
||||
#. name for dgb
|
||||
msgid "Dogon; Bunoge"
|
||||
msgstr ""
|
||||
msgstr "Dogon; Bunoge"
|
||||
|
||||
#. name for dgc
|
||||
msgid "Agta; Casiguran Dumagat"
|
||||
msgstr ""
|
||||
msgstr "Agta; Casiguran Dumagat"
|
||||
|
||||
#. name for dgd
|
||||
msgid "Dagaari Dioula"
|
||||
@ -6283,7 +6283,7 @@ msgstr "Dinka centromeridional"
|
||||
|
||||
#. name for dic
|
||||
msgid "Dida; Lakota"
|
||||
msgstr ""
|
||||
msgstr "Dida; Lakota"
|
||||
|
||||
#. name for did
|
||||
msgid "Didinga"
|
||||
@ -6411,7 +6411,7 @@ msgstr "Djiwarli"
|
||||
|
||||
#. name for djm
|
||||
msgid "Dogon; Jamsay"
|
||||
msgstr ""
|
||||
msgstr "Dogon; Jamsay"
|
||||
|
||||
#. name for djn
|
||||
msgid "Djauan"
|
||||
@ -6471,7 +6471,7 @@ msgstr "Duma"
|
||||
|
||||
#. name for dmb
|
||||
msgid "Dogon; Mombo"
|
||||
msgstr ""
|
||||
msgstr "Dogon; Mombo"
|
||||
|
||||
#. name for dmc
|
||||
msgid "Dimir"
|
||||
@ -6483,7 +6483,7 @@ msgstr "Dugwor"
|
||||
|
||||
#. name for dmg
|
||||
msgid "Kinabatangan; Upper"
|
||||
msgstr ""
|
||||
msgstr "Kinabatangan; Upper"
|
||||
|
||||
#. name for dmk
|
||||
msgid "Domaaki"
|
||||
@ -6503,7 +6503,7 @@ msgstr "Kemezung"
|
||||
|
||||
#. name for dmr
|
||||
msgid "Damar; East"
|
||||
msgstr ""
|
||||
msgstr "Damar; East"
|
||||
|
||||
#. name for dms
|
||||
msgid "Dampelas"
|
||||
@ -6527,7 +6527,7 @@ msgstr "Demta"
|
||||
|
||||
#. name for dna
|
||||
msgid "Dani; Upper Grand Valley"
|
||||
msgstr ""
|
||||
msgstr "Dani; Upper Grand Valley"
|
||||
|
||||
#. name for dnd
|
||||
msgid "Daonda"
|
||||
@ -6543,7 +6543,7 @@ msgstr "Dungan"
|
||||
|
||||
#. name for dni
|
||||
msgid "Dani; Lower Grand Valley"
|
||||
msgstr ""
|
||||
msgstr "Dani; Lower Grand Valley"
|
||||
|
||||
#. name for dnk
|
||||
msgid "Dengka"
|
||||
@ -6559,7 +6559,7 @@ msgstr "Danaru"
|
||||
|
||||
#. name for dnt
|
||||
msgid "Dani; Mid Grand Valley"
|
||||
msgstr ""
|
||||
msgstr "Dani; Mid Grand Valley"
|
||||
|
||||
#. name for dnu
|
||||
msgid "Danau"
|
||||
@ -6695,7 +6695,7 @@ msgstr "Damar occidental"
|
||||
|
||||
#. name for dro
|
||||
msgid "Melanau; Daro-Matu"
|
||||
msgstr ""
|
||||
msgstr "Melanau; Daro-Matu"
|
||||
|
||||
#. name for drq
|
||||
msgid "Dura"
|
||||
@ -6723,7 +6723,7 @@ msgstr "Darai"
|
||||
|
||||
#. name for dsb
|
||||
msgid "Sorbian; Lower"
|
||||
msgstr ""
|
||||
msgstr "Sorbian; Lower"
|
||||
|
||||
#. name for dse
|
||||
msgid "Dutch Sign Language"
|
||||
@ -6759,7 +6759,7 @@ msgstr "Daur"
|
||||
|
||||
#. name for dtb
|
||||
msgid "Kadazan; Labuk-Kinabatangan"
|
||||
msgstr ""
|
||||
msgstr "Kadazan; Labuk-Kinabatangan"
|
||||
|
||||
#. name for dtd
|
||||
msgid "Ditidaht"
|
||||
@ -6767,15 +6767,15 @@ msgstr "Ditidaht"
|
||||
|
||||
#. name for dti
|
||||
msgid "Dogon; Ana Tinga"
|
||||
msgstr ""
|
||||
msgstr "Dogon; Ana Tinga"
|
||||
|
||||
#. name for dtk
|
||||
msgid "Dogon; Tene Kan"
|
||||
msgstr ""
|
||||
msgstr "Dogon; Tene Kan"
|
||||
|
||||
#. name for dtm
|
||||
msgid "Dogon; Tomo Kan"
|
||||
msgstr ""
|
||||
msgstr "Dogon; Tomo Kan"
|
||||
|
||||
#. name for dtp
|
||||
msgid "Dusun; Central"
|
||||
@ -6787,15 +6787,15 @@ msgstr "Lotud"
|
||||
|
||||
#. name for dts
|
||||
msgid "Dogon; Toro So"
|
||||
msgstr ""
|
||||
msgstr "Dogon; Toro So"
|
||||
|
||||
#. name for dtt
|
||||
msgid "Dogon; Toro Tegu"
|
||||
msgstr ""
|
||||
msgstr "Dogon; Toro Tegu"
|
||||
|
||||
#. name for dtu
|
||||
msgid "Dogon; Tebul Ure"
|
||||
msgstr ""
|
||||
msgstr "Dogon; Tebul Ure"
|
||||
|
||||
#. name for dua
|
||||
msgid "Duala"
|
||||
@ -6815,7 +6815,7 @@ msgstr "Hun-saare"
|
||||
|
||||
#. name for due
|
||||
msgid "Agta; Umiray Dumaget"
|
||||
msgstr ""
|
||||
msgstr "Agta; Umiray Dumaget"
|
||||
|
||||
#. name for duf
|
||||
msgid "Dumbea"
|
||||
@ -6843,7 +6843,7 @@ msgstr "Uyajitaya"
|
||||
|
||||
#. name for dul
|
||||
msgid "Agta; Alabat Island"
|
||||
msgstr ""
|
||||
msgstr "Agta; Alabat Island"
|
||||
|
||||
#. name for dum
|
||||
msgid "Dutch; Middle (ca. 1050-1350)"
|
||||
@ -6855,7 +6855,7 @@ msgstr "Dusun deyah"
|
||||
|
||||
#. name for duo
|
||||
msgid "Agta; Dupaninan"
|
||||
msgstr ""
|
||||
msgstr "Agta; Dupaninan"
|
||||
|
||||
#. name for dup
|
||||
msgid "Duano"
|
||||
@ -6891,7 +6891,7 @@ msgstr "Duungooma"
|
||||
|
||||
#. name for duy
|
||||
msgid "Agta; Dicamay"
|
||||
msgstr ""
|
||||
msgstr "Agta; Dicamay"
|
||||
|
||||
#. name for duz
|
||||
msgid "Duli"
|
||||
@ -6907,7 +6907,7 @@ msgstr "Diri"
|
||||
|
||||
#. name for dwl
|
||||
msgid "Dogon; Walo Kumbe"
|
||||
msgstr ""
|
||||
msgstr "Dogon; Walo Kumbe"
|
||||
|
||||
#. name for dwr
|
||||
msgid "Dawro"
|
||||
@ -6935,15 +6935,15 @@ msgstr "Dyugun"
|
||||
|
||||
#. name for dyg
|
||||
msgid "Agta; Villa Viciosa"
|
||||
msgstr ""
|
||||
msgstr "Agta; Villa Viciosa"
|
||||
|
||||
#. name for dyi
|
||||
msgid "Senoufo; Djimini"
|
||||
msgstr ""
|
||||
msgstr "Senoufo; Djimini"
|
||||
|
||||
#. name for dym
|
||||
msgid "Dogon; Yanda Dom"
|
||||
msgstr ""
|
||||
msgstr "Dogon; Yanda Dom"
|
||||
|
||||
#. name for dyn
|
||||
msgid "Dyangadi"
|
||||
@ -7095,19 +7095,19 @@ msgstr "Kol"
|
||||
|
||||
#. name for ekm
|
||||
msgid "Elip"
|
||||
msgstr ""
|
||||
msgstr "Elip"
|
||||
|
||||
#. name for eko
|
||||
msgid "Koti"
|
||||
msgstr ""
|
||||
msgstr "Koti"
|
||||
|
||||
#. name for ekp
|
||||
msgid "Ekpeye"
|
||||
msgstr ""
|
||||
msgstr "Ekpeye"
|
||||
|
||||
#. name for ekr
|
||||
msgid "Yace"
|
||||
msgstr ""
|
||||
msgstr "Yace"
|
||||
|
||||
#. name for eky
|
||||
msgid "Kayah; Eastern"
|
||||
@ -7115,19 +7115,19 @@ msgstr "Kayah oriental"
|
||||
|
||||
#. name for ele
|
||||
msgid "Elepi"
|
||||
msgstr ""
|
||||
msgstr "Elepi"
|
||||
|
||||
#. name for elh
|
||||
msgid "El Hugeirat"
|
||||
msgstr ""
|
||||
msgstr "El Hugeirat"
|
||||
|
||||
#. name for eli
|
||||
msgid "Nding"
|
||||
msgstr ""
|
||||
msgstr "Nding"
|
||||
|
||||
#. name for elk
|
||||
msgid "Elkei"
|
||||
msgstr ""
|
||||
msgstr "Elkei"
|
||||
|
||||
#. name for ell
|
||||
msgid "Greek; Modern (1453-)"
|
||||
@ -7135,19 +7135,19 @@ msgstr "Griego moderno (1453-)"
|
||||
|
||||
#. name for elm
|
||||
msgid "Eleme"
|
||||
msgstr ""
|
||||
msgstr "Eleme"
|
||||
|
||||
#. name for elo
|
||||
msgid "El Molo"
|
||||
msgstr ""
|
||||
msgstr "El Molo"
|
||||
|
||||
#. name for elp
|
||||
msgid "Elpaputih"
|
||||
msgstr ""
|
||||
msgstr "Elpaputih"
|
||||
|
||||
#. name for elu
|
||||
msgid "Elu"
|
||||
msgstr ""
|
||||
msgstr "Elu"
|
||||
|
||||
#. name for elx
|
||||
msgid "Elamite"
|
||||
@ -7155,15 +7155,15 @@ msgstr "Elamita"
|
||||
|
||||
#. name for ema
|
||||
msgid "Emai-Iuleha-Ora"
|
||||
msgstr ""
|
||||
msgstr "Emai-Iuleha-Ora"
|
||||
|
||||
#. name for emb
|
||||
msgid "Embaloh"
|
||||
msgstr ""
|
||||
msgstr "Embaloh"
|
||||
|
||||
#. name for eme
|
||||
msgid "Emerillon"
|
||||
msgstr ""
|
||||
msgstr "Emerillon"
|
||||
|
||||
#. name for emg
|
||||
msgid "Meohang; Eastern"
|
||||
@ -7171,7 +7171,7 @@ msgstr "Meohang oriental"
|
||||
|
||||
#. name for emi
|
||||
msgid "Mussau-Emira"
|
||||
msgstr ""
|
||||
msgstr "Mussau-Emira"
|
||||
|
||||
#. name for emk
|
||||
msgid "Maninkakan; Eastern"
|
||||
@ -7179,15 +7179,15 @@ msgstr "Maninkakan oriental"
|
||||
|
||||
#. name for emm
|
||||
msgid "Mamulique"
|
||||
msgstr ""
|
||||
msgstr "Mamulique"
|
||||
|
||||
#. name for emn
|
||||
msgid "Eman"
|
||||
msgstr ""
|
||||
msgstr "Eman"
|
||||
|
||||
#. name for emo
|
||||
msgid "Emok"
|
||||
msgstr ""
|
||||
msgstr "Emok"
|
||||
|
||||
#. name for emp
|
||||
msgid "Emberá; Northern"
|
||||
@ -7203,11 +7203,11 @@ msgstr "Muria oriental"
|
||||
|
||||
#. name for emw
|
||||
msgid "Emplawas"
|
||||
msgstr ""
|
||||
msgstr "Emplawas"
|
||||
|
||||
#. name for emx
|
||||
msgid "Erromintxela"
|
||||
msgstr ""
|
||||
msgstr "Erromintxela"
|
||||
|
||||
#. name for emy
|
||||
msgid "Mayan; Epigraphic"
|
||||
|
@ -9,14 +9,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2012-03-06 13:55+0000\n"
"PO-Revision-Date: 2012-04-18 13:08+0000\n"
"Last-Translator: Asier Iturralde Sarasola <Unknown>\n"
"Language-Team: Euskara <itzulpena@comtropos.com>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2012-03-07 05:12+0000\n"
"X-Generator: Launchpad (build 14907)\n"
"X-Launchpad-Export-Date: 2012-04-19 04:36+0000\n"
"X-Generator: Launchpad (build 15108)\n"
"Language: eu\n"

#. name for aaa
@ -27125,7 +27125,7 @@ msgstr ""

#. name for vie
msgid "Vietnamese"
msgstr "Mahastiak"
msgstr "Vietnamera"

#. name for vif
msgid "Vili"

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__   = u'calibre'
numeric_version = (0, 8, 46)
numeric_version = (0, 8, 47)
__version__   = u'.'.join(map(unicode, numeric_version))
__author__    = u"Kovid Goyal <kovid@kovidgoyal.net>"

@ -259,7 +259,7 @@ class LRXMetadataReader(MetadataReaderPlugin):
class MOBIMetadataReader(MetadataReaderPlugin):

    name = 'Read MOBI metadata'
    file_types  = set(['mobi', 'prc', 'azw', 'azw4', 'pobi'])
    file_types  = set(['mobi', 'prc', 'azw', 'azw3', 'azw4', 'pobi'])
    description = _('Read metadata from %s files')%'MOBI'

    def get_metadata(self, stream, ftype):
@ -40,6 +40,7 @@ class ANDROID(USBMS):
            0xcac : [0x100, 0x0227, 0x0226, 0x222],
            0xccf : [0x100, 0x0227, 0x0226, 0x222],
            0x2910 : [0x222],
            0xff9  : [0x9999],
            },

        # Eken
@ -174,7 +175,7 @@ class ANDROID(USBMS):
            'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
            'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON',
            'VIZIO', 'GOOGLE', 'FREESCAL', 'KOBO_INC', 'LENOVO', 'ROCKCHIP',
            'POCKET', 'ONDA_MID', 'ZENITHIN', 'INGENIC']
            'POCKET', 'ONDA_MID', 'ZENITHIN', 'INGENIC', 'PMID701C']
    WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
            '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
            'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID',
@ -189,7 +190,8 @@ class ANDROID(USBMS):
            'UMS', '.K080', 'P990', 'LTE', 'MB853', 'GT-S5660_CARD', 'A107',
            'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855',
            'XT910', 'BOOK_A10', 'USB_2.0_DRIVER', 'I9100T', 'P999DW',
            'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD']
            'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD', 'USB_2.0_DRIVER',
            'GT-S5830L_CARD']
    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
            'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
            'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
@ -197,7 +199,7 @@ class ANDROID(USBMS):
            'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853',
            'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD',
            'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC',
            'FILE-CD_GADGET', 'GT-I9001_CARD']
            'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER']

    OSX_MAIN_MEM = 'Android Device Main Memory'

@ -325,6 +325,10 @@ class KINDLE2(KINDLE):
    OPT_APNX_ACCURATE = 1
    OPT_APNX_CUST_COL = 2

    def formats_to_scan_for(self):
        ans = USBMS.formats_to_scan_for(self) | {'azw3'}
        return ans

    def books(self, oncard=None, end_session=True):
        bl = USBMS.books(self, oncard=oncard, end_session=end_session)
        # Read collections information
@ -423,6 +427,8 @@ class KINDLE_FIRE(KINDLE2):
    name = 'Kindle Fire Device Interface'
    description = _('Communicate with the Kindle Fire')
    gui_name = 'Fire'
    FORMATS = list(KINDLE2.FORMATS)
    FORMATS.insert(0, 'azw3')

    PRODUCT_ID = [0x0006]
    BCD = [0x216, 0x100]

@ -307,11 +307,21 @@ class PRST1(USBMS):

        # Work-around for Sony Bug (SD Card DB not using right SQLite sequence)
        if source_id == 1:
            # Update any existing sequence numbers in the table that aren't in the required range
            sdcard_sequence_start = '4294967296'
            query = 'UPDATE sqlite_sequence SET seq = ? WHERE seq < ?'
            t = (sdcard_sequence_start, sdcard_sequence_start,)
            cursor.execute(query, t)

            # Insert sequence numbers for tables we will be manipulating, if they don't already exist
            query = ('INSERT INTO sqlite_sequence (name, seq) '
                     'SELECT ?, ? '
                     'WHERE NOT EXISTS (SELECT 1 FROM sqlite_sequence WHERE name = ?)')
            cursor.execute(query, ('books',sdcard_sequence_start,'books',))
            cursor.execute(query, ('collection',sdcard_sequence_start,'collection',))
            cursor.execute(query, ('collections',sdcard_sequence_start,'collections',))
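
The guarded INSERT above is SQLite's INSERT ... SELECT ... WHERE NOT EXISTS idiom: it seeds a sequence row only when none exists, and the 4294967296 start appears intended to keep SD-card row ids above 2**32, clear of main-memory ids. A self-contained sketch with an illustrative table (not the Sony database):

    import sqlite3

    conn = sqlite3.connect(':memory:')
    conn.execute('CREATE TABLE seqs (name TEXT, seq INTEGER)')
    query = ('INSERT INTO seqs (name, seq) SELECT ?, ? '
             'WHERE NOT EXISTS (SELECT 1 FROM seqs WHERE name = ?)')
    conn.execute(query, ('books', 4294967296, 'books'))
    conn.execute(query, ('books', 4294967296, 'books'))  # second call: no-op
    print(conn.execute('SELECT COUNT(*) FROM seqs').fetchone()[0])  # -> 1
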

        for book in booklist:
            # Run through plugboard if needed
            if plugboard is not None:
@ -128,6 +128,9 @@ class USBMS(CLI, Device):
        elif location_code == 'B':
            self._update_driveinfo_file(self._card_b_prefix, location_code, name)

    def formats_to_scan_for(self):
        return set(self.settings().format_map) | set(self.FORMATS)

    def books(self, oncard=None, end_session=True):
        from calibre.ebooks.metadata.meta import path_to_ext

@ -166,7 +169,7 @@ class USBMS(CLI, Device):
        for idx,b in enumerate(bl):
            bl_cache[b.lpath] = idx

        all_formats = set(self.settings().format_map) | set(self.FORMATS)
        all_formats = self.formats_to_scan_for()

        def update_booklist(filename, path, prefix):
            changed = False

@ -31,7 +31,7 @@ BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'ht
        'epub', 'fb2', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
        'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb',
        'xps', 'oxps', 'azw4', 'book', 'zbf', 'pobi', 'docx', 'md',
        'textile', 'markdown', 'ibook', 'iba']
        'textile', 'markdown', 'ibook', 'iba', 'azw3']

class HTMLRenderer(object):

@ -93,6 +93,20 @@ def extract_calibre_cover(raw, base, log):
        if os.path.exists(img):
            return open(img, 'rb').read()

    # Look for a simple cover, i.e. a body with no text and only one <img> tag
    if matches is None:
        body = soup.find('body')
        if body is not None:
            text = u''.join(map(unicode, body.findAll(text=True)))
            if text.strip():
                # Body has text, abort
                return
            images = body.findAll('img', src=True)
            if 0 < len(images) < 2:
                img = os.path.join(base, *images[0]['src'].split('/'))
                if os.path.exists(img):
                    return open(img, 'rb').read()
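
For reference, a title page this heuristic accepts is simply a body whose only content is one image. A minimal check of the same rule (a sketch, not calibre's API; the HTML is hypothetical):

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    raw = '<html><body><img src="cover.jpg"/></body></html>'
    soup = BeautifulSoup(raw)
    body = soup.find('body')
    text = u''.join(map(unicode, body.findAll(text=True)))
    images = body.findAll('img', src=True)
    print(not text.strip() and len(images) == 1)  # True: treated as a cover
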

def render_html_svg_workaround(path_to_html, log, width=590, height=750):
    from calibre.ebooks.oeb.base import SVG_NS
    raw = open(path_to_html, 'rb').read()
@ -108,6 +122,7 @@ def render_html_svg_workaround(path_to_html, log, width=590, height=750):
        data = extract_calibre_cover(raw, os.path.dirname(path_to_html), log)
    except:
        pass

    if data is None:
        renderer = render_html(path_to_html, width, height)
        data = getattr(renderer, 'data', None)

@ -159,6 +159,7 @@ def add_pipeline_options(parser, plumber):
              'sr1_search', 'sr1_replace',
              'sr2_search', 'sr2_replace',
              'sr3_search', 'sr3_replace',
              'search_replace',
              ]
          ),

@ -211,6 +212,7 @@ def add_pipeline_options(parser, plumber):
        if rec.level < rec.HIGH:
            option_recommendation_to_cli_option(add_option, rec)


def option_parser():
    parser = OptionParser(usage=USAGE)
    parser.add_option('--list-recipes', default=False, action='store_true',
@ -271,6 +273,34 @@ def abspath(x):
        return x
    return os.path.abspath(os.path.expanduser(x))

def read_sr_patterns(path, log=None):
    import json, re, codecs
    pats = []
    with codecs.open(path, 'r', 'utf-8') as f:
        pat = None
        for line in f.readlines():
            if line.endswith(u'\n'):
                line = line[:-1]

            if pat is None:
                if not line.strip():
                    continue
                try:
                    re.compile(line)
                except:
                    msg = u'Invalid regular expression: %r from file: %r'%(
                        line, path)
                    if log is not None:
                        log.error(msg)
                        raise SystemExit(1)
                    else:
                        raise ValueError(msg)
                pat = line
            else:
                pats.append((pat, line))
                pat = None
    return json.dumps(pats)
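
read_sr_patterns expects alternating lines: a regular expression, then its replacement (an empty line means "delete the match"). A small sketch of such a file and the JSON it produces; the file name and rules are hypothetical:

    sr_txt = u'\\s*Advertisement\\s*\n\n([0-9]+)st\n\\1st.\n'
    with open('sr.txt', 'wb') as f:
        f.write(sr_txt.encode('utf-8'))

    print(read_sr_patterns('sr.txt'))
    # [["\\s*Advertisement\\s*", ""], ["([0-9]+)st", "\\1st."]]
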

def main(args=sys.argv):
    log = Log()
    parser, plumber = create_option_parser(args, log)
@ -278,6 +308,9 @@ def main(args=sys.argv):
    for x in ('read_metadata_from_opf', 'cover'):
        if getattr(opts, x, None) is not None:
            setattr(opts, x, abspath(getattr(opts, x)))
    if opts.search_replace:
        opts.search_replace = read_sr_patterns(opts.search_replace, log)

    recommendations = [(n.dest, getattr(opts, n.dest),
                        OptionRecommendation.HIGH) \
                       for n in parser.options_iter()

@ -28,7 +28,7 @@ class MOBIInput(InputFormatPlugin):
    name = 'MOBI Input'
    author = 'Kovid Goyal'
    description = 'Convert MOBI files (.mobi, .prc, .azw) to HTML'
    file_types = set(['mobi', 'prc', 'azw'])
    file_types = set(['mobi', 'prc', 'azw', 'azw3'])

    def convert(self, stream, options, file_ext, log,
                accelerators):

@ -6,8 +6,6 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from cStringIO import StringIO

from calibre.customize.conversion import OutputFormatPlugin
from calibre.customize.conversion import OptionRecommendation

@ -79,18 +77,9 @@ class MOBIOutput(OutputFormatPlugin):
    def check_for_masthead(self):
        found = 'masthead' in self.oeb.guide
        if not found:
            from calibre.ebooks import generate_masthead
            self.oeb.log.debug('No masthead found in manifest, generating default mastheadImage...')
            try:
                from PIL import Image as PILImage
                PILImage
            except ImportError:
                import Image as PILImage

            raw = open(P('content_server/calibre_banner.png'), 'rb')
            im = PILImage.open(raw)
            of = StringIO()
            im.save(of, 'GIF')
            raw = of.getvalue()
            raw = generate_masthead(unicode(self.oeb.metadata['title'][0]))
            id, href = self.oeb.manifest.generate('masthead', 'masthead')
            self.oeb.manifest.add(id, href, 'image/gif', data=raw)
            self.oeb.guide.add('masthead', 'Masthead Image', href)
@ -151,13 +140,53 @@ class MOBIOutput(OutputFormatPlugin):
            # Fix up the periodical href to point to first section href
            toc.nodes[0].href = toc.nodes[0].nodes[0].href

    def remove_html_cover(self):
        from calibre.ebooks.oeb.base import OEB_DOCS

        oeb = self.oeb
        if not oeb.metadata.cover \
           or 'cover' not in oeb.guide:
            return
        href = oeb.guide['cover'].href
        del oeb.guide['cover']
        item = oeb.manifest.hrefs[href]
        if item.spine_position is not None:
            self.log.warn('Found an HTML cover: ', item.href, 'removing it.',
                'If you find some content missing from the output MOBI, it '
                'is because you misidentified the HTML cover in the input '
                'document')
            oeb.spine.remove(item)
            if item.media_type in OEB_DOCS:
                self.oeb.manifest.remove(item)

    def convert(self, oeb, output_path, input_plugin, opts, log):
        from calibre.utils.config import tweaks
        from calibre.ebooks.mobi.writer2.resources import Resources
        self.log, self.opts, self.oeb = log, opts, oeb

        create_kf8 = tweaks.get('create_kf8', False)

        self.remove_html_cover()
        resources = Resources(oeb, opts, self.is_periodical,
                add_fonts=create_kf8)

        kf8 = self.create_kf8(resources) if create_kf8 else None

        self.log('Creating MOBI 6 output')
        self.write_mobi(input_plugin, output_path, kf8, resources)
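
KF8 output here is opt-in: it is only built when the create_kf8 tweak is true. Assuming calibre's usual tweaks mechanism (the tweak name is taken from the tweaks.get() call above), enabling it is a one-line setting:

    # In the tweaks file (Preferences -> Advanced -> Tweaks); a sketch:
    create_kf8 = True
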

    def create_kf8(self, resources):
        from calibre.ebooks.mobi.writer8.main import KF8Writer
        return KF8Writer(self.oeb, self.opts, resources)

    def write_mobi(self, input_plugin, output_path, kf8, resources):
        from calibre.ebooks.mobi.mobiml import MobiMLizer
        from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
        from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
        from calibre.customize.ui import plugin_for_input_format

        opts, oeb = self.opts, self.oeb
        if not opts.no_inline_toc:
            tocadder = HTMLTOCAdder(title=opts.toc_title, position='start' if
                    opts.mobi_toc_at_start else 'end')
@ -169,12 +198,15 @@ class MOBIOutput(OutputFormatPlugin):
                rasterizer(oeb, opts)
            except Unavailable:
                self.log.warn('SVG rasterizer unavailable, SVG will not be converted')
            else:
                # Add rasterized SVG images
                resources.add_extra_images()
        mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
        mobimlizer(oeb, opts)
        self.check_for_periodical()
        write_page_breaks_after_item = input_plugin is not plugin_for_input_format('cbz')
        from calibre.ebooks.mobi.writer2.main import MobiWriter
        writer = MobiWriter(opts,
        writer = MobiWriter(opts, resources, kf8,
            write_page_breaks_after_item=write_page_breaks_after_item)
        writer(oeb, output_path)

@ -626,6 +626,14 @@ OptionRecommendation(name='sr3_search',
OptionRecommendation(name='sr3_replace',
            recommended_value='', level=OptionRecommendation.LOW,
            help=_('Replacement to replace the text found with sr3-search.')),

OptionRecommendation(name='search_replace',
            recommended_value=None, level=OptionRecommendation.LOW, help=_(
                'Path to a file containing search and replace regular expressions. '
                'The file must contain alternating lines of regular expression '
                'followed by replacement pattern (which can be an empty line). '
                'The regular expression must be in the python regex syntax and '
                'the file must be UTF-8 encoded.')),
]
# }}}
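
On the command line this option surfaces as a flag pointing at the UTF-8 patterns file described above (assuming the usual dest-to-flag mapping, so the exact name --search-replace is an assumption), e.g. ebook-convert input.epub output.mobi --search-replace sr.txt, where sr.txt is in the alternating pattern/replacement format parsed by read_sr_patterns().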
@ -5,7 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import functools, re
import functools, re, json

from calibre import entity_to_unicode, as_unicode

@ -515,12 +515,10 @@ class HTMLPreProcessor(object):
        if not getattr(self.extra_opts, 'keep_ligatures', False):
            html = _ligpat.sub(lambda m:LIGATURES[m.group()], html)

        for search, replace in [['sr3_search', 'sr3_replace'], ['sr2_search', 'sr2_replace'], ['sr1_search', 'sr1_replace']]:
            search_pattern = getattr(self.extra_opts, search, '')
            if search_pattern:
        # Function for processing search and replace
        def do_search_replace(search_pattern, replace_txt):
            try:
                search_re = re.compile(search_pattern)
                replace_txt = getattr(self.extra_opts, replace, '')
                if not replace_txt:
                    replace_txt = ''
                rules.insert(0, (search_re, replace_txt))
@ -528,6 +526,21 @@ class HTMLPreProcessor(object):
                self.log.error('Failed to parse %r regexp because %s' %
                        (search, as_unicode(e)))

        # search / replace using the sr?_search / sr?_replace options
        for i in range(1, 4):
            search, replace = 'sr%d_search'%i, 'sr%d_replace'%i
            search_pattern = getattr(self.extra_opts, search, '')
            replace_txt = getattr(self.extra_opts, replace, '')
            if search_pattern:
                do_search_replace(search_pattern, replace_txt)

        # multi-search / replace using the search_replace option
        search_replace = getattr(self.extra_opts, 'search_replace', None)
        if search_replace:
            search_replace = json.loads(search_replace)
            for search_pattern, replace_txt in search_replace:
                do_search_replace(search_pattern, replace_txt)

        end_rules = []
        # delete soft hyphens - moved here so it's executed after header/footer removal
        if is_pdftohtml:

@ -48,7 +48,8 @@ def merge_result(oldmi, newmi, ensure_fields=None):

    return newmi

def main(do_identify, covers, metadata, ensure_fields):
def main(do_identify, covers, metadata, ensure_fields, tdir):
    os.chdir(tdir)
    failed_ids = set()
    failed_covers = set()
    all_failed = True
@ -103,7 +104,8 @@ def single_identify(title, authors, identifiers):
    return [metadata_to_opf(r) for r in results], [r.has_cached_cover_url for
        r in results], dump_caches(), log.dump()

def single_covers(title, authors, identifiers, caches):
def single_covers(title, authors, identifiers, caches, tdir):
    os.chdir(tdir)
    load_caches(caches)
    log = GUILog()
    results = Queue()

@ -308,8 +308,10 @@ class MOBIHeader(object): # {{{
self.extra_data_flags = 0
if self.has_extra_data_flags:
self.unknown4 = self.raw[180:192]
self.fdst_idx, self.fdst_count = struct.unpack_from(b'>II',
self.fdst_idx, self.fdst_count = struct.unpack_from(b'>LL',
self.raw, 192)
if self.fdst_count <= 1:
self.fdst_idx = NULL_INDEX
(self.fcis_number, self.fcis_count, self.flis_number,
self.flis_count) = struct.unpack(b'>IIII',
self.raw[200:216])

@ -342,7 +344,7 @@ class MOBIHeader(object): # {{{
'first_non_book_record', 'datp_record_offset', 'fcis_number',
'flis_number', 'primary_index_record', 'fdst_idx',
'first_image_index'):
if hasattr(self, x):
if hasattr(self, x) and getattr(self, x) != NULL_INDEX:
setattr(self, x, self.header_offset+getattr(self, x))

if self.has_exth:
158
src/calibre/ebooks/mobi/debug/index.py
Normal file
@ -0,0 +1,158 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from collections import OrderedDict, namedtuple

from calibre.ebooks.mobi.reader.headers import NULL_INDEX
from calibre.ebooks.mobi.reader.index import (CNCX, parse_indx_header,
parse_tagx_section, parse_index_record, INDEX_HEADER_FIELDS)
from calibre.ebooks.mobi.reader.ncx import (tag_fieldname_map, default_entry)

File = namedtuple('File',
'file_number name divtbl_count start_position length')

Elem = namedtuple('Elem',
'insert_pos toc_text file_number sequence_number start_pos '
'length')

def read_index(sections, idx, codec):
table, cncx = OrderedDict(), CNCX([], codec)

data = sections[idx].raw

indx_header = parse_indx_header(data)
indx_count = indx_header['count']

if indx_header['ncncx'] > 0:
off = idx + indx_count + 1
cncx_records = [x.raw for x in sections[off:off+indx_header['ncncx']]]
cncx = CNCX(cncx_records, codec)

tag_section_start = indx_header['tagx']
control_byte_count, tags = parse_tagx_section(data[tag_section_start:])

for i in xrange(idx + 1, idx + 1 + indx_count):
# Index record
data = sections[i].raw
parse_index_record(table, data, control_byte_count, tags, codec,
indx_header['ordt_map'], strict=True)
return table, cncx, indx_header

class Index(object):

def __init__(self, idx, records, codec):
self.table = self.cncx = self.header = self.records = None
if idx != NULL_INDEX:
self.table, self.cncx, self.header = read_index(records, idx, codec)

def render(self):
ans = ['*'*10 + ' Index Header ' + '*'*10]
a = ans.append
if self.header is not None:
for field in INDEX_HEADER_FIELDS:
a('%-12s: %r'%(field, self.header[field]))
ans.extend(['', ''])

if self.cncx:
a('*'*10 + ' CNCX ' + '*'*10)
for offset, val in self.cncx.iteritems():
a('%10s: %s'%(offset, val))
ans.extend(['', ''])

if self.table is not None:
a('*'*10 + ' %d Index Entries '%len(self.table) + '*'*10)
for k, v in self.table.iteritems():
a('%s: %r'%(k, v))

if self.records:
ans.extend(['', '', '*'*10 + ' Parsed Entries ' + '*'*10])
for f in self.records:
a(repr(f))

return ans + ['']

def __str__(self):
return '\n'.join(self.render())

class SKELIndex(Index):

def __init__(self, skelidx, records, codec):
super(SKELIndex, self).__init__(skelidx, records, codec)
self.records = []

if self.table is not None:
for i, text in enumerate(self.table.iterkeys()):
tag_map = self.table[text]
if set(tag_map.iterkeys()) != {1, 6}:
raise ValueError('SKEL Index has unknown tags: %s'%
(set(tag_map.iterkeys())-{1,6}))
self.records.append(File(
i, # file_number
text, # name
tag_map[1][0], # divtbl_count
tag_map[6][0], # start_pos
tag_map[6][1]) # length
)

class SECTIndex(Index):

def __init__(self, sectidx, records, codec):
super(SECTIndex, self).__init__(sectidx, records, codec)
self.records = []

if self.table is not None:
for i, text in enumerate(self.table.iterkeys()):
tag_map = self.table[text]
if set(tag_map.iterkeys()) != {2, 3, 4, 6}:
raise ValueError('SECT Index has unknown tags: %s'%
(set(tag_map.iterkeys())-{2, 3, 4, 6}))

toc_text = self.cncx[tag_map[2][0]]
self.records.append(Elem(
int(text), # insert_pos
toc_text, # toc_text
tag_map[3][0], # file_number
tag_map[4][0], # sequence_number
tag_map[6][0], # start_pos
tag_map[6][1] # length
)
)

class NCXIndex(Index):

def __init__(self, ncxidx, records, codec):
super(NCXIndex, self).__init__(ncxidx, records, codec)
self.records = []

if self.table is not None:
for num, x in enumerate(self.table.iteritems()):
text, tag_map = x
entry = default_entry.copy()
entry['name'] = text
entry['num'] = num

for tag in tag_fieldname_map.iterkeys():
fieldname, i = tag_fieldname_map[tag]
if tag in tag_map:
fieldvalue = tag_map[tag][i]
if tag == 6:
# Appears to be an idx into the KF8 elems table with an
# offset
fieldvalue = tuple(tag_map[tag])
entry[fieldname] = fieldvalue
for which, name in {3:'text', 5:'kind', 70:'description',
71:'author', 72:'image_caption',
73:'image_attribution'}.iteritems():
if tag == which:
entry[name] = self.cncx.get(fieldvalue,
default_entry[name])
self.records.append(entry)
@ -10,7 +10,9 @@ __docformat__ = 'restructuredtext en'
import sys, os, imghdr, struct
from itertools import izip

from calibre import CurrentDir
from calibre.ebooks.mobi.debug.headers import TextRecord
from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex, NCXIndex)
from calibre.ebooks.mobi.utils import read_font_record
from calibre.ebooks.mobi.debug import format_bytes
from calibre.ebooks.mobi.reader.headers import NULL_INDEX

@ -42,6 +44,24 @@ class FDST(object):
return '\n'.join(ans)

class File(object):

def __init__(self, skel, skeleton, text, first_aid, sections):
self.name = 'part%04d'%skel.file_number
self.skeleton, self.text, self.first_aid = skeleton, text, first_aid
self.sections = sections

def dump(self, ddir):
with open(os.path.join(ddir, self.name + '.html'), 'wb') as f:
f.write(self.text)
base = os.path.join(ddir, self.name + '-parts')
os.mkdir(base)
with CurrentDir(base):
with open('skeleton.html', 'wb') as f:
f.write(self.skeleton)
for i, text in enumerate(self.sections):
with open('sect-%04d.html'%i, 'wb') as f:
f.write(text)

class MOBIFile(object):

@ -65,6 +85,8 @@ class MOBIFile(object):
self.header = self.mf.mobi8_header
self.extract_resources()
self.read_fdst()
self.read_indices()
self.build_files()

def print_header(self, f=sys.stdout):
print (str(self.mf.palmdb).encode('utf-8'), file=f)

@ -85,6 +107,34 @@ class MOBIFile(object):
if self.fdst.num_sections != self.header.fdst_count:
raise ValueError('KF8 Header contains invalid FDST count')

def read_indices(self):
self.skel_index = SKELIndex(self.header.skel_idx, self.mf.records,
self.header.encoding)
self.sect_index = SECTIndex(self.header.sect_idx, self.mf.records,
self.header.encoding)
self.ncx_index = NCXIndex(self.header.primary_index_record,
self.mf.records, self.header.encoding)

def build_files(self):
text = self.raw_text
self.files = []
for skel in self.skel_index.records:
sects = [x for x in self.sect_index.records if x.file_number
== skel.file_number]
skeleton = text[skel.start_position:skel.start_position+skel.length]
ftext = skeleton
first_aid = sects[0].toc_text
sections = []

for sect in sects:
start_pos = skel.start_position + skel.length + sect.start_pos
sect_text = text[start_pos:start_pos+sect.length]
insert_pos = sect.insert_pos - skel.start_position
ftext = ftext[:insert_pos] + sect_text + ftext[insert_pos:]
sections.append(sect_text)

self.files.append(File(skel, skeleton, ftext, first_aid, sections))

def extract_resources(self):
self.resource_map = []
known_types = {b'FLIS', b'FCIS', b'SRCS',

@ -131,7 +181,7 @@ def inspect_mobi(mobi_file, ddir):
with open(alltext, 'wb') as of:
of.write(f.raw_text)

for x in ('text_records', 'images', 'fonts', 'binary'):
for x in ('text_records', 'images', 'fonts', 'binary', 'files'):
os.mkdir(os.path.join(ddir, x))

for rec in f.text_records:

@ -145,3 +195,15 @@ def inspect_mobi(mobi_file, ddir):
with open(os.path.join(ddir, 'fdst.record'), 'wb') as fo:
fo.write(str(f.fdst).encode('utf-8'))

with open(os.path.join(ddir, 'skel.record'), 'wb') as fo:
fo.write(str(f.skel_index).encode('utf-8'))

with open(os.path.join(ddir, 'sect.record'), 'wb') as fo:
fo.write(str(f.sect_index).encode('utf-8'))

with open(os.path.join(ddir, 'ncx.record'), 'wb') as fo:
fo.write(str(f.ncx_index).encode('utf-8'))

for part in f.files:
part.dump(os.path.join(ddir, 'files'))
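build_files() above splices each section back into its skeleton at an offset measured from the start of the file rather than the skeleton itself. A toy illustration of that string arithmetic (made-up offsets, not real KF8 data):

    skeleton = '<html><body></body></html>'
    sect_text = '<p>hello</p>'
    start_position = 100               # hypothetical File.start_position
    insert_pos = 112 - start_position  # hypothetical Elem.insert_pos of 112
    ftext = skeleton[:insert_pos] + sect_text + skeleton[insert_pos:]
    print(ftext)  # -> '<html><body><p>hello</p></body></html>'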
@ -10,7 +10,7 @@ import copy
import re
from lxml import etree
from calibre.ebooks.oeb.base import namespace, barename
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, OEB_DOCS, urlnormalize
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, urlnormalize
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.transforms.flatcss import KeyMapper
from calibre.utils.magick.draw import identify_data

@ -109,26 +109,8 @@ class MobiMLizer(object):
self.profile = profile = context.dest
self.fnums = fnums = dict((v, k) for k, v in profile.fnums.items())
self.fmap = KeyMapper(profile.fbase, profile.fbase, fnums.keys())
self.remove_html_cover()
self.mobimlize_spine()

def remove_html_cover(self):
oeb = self.oeb
if not oeb.metadata.cover \
or 'cover' not in oeb.guide:
return
href = oeb.guide['cover'].href
del oeb.guide['cover']
item = oeb.manifest.hrefs[href]
if item.spine_position is not None:
self.log.warn('Found an HTML cover,', item.href, 'removing it.',
'If you find some content missing from the output MOBI, it '
'is because you misidentified the HTML cover in the input '
'document')
oeb.spine.remove(item)
if item.media_type in OEB_DOCS:
self.oeb.manifest.remove(item)

def mobimlize_spine(self):
'Iterate over the spine and convert it to MOBIML'
for item in self.oeb.spine:

@ -473,7 +455,7 @@ class MobiMLizer(object):
if tag in TABLE_TAGS and self.ignore_tables:
tag = 'span' if tag == 'td' else 'div'

if tag == 'table':
if tag in ('table', 'td', 'tr'):
col = style.backgroundColor
if col:
elem.set('bgcolor', col)
@ -111,6 +111,13 @@ class CNCX(object): # {{{
def get(self, offset, default=None):
return self.records.get(offset, default)

def __bool__(self):
return bool(self.records)
__nonzero__ = __bool__

def iteritems(self):
return self.records.iteritems()
# }}}

def parse_tagx_section(data):
@ -7,7 +7,7 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import struct, string, imghdr, zlib
import struct, string, imghdr, zlib, os
from collections import OrderedDict

from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail

@ -364,7 +364,7 @@ def count_set_bits(num):
num >>= 1
return ans

def to_base(num, base=32):
def to_base(num, base=32, min_num_digits=None):
digits = string.digits + string.ascii_uppercase
sign = 1 if num >= 0 else -1
if num == 0: return '0'

@ -373,6 +373,8 @@ def to_base(num, base=32):
while num:
ans.append(digits[(num % base)])
num //= base
if min_num_digits is not None and len(ans) < min_num_digits:
ans.extend('0'*(min_num_digits - len(ans)))
if sign < 0:
ans.append('-')
ans.reverse()
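The new min_num_digits parameter zero-pads the base-32 string. A self-contained mirror of the patched helper, useful for checking the padding logic (not the calibre function itself):

    import string

    def to_base(num, base=32, min_num_digits=None):
        digits = string.digits + string.ascii_uppercase
        if num == 0:
            return '0'
        sign = 1 if num >= 0 else -1
        num *= sign
        ans = []
        while num:
            ans.append(digits[num % base])
            num //= base
        if min_num_digits is not None and len(ans) < min_num_digits:
            ans.extend('0' * (min_num_digits - len(ans)))
        if sign < 0:
            ans.append('-')
        ans.reverse()
        return ''.join(ans)

    print(to_base(100, base=32, min_num_digits=4))  # -> '0034'
    print(to_base(5, base=32, min_num_digits=10))   # -> '0000000005'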
@ -388,27 +390,8 @@ def mobify_image(data):
data = im.export('gif')
return data

def read_zlib_header(header):
header = bytearray(header)
# See sec 2.2 of RFC 1950 for the zlib stream format
# http://www.ietf.org/rfc/rfc1950.txt
if (header[0]*256 + header[1])%31 != 0:
return None, 'Bad zlib header, FCHECK failed'

cmf = header[0] & 0b1111
cinfo = header[0] >> 4
if cmf != 8:
return None, 'Unknown zlib compression method: %d'%cmf
if cinfo > 7:
return None, 'Invalid CINFO field in zlib header: %d'%cinfo
fdict = (header[1]&0b10000)>>5
if fdict != 0:
return None, 'FDICT based zlib compression not supported'
wbits = cinfo + 8
return wbits, None

def read_font_record(data, extent=1040): # {{{
# Font records {{{
def read_font_record(data, extent=1040):
'''
Return the font encoded in the MOBI FONT record represented by data.
The return value in a dict with fields raw_data, font_data, err, ext,

@ -466,15 +449,8 @@ def read_font_record(data, extent=1040): # {{{
if flags & 0b1:
# ZLIB compressed data
wbits, err = read_zlib_header(font_data[:2])
if err is not None:
ans['err'] = err
return ans
adler32, = struct.unpack_from(b'>I', font_data, len(font_data) - 4)
try:
# remove two bytes of zlib header and 4 bytes of trailing checksum
# negative wbits indicates no standard gzip header
font_data = zlib.decompress(font_data[2:-4], -wbits, usize)
font_data = zlib.decompress(font_data)
except Exception as e:
ans['err'] = 'Failed to zlib decompress font data (%s)'%e
return ans

@ -483,23 +459,42 @@ def read_font_record(data, extent=1040): # {{{
ans['err'] = 'Uncompressed font size mismatch'
return ans

if False:
# For some reason these almost never match, probably Amazon has a
# buggy Adler32 implementation
sig = (zlib.adler32(font_data) & 0xffffffff)
if sig != adler32:
ans['err'] = ('Adler checksum did not match. Stored: %d '
'Calculated: %d')%(adler32, sig)
return ans

ans['font_data'] = font_data
sig = font_data[:4]
ans['ext'] = ('ttf' if sig in {b'\0\1\0\0', b'true', b'ttcf'}
else 'otf' if sig == b'OTTO' else 'dat')

return ans

def write_font_record(data, obfuscate=True, compress=True):
'''
Write the ttf/otf font represented by data into a font record. See
read_font_record() for details on the format of the record.
'''

flags = 0
key_len = 20
usize = len(data)
xor_key = b''
if compress:
flags |= 0b1
data = zlib.compress(data, 9)
if obfuscate:
flags |= 0b10
xor_key = os.urandom(key_len)
key = bytearray(xor_key)
data = bytearray(data)
for i in xrange(min(1040, len(data))):  # guard against data shorter than 1040 bytes
data[i] ^= key[i%key_len]
data = bytes(data)

key_start = struct.calcsize(b'>5L') + 4
data_start = key_start + len(xor_key)

header = b'FONT' + struct.pack(b'>5L', usize, flags, data_start,
len(xor_key), key_start)

return header + xor_key + data

# }}}
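Given the record layout produced by write_font_record() above (b'FONT', five big-endian longs, the XOR key, then the payload), a matching decoder sketch looks like this (a standalone illustration under those assumptions, not the calibre reader):

    import struct, zlib

    def decode_font_record(record):
        # The five longs are: usize, flags, data_start, key_len, key_start
        usize, flags, data_start, key_len, key_start = struct.unpack_from(
            b'>5L', record, 4)
        data = bytearray(record[data_start:])
        if flags & 0b10:  # obfuscated: undo the XOR of the first 1040 bytes
            key = bytearray(record[key_start:key_start+key_len])
            for i in range(min(1040, len(data))):
                data[i] ^= key[i % key_len]
        data = bytes(data)
        if flags & 0b1:   # zlib compressed
            data = zlib.decompress(data)
        assert len(data) == usize
        return data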
@ -11,17 +11,15 @@ import re, random, time
from cStringIO import StringIO
from struct import pack

from calibre.ebooks import normalize, generate_masthead
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
from calibre.ebooks import normalize
from calibre.ebooks.mobi.writer2.serializer import Serializer
from calibre.ebooks.compression.palmdoc import compress_doc
from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.utils.filenames import ascii_filename
from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED, RECORD_SIZE)
from calibre.ebooks.mobi.utils import (rescale_image, encint, mobify_image,
encode_trailing_data, align_block, detect_periodical)
from calibre.ebooks.mobi.utils import (encint, encode_trailing_data,
align_block, detect_periodical)
from calibre.ebooks.mobi.writer2.indexer import Indexer
from calibre.ebooks.mobi import MAX_THUMB_DIMEN, MAX_THUMB_SIZE

EXTH_CODES = {
'creator': 100,

@ -50,8 +48,10 @@ WRITE_UNCROSSABLE_BREAKS = False
class MobiWriter(object):
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')

def __init__(self, opts, write_page_breaks_after_item=True):
def __init__(self, opts, resources, kf8, write_page_breaks_after_item=True):
self.opts = opts
self.resources = resources
self.kf8 = kf8
self.write_page_breaks_after_item = write_page_breaks_after_item
self.compression = UNCOMPRESSED if opts.dont_compress else PALMDOC
self.prefer_author_sort = opts.prefer_author_sort

@ -151,66 +151,14 @@ class MobiWriter(object):
# Images {{{

def generate_images(self):
oeb = self.oeb
oeb.logger.info('Serializing images...')
self.image_records = []
self.image_map = {}
self.masthead_offset = 0
index = 1
resources = self.resources
image_records = resources.records
self.image_map = resources.item_map
self.masthead_offset = resources.masthead_offset
self.cover_offset = resources.cover_offset
self.thumbnail_offset = resources.thumbnail_offset

mh_href = None
if 'masthead' in oeb.guide and oeb.guide['masthead'].href:
mh_href = oeb.guide['masthead'].href
self.image_records.append(None)
index += 1
elif self.is_periodical:
# Generate a default masthead
data = generate_masthead(unicode(self.oeb.metadata['title'][0]))
self.image_records.append(data)
index += 1

cover_href = self.cover_offset = self.thumbnail_offset = None
if (oeb.metadata.cover and
unicode(oeb.metadata.cover[0]) in oeb.manifest.ids):
cover_id = unicode(oeb.metadata.cover[0])
item = oeb.manifest.ids[cover_id]
cover_href = item.href

for item in self.oeb.manifest.values():
if item.media_type not in OEB_RASTER_IMAGES: continue
try:
data = item.data
if self.opts.mobi_keep_original_images:
data = mobify_image(data)
else:
data = rescale_image(data)
except:
oeb.logger.warn('Bad image file %r' % item.href)
continue
else:
if mh_href and item.href == mh_href:
self.image_records[0] = data
continue

self.image_records.append(data)
self.image_map[item.href] = index
index += 1

if cover_href and item.href == cover_href:
self.cover_offset = self.image_map[item.href] - 1
try:
data = rescale_image(item.data, dimen=MAX_THUMB_DIMEN,
maxsizeb=MAX_THUMB_SIZE)
except:
oeb.logger.warn('Failed to generate thumbnail')
else:
self.image_records.append(data)
self.thumbnail_offset = index - 1
index += 1
finally:
item.unload_data_from_memory()

if self.image_records and self.image_records[0] is None:
if image_records and image_records[0] is None:
raise ValueError('Failed to find masthead image in manifest')

# }}}

@ -317,9 +265,12 @@ class MobiWriter(object):
exth = self.build_exth(bt)
first_image_record = None
if self.image_records:
if self.resources:
used_images = self.serializer.used_images
if self.kf8 is not None:
used_images |= self.kf8.used_images
first_image_record = len(self.records)
self.records.extend(self.image_records)
self.resources.serialize(self.records, used_images)
last_content_record = len(self.records) - 1

# FCIS/FLIS (Seems to serve no purpose)
136
src/calibre/ebooks/mobi/writer2/resources.py
Normal file
@ -0,0 +1,136 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import imghdr

from calibre.ebooks.mobi import MAX_THUMB_DIMEN, MAX_THUMB_SIZE
from calibre.ebooks.mobi.utils import (rescale_image, mobify_image,
write_font_record)
from calibre.ebooks import generate_masthead
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES

PLACEHOLDER_GIF = b'GIF89a\x01\x00\x01\x00\x80\x00\x00\x00\x00\x00\xff\xff\xff!\xf9\x04\x01\x00\x00\x00\x00,\x00\x00\x00\x00\x01\x00\x01\x00@\x02\x01D\x00;'

class Resources(object):

def __init__(self, oeb, opts, is_periodical, add_fonts=False):
self.oeb, self.log, self.opts = oeb, oeb.log, opts
self.is_periodical = is_periodical

self.item_map = {}
self.records = []
self.mime_map = {}
self.masthead_offset = 0
self.used_image_indices = set()
self.image_indices = set()
self.cover_offset = self.thumbnail_offset = None

self.add_resources(add_fonts)

def process_image(self, data):
return (mobify_image(data) if self.opts.mobi_keep_original_images else
rescale_image(data))

def add_resources(self, add_fonts):
oeb = self.oeb
oeb.logger.info('Serializing resources...')
index = 1

mh_href = None
if 'masthead' in oeb.guide and oeb.guide['masthead'].href:
mh_href = oeb.guide['masthead'].href
self.records.append(None)
index += 1
self.used_image_indices.add(0)
self.image_indices.add(0)
elif self.is_periodical:
# Generate a default masthead
data = generate_masthead(unicode(self.oeb.metadata['title'][0]))
self.records.append(data)
self.used_image_indices.add(0)
self.image_indices.add(0)
index += 1

cover_href = self.cover_offset = self.thumbnail_offset = None
if (oeb.metadata.cover and
unicode(oeb.metadata.cover[0]) in oeb.manifest.ids):
cover_id = unicode(oeb.metadata.cover[0])
item = oeb.manifest.ids[cover_id]
cover_href = item.href

for item in self.oeb.manifest.values():
if item.media_type not in OEB_RASTER_IMAGES: continue
try:
data = self.process_image(item.data)
except:
self.log.warn('Bad image file %r' % item.href)
continue
else:
if mh_href and item.href == mh_href:
self.records[0] = data
continue

self.image_indices.add(len(self.records))
self.records.append(data)
self.item_map[item.href] = index
self.mime_map[item.href] = 'image/%s'%imghdr.what(None, data)
index += 1

if cover_href and item.href == cover_href:
self.cover_offset = self.item_map[item.href] - 1
self.used_image_indices.add(self.cover_offset)
try:
data = rescale_image(item.data, dimen=MAX_THUMB_DIMEN,
maxsizeb=MAX_THUMB_SIZE)
except:
self.log.warn('Failed to generate thumbnail')
else:
self.image_indices.add(len(self.records))
self.records.append(data)
self.thumbnail_offset = index - 1
self.used_image_indices.add(self.thumbnail_offset)
index += 1
finally:
item.unload_data_from_memory()

if add_fonts:
for item in self.oeb.manifest.values():
if item.href and item.href.rpartition('.')[-1].lower() in {
'ttf', 'otf'} and isinstance(item.data, bytes):
self.records.append(write_font_record(item.data))
self.item_map[item.href] = len(self.records)

def add_extra_images(self):
'''
Add any images that were created after the call to add_resources()
'''
for item in self.oeb.manifest.values():
if (item.media_type not in OEB_RASTER_IMAGES or item.href in
self.item_map): continue
try:
data = self.process_image(item.data)
except:
self.log.warn('Bad image file %r' % item.href)
else:
self.records.append(data)
self.item_map[item.href] = len(self.records)
finally:
item.unload_data_from_memory()

def serialize(self, records, used_images):
used_image_indices = self.used_image_indices | {
v-1 for k, v in self.item_map.iteritems() if k in used_images}
for i in self.image_indices-used_image_indices:
self.records[i] = PLACEHOLDER_GIF
records.extend(self.records)

def __bool__(self):
return bool(self.records)
__nonzero__ = __bool__
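serialize() replaces any image record that no markup actually references with the 1x1 placeholder GIF. The set arithmetic can be checked in isolation (toy records and hypothetical hrefs, not real MOBI data):

    PLACEHOLDER = b'<placeholder gif>'   # stands in for PLACEHOLDER_GIF
    records = [b'masthead', b'img1', b'img2']
    item_map = {'a.png': 2, 'b.png': 3}  # href -> 1-based record index
    image_indices = {0, 1, 2}
    used_image_indices = {0}             # masthead is always kept
    used_images = {'b.png'}              # only b.png referenced by markup

    used_image_indices = used_image_indices | {
        v-1 for k, v in item_map.items() if k in used_images}
    for i in image_indices - used_image_indices:
        records[i] = PLACEHOLDER
    print(records)  # -> [b'masthead', b'<placeholder gif>', b'img2']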
@ -39,6 +39,7 @@ class Serializer(object):
self.oeb = oeb
# Map of image hrefs to image index in the MOBI file
self.images = images
self.used_images = set()
self.logger = oeb.logger
self.is_periodical = is_periodical
self.write_page_breaks_after_item = write_page_breaks_after_item

@ -329,6 +330,7 @@ class Serializer(object):
href = urlnormalize(item.abshref(val))
if href in self.images:
index = self.images[href]
self.used_images.add(href)
buf.write(b'recindex="%05d"' % index)
continue
buf.write(attr.encode('utf-8'))
11
src/calibre/ebooks/mobi/writer8/__init__.py
Normal file
@ -0,0 +1,11 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
205
src/calibre/ebooks/mobi/writer8/main.py
Normal file
@ -0,0 +1,205 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import copy
from functools import partial
from collections import defaultdict

import cssutils
from lxml import etree

from calibre import isbytestring, force_unicode
from calibre.ebooks.mobi.utils import to_base
from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath,
extract, XHTML, urlnormalize)
from calibre.ebooks.oeb.parse_utils import barename
from calibre.ebooks.mobi.writer8.skeleton import Chunker, aid_able_tags

XML_DOCS = OEB_DOCS | {SVG_MIME}

# References to record numbers in KF8 are stored as base-32 encoded integers,
# with 4 digits
to_ref = partial(to_base, base=32, min_num_digits=4)
# References in links are stored with 10 digits
to_href = partial(to_base, base=32, min_num_digits=10)

class KF8Writer(object):

def __init__(self, oeb, opts, resources):
self.oeb, self.opts, self.log = oeb, opts, oeb.log
self.log.info('Creating KF8 output')
self.used_images = set()
self.resources = resources
self.dup_data()
self.flows = [None] # First flow item is reserved for the text

self.replace_resource_links()
self.extract_css_into_flows()
self.extract_svg_into_flows()
self.replace_internal_links_with_placeholders()
self.insert_aid_attributes()
self.chunk_it_up()

def dup_data(self):
''' Duplicate data so that any changes we make to markup/CSS only
affect KF8 output and not MOBI 6 output '''
self._data_cache = {}
for item in self.oeb.manifest:
if item.media_type in XML_DOCS:
self._data_cache[item.href] = copy.deepcopy(item.data)
elif item.media_type in OEB_STYLES:
# I can't figure out how to make an efficient copy of the
# in-memory CSSStylesheet, as deepcopy doesn't work (raises an
# exception)
self._data_cache[item.href] = cssutils.parseString(
item.data.cssText)

def data(self, item):
return self._data_cache.get(item.href, item.data)

def replace_resource_links(self):
''' Replace links to resources (raster images/fonts) with pointers to
the MOBI record containing the resource. The pointers are of the form:
kindle:embed:XXXX?mime=image/* The ?mime= is apparently optional and
not used for fonts. '''

def pointer(item, oref):
ref = item.abshref(oref)
idx = self.resources.item_map.get(ref, None)
if idx is not None:
is_image = self.resources.records[idx-1][:4] not in {b'FONT'}
idx = to_ref(idx)
if is_image:
self.used_images.add(ref)
return 'kindle:embed:%s?mime=%s'%(idx,
self.resources.mime_map[ref])
else:
return 'kindle:embed:%s'%idx
return oref

for item in self.oeb.manifest:

if item.media_type in XML_DOCS:
root = self.data(item)
for tag in XPath('//h:img|//svg:image')(root):
for attr, ref in tag.attrib.iteritems():
if attr.split('}')[-1].lower() in {'src', 'href'}:
tag.attrib[attr] = pointer(item, ref)

for tag in XPath('//h:style')(root):
if tag.text:
sheet = cssutils.parseString(tag.text)
replacer = partial(pointer, item)
cssutils.replaceUrls(sheet, replacer,
ignoreImportRules=True)
repl = sheet.cssText
if isbytestring(repl):
repl = repl.decode('utf-8')
tag.text = '\n'+ repl + '\n'

elif item.media_type in OEB_STYLES:
sheet = self.data(item)
replacer = partial(pointer, item)
cssutils.replaceUrls(sheet, replacer, ignoreImportRules=True)

def extract_css_into_flows(self):
inlines = defaultdict(list) # Ensure identical <style>s not repeated
sheets = {}

for item in self.oeb.manifest:
if item.media_type in OEB_STYLES:
data = self.data(item).cssText
self.flows.append(force_unicode(data, 'utf-8'))
sheets[item.href] = len(self.flows)

for item in self.oeb.spine:
root = self.data(item)

for link in XPath('//h:link[@href]')(root):
href = item.abshref(link.get('href'))
idx = sheets.get(href, None)
if idx is not None:
idx = to_ref(idx)
link.set('href', 'kindle:flow:%s?mime=text/css'%idx)

for tag in XPath('//h:style')(root):
p = tag.getparent()
idx = p.index(tag)
raw = tag.text
if not raw or not raw.strip():
extract(tag)
continue
repl = etree.Element(XHTML('link'), type='text/css',
rel='stylesheet')
repl.tail='\n'
p.insert(idx, repl)
extract(tag)
inlines[raw].append(repl)

for raw, elems in inlines.iteritems():
self.flows.append(raw)
idx = to_ref(len(self.flows))
for link in elems:
link.set('href', 'kindle:flow:%s?mime=text/css'%idx)

def extract_svg_into_flows(self):
for item in self.oeb.spine:
root = self.data(item)

for svg in XPath('//svg:svg')(root):
raw = etree.tostring(svg, encoding=unicode, with_tail=False)
self.flows.append(raw)
p = svg.getparent()
pos = p.index(svg)
img = etree.Element(XHTML('img'),
src="kindle:flow:%s?mime=image/svg+xml"%to_ref(
len(self.flows)))
p.insert(pos, img)
extract(svg)

def replace_internal_links_with_placeholders(self):
self.link_map = {}
count = 0
hrefs = {item.href for item in self.oeb.spine}
for item in self.oeb.spine:
root = self.data(item)

for a in XPath('//h:a[@href]')(root):
count += 1
ref = item.abshref(a.get('href'))
href, _, frag = ref.partition('#')
href = urlnormalize(href)
if href in hrefs:
placeholder = 'kindle:pos:fid:0000:off:%s'%to_href(count)
self.link_map[placeholder] = (href, frag)
a.set('href', placeholder)

def insert_aid_attributes(self):
self.id_map = {}
for i, item in enumerate(self.oeb.spine):
root = self.data(item)
aidbase = i * int(1e6)
j = 0
for tag in root.iterdescendants(etree.Element):
id_ = tag.attrib.get('id', None)
if id_ is not None or barename(tag.tag).lower() in aid_able_tags:
aid = aidbase + j
tag.attrib['aid'] = to_base(aid, base=32)
if tag.tag == XHTML('body'):
self.id_map[(item.href, '')] = tag.attrib['aid']
if id_ is not None:
self.id_map[(item.href, id_)] = tag.attrib['aid']

j += 1

def chunk_it_up(self):
chunker = Chunker(self.oeb, self.data)
chunker
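Per the comments above, to_ref and to_href differ only in padding width. For concreteness (arbitrary record numbers):

    print(to_ref(5))     # -> '0005'
    print(to_ref(100))   # -> '0034' (100 == 3*32 + 4)
    print(to_href(100))  # -> '0000000034'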
232
src/calibre/ebooks/mobi/writer8/skeleton.py
Normal file
@ -0,0 +1,232 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import re
from collections import namedtuple

from lxml import etree

from calibre.ebooks.oeb.base import XHTML_NS

CHUNK_SIZE = 8192

# Tags to which kindlegen adds the aid attribute
aid_able_tags = {'a', 'abbr', 'address', 'article', 'aside', 'audio', 'b',
'bdo', 'blockquote', 'body', 'button', 'cite', 'code', 'dd', 'del', 'details',
'dfn', 'div', 'dl', 'dt', 'em', 'fieldset', 'figcaption', 'figure', 'footer',
'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'i', 'ins', 'kbd',
'label', 'legend', 'li', 'map', 'mark', 'meter', 'nav', 'ol', 'output', 'p',
'pre', 'progress', 'q', 'rp', 'rt', 'samp', 'section', 'select', 'small',
'span', 'strong', 'sub', 'summary', 'sup', 'textarea', 'time', 'ul', 'var',
'video'}

_self_closing_pat = re.compile(bytes(
r'<(?P<tag>%s)(?=[\s/])(?P<arg>[^>]*)/>'%('|'.join(aid_able_tags))),
re.IGNORECASE)

def close_self_closing_tags(raw):
return _self_closing_pat.sub(br'<\g<tag>\g<arg>></\g<tag>>', raw)

def path_to_node(node):
ans = []
parent = node.getparent()
while parent is not None:
ans.append(parent.index(node))
node = parent
parent = parent.getparent()
return tuple(reversed(ans))

def node_from_path(root, path):
parent = root
for idx in path:
parent = parent[idx]
return parent

class Chunk(object):

def __init__(self, raw):
self.raw = raw
self.starts_tags = []
self.ends_tags = []
self.insert_pos = None

def __len__(self):
return len(self.raw)

def merge(self, chunk):
self.raw += chunk.raw
self.ends_tags = chunk.ends_tags

class Skeleton(object):

def __init__(self, file_number, item, root, chunks):
self.file_number, self.item = file_number, item
self.chunks = chunks

self.skeleton = self.render(root)
self.body_offset = self.skeleton.find('<body')
self.calculate_metrics(root)

self.calculate_insert_positions()

def render(self, root):
raw = etree.tostring(root, encoding='UTF-8', xml_declaration=True)
raw = raw.replace('<html', '<html xmlns="%s"'%XHTML_NS, 1)
return raw

def calculate_metrics(self, root):
Metric = namedtuple('Metric', 'start end')
self.metrics = {}
for tag in root.xpath('//*[@aid]'):
text = (tag.text or '').encode('utf-8')
raw = etree.tostring(tag, encoding='UTF-8', with_tail=True,
xml_declaration=False)
start_length = len(raw.partition(b'>')[0]) + len(text) + 1
end_length = len(raw.rpartition(b'<')[-1]) + 1
self.metrics[tag.get('aid')] = Metric(start_length, end_length)

def calculate_insert_positions(self):
pos = self.body_offset
for chunk in self.chunks:
for tag in chunk.starts_tags:
pos += self.metrics[tag].start
chunk.insert_pos = pos
pos += len(chunk)
for tag in chunk.ends_tags:
pos += self.metrics[tag].end

class Chunker(object):

def __init__(self, oeb, data_func):
self.oeb, self.log = oeb, oeb.log
self.data = data_func

self.skeletons = []

for i, item in enumerate(self.oeb.spine):
root = self.remove_namespaces(self.data(item))
body = root.xpath('//body')[0]
body.tail = '\n'

# First pass: break up document into rendered strings of length no
# more than CHUNK_SIZE
chunks = []
self.step_into_tag(body, chunks)

# Second pass: Merge neighboring small chunks within the same
# skeleton tag so as to have chunks as close to the CHUNK_SIZE as
# possible.
chunks = self.merge_small_chunks(chunks)

# Third pass: Create the skeleton and calculate the insert position
# for all chunks
self.skeletons.append(Skeleton(i, item, root, chunks))

def remove_namespaces(self, root):
lang = None
for attr, val in root.attrib.iteritems():
if attr.rpartition('}')[-1] == 'lang':
lang = val

# Remove all namespace information from the tree. This means namespaced
# tags have their namespaces removed and all namespace declarations are
# removed. We have to do this manual cloning of the tree as there is no
# other way to remove namespace declarations in lxml. This is done so
# that serialization creates clean HTML 5 markup with no namespaces. We
# insert the XHTML namespace manually after serialization. The
# preceding layers should have removed svg and any other non html
# namespaced tags.
attrib = {'lang':lang} if lang else {}
nroot = etree.Element('html', attrib=attrib)
nroot.text = root.text
nroot.tail = '\n'

for tag in root.iterdescendants(etree.Element):
# We are ignoring all non tag entities in the tree
# like comments and processing instructions, as they make the
# chunking code even harder, for minimal gain.
elem = nroot.makeelement(tag.tag.rpartition('}')[-1],
attrib={k.rpartition('}')[-1]:v for k, v in
tag.attrib.iteritems()})
elem.text, elem.tail = tag.text, tag.tail
parent = node_from_path(nroot, path_to_node(tag.getparent()))
parent.append(elem)

return nroot

def step_into_tag(self, tag, chunks):
aid = tag.get('aid')

first_chunk_idx = len(chunks)

# First handle any text
if tag.text and tag.text.strip(): # Leave pure whitespace in the skel
chunks.extend(self.chunk_up_text(tag.text))
tag.text = None

# Now loop over children
for child in list(tag):
raw = etree.tostring(child, encoding='UTF-8',
xml_declaration=False, with_tail=False)
raw = close_self_closing_tags(raw)
if len(raw) > CHUNK_SIZE and child.get('aid', None):
self.step_into_tag(child, chunks)
if child.tail and child.tail.strip(): # Leave pure whitespace
chunks.extend(self.chunk_up_text(child.tail))
child.tail = None
else:
if len(raw) > CHUNK_SIZE:
self.log.warn('Tag %s has no aid and a too large chunk'
' size. Adding anyway.'%child.tag)
chunks.append(Chunk(raw))
if child.tail:
chunks.extend(self.chunk_up_text(child.tail))
tag.remove(child)

if len(chunks) <= first_chunk_idx and chunks:
raise ValueError('Stepped into a tag that generated no chunks.')

# Mark the first and last chunks of this tag
if chunks:
chunks[first_chunk_idx].starts_tags.append(aid)
chunks[-1].ends_tags.append(aid)

def chunk_up_text(self, text):
text = text.encode('utf-8')
ans = []

def split_multibyte_text(raw):
if len(raw) <= CHUNK_SIZE:
return raw, b''
l = raw[:CHUNK_SIZE]
l = l.decode('utf-8', 'ignore').encode('utf-8')
return l, raw[len(l):]

start, rest = split_multibyte_text(text)
ans.append(start)
while rest:
start, rest = split_multibyte_text(rest)
ans.append(b'<span class="AmznBigTextBlock">' + start + b'</span>')
return [Chunk(x) for x in ans]

def merge_small_chunks(self, chunks):
ans = chunks[:1]
for chunk in chunks[1:]:
prev = ans[-1]
if (
chunk.starts_tags or # Starts a tag in the skel
len(chunk) + len(prev) > CHUNK_SIZE or # Too large
prev.ends_tags # Prev chunk ended a tag
):
ans.append(chunk)
else:
prev.merge(chunk)
return ans
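close_self_closing_tags() expands XML self-closed forms so serialized byte lengths match what the chunker measures. Its effect on a hypothetical fragment:

    print(close_self_closing_tags(b'<div class="x"/><p/>'))
    # -> b'<div class="x"></div><p></p>'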
@ -357,7 +357,21 @@ def urlnormalize(href):
parts = (urlquote(part) for part in parts)
return urlunparse(parts)

def extract(elem):
"""
Removes this element from the tree, including its children and
text. The tail text is joined to the previous element or
parent.
"""
parent = elem.getparent()
if parent is not None:
if elem.tail:
previous = elem.getprevious()
if previous is None:
parent.text = (parent.text or '') + elem.tail
else:
previous.tail = (previous.tail or '') + elem.tail
parent.remove(elem)

class DummyHandler(logging.Handler):
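A quick standalone demonstration of the tail handling in extract() above:

    from lxml import etree

    root = etree.fromstring('<div><span>x</span>tail text</div>')
    span = root[0]
    # extract(span) keeps 'tail text' by moving it to the parent's text
    if span.tail:
        prev = span.getprevious()
        if prev is None:
            root.text = (root.text or '') + span.tail
        else:
            prev.tail = (prev.tail or '') + span.tail
    root.remove(span)
    print(etree.tostring(root))  # -> b'<div>tail text</div>'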
@ -367,3 +367,17 @@ class EbookIterator(object):
for x in self.delete_on_exit:
if os.path.exists(x):
os.remove(x)

def get_preprocess_html(path_to_ebook, output):
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
iterator = EbookIterator(path_to_ebook)
iterator.__enter__(only_input_plugin=True)
preprocessor = HTMLPreProcessor(None, False)
with open(output, 'wb') as out:
for path in iterator.spine:
with open(path, 'rb') as f:
html = f.read().decode('utf-8', 'replace')
html = preprocessor(html, get_preprocess_html=True)
out.write(html.encode('utf-8'))
out.write(b'\n\n' + b'-'*80 + b'\n\n')
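get_preprocess_html() is module level so it can run in a clean worker process; the regex builder changes later in this diff invoke it as:

    from calibre.utils.ipc.simple_worker import fork_job

    # tf is a temporary file path that receives the preprocessed HTML
    fork_job('calibre.ebooks.oeb.iterator', 'get_preprocess_html',
            (pathtoebook, tf))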
@ -731,7 +731,7 @@ class Style(object):
parent = self._get_parent()
if parent is not None:
pcss = parent._style.get('text-decoration', None)
if css in ('none', None) and pcss not in (None, 'none'):
if css in ('none', None, 'inherit') and pcss not in (None, 'none'):
return pcss
return css
11
src/calibre/ebooks/pdb/haodoo/__init__.py
Normal file
@ -0,0 +1,11 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
@ -641,6 +641,26 @@ def choose_files(window, name, title,
return fd.get_files()
return None

def choose_save_file(window, name, title, filters=[], all_files=True):
'''
Ask user to choose a file to save to. Can be a non-existent file.
:param filters: list of allowable extensions. Each element of the list
must be a 2-tuple with first element a string describing
the type of files to be filtered and second element a list
of extensions.
:param all_files: If True add All files to filters.
'''
mode = QFileDialog.AnyFile
fd = FileDialog(title=title, name=name, filters=filters,
parent=window, add_all_files_filter=all_files, mode=mode)
fd.setParent(None)
ans = None
if fd.accepted:
ans = fd.get_files()
if ans:
ans = ans[0]
return ans

def choose_images(window, name, title, select_only_single_file=True):
mode = QFileDialog.ExistingFile if select_only_single_file else QFileDialog.ExistingFiles
fd = FileDialog(title=title, name=name,
@ -12,7 +12,7 @@ from PyQt4.Qt import (QMenu, Qt, QInputDialog, QToolButton, QDialog,
QDialogButtonBox, QGridLayout, QLabel, QLineEdit, QIcon, QSize,
QCoreApplication)

from calibre import isbytestring
from calibre import isbytestring, sanitize_file_name_unicode
from calibre.constants import filesystem_encoding, iswindows
from calibre.utils.config import prefs
from calibre.gui2 import (gprefs, warning_dialog, Dispatcher, error_dialog,

@ -275,7 +275,7 @@ class ChooseLibraryAction(InterfaceAction):
'<p>'+_('Choose a new name for the library <b>%s</b>. ')%name +
'<p>'+_('Note that the actual library folder will be renamed.'),
text=name)
newname = unicode(newname)
newname = sanitize_file_name_unicode(unicode(newname))
if not ok or not newname or newname == name:
return
newloc = os.path.join(base, newname)
@ -233,22 +233,26 @@ class Widget(QWidget):
pass

def setup_help(self, help_provider):
w = textwrap.TextWrapper(80)
for name in self._options:
g = getattr(self, 'opt_'+name, None)
if g is None:
continue
help = help_provider(name)
if not help: continue
if self.setup_help_handler(g, help): continue
g._help = help
htext = u'<div>%s</div>'%prepare_string_for_xml(
'\n'.join(w.wrap(help)))
self.setup_widget_help(g)

def setup_widget_help(self, g):
w = textwrap.TextWrapper(80)
htext = u'<div>%s</div>'%prepare_string_for_xml('\n'.join(w.wrap(g._help)))
g.setToolTip(htext)
g.setWhatsThis(htext)
g.__class__.enterEvent = lambda obj, event: self.set_help(getattr(obj, '_help', obj.toolTip()))

def set_value_handler(self, g, val):
'Return True iff you handle setting the value for g'
return False

def post_set_value(self, g, val):

@ -260,6 +264,9 @@ class Widget(QWidget):
def post_get_value(self, g):
pass

def setup_help_handler(self, g, help):
return False

def break_cycles(self):
self.db = None
@ -6,17 +6,16 @@ __docformat__ = 'restructuredtext en'
import re, os

from PyQt4.QtCore import SIGNAL, Qt, pyqtSignal
from PyQt4.QtGui import (QDialog, QWidget, QDialogButtonBox,
QBrush, QTextCursor, QTextEdit)
from PyQt4.Qt import (QDialog, QWidget, QDialogButtonBox,
QBrush, QTextCursor, QTextEdit, QByteArray, Qt, pyqtSignal)

from calibre.gui2.convert.regex_builder_ui import Ui_RegexBuilder
from calibre.gui2.convert.xexp_edit_ui import Ui_Form as Ui_Edit
from calibre.gui2 import error_dialog, choose_files
from calibre.ebooks.oeb.iterator import EbookIterator
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
from calibre.gui2 import error_dialog, choose_files, gprefs
from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
from calibre.constants import iswindows
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
from calibre.ptempfile import TemporaryFile

class RegexBuilder(QDialog, Ui_RegexBuilder):

@ -28,7 +27,8 @@ class RegexBuilder(QDialog, Ui_RegexBuilder):
self.regex_valid()

if not db or not book_id:
self.button_box.addButton(QDialogButtonBox.Open)
button = self.button_box.addButton(QDialogButtonBox.Open)
button.clicked.connect(self.open_clicked)
elif not doc and not self.select_format(db, book_id):
self.cancelled = True
return

@ -37,13 +37,23 @@ class RegexBuilder(QDialog, Ui_RegexBuilder):
self.preview.setPlainText(doc)

self.cancelled = False
self.connect(self.button_box, SIGNAL('clicked(QAbstractButton*)'), self.button_clicked)
self.connect(self.regex, SIGNAL('textChanged(QString)'), self.regex_valid)
self.connect(self.test, SIGNAL('clicked()'), self.do_test)
self.connect(self.previous, SIGNAL('clicked()'), self.goto_previous)
self.connect(self.next, SIGNAL('clicked()'), self.goto_next)
self.button_box.accepted.connect(self.accept)
self.regex.textChanged[str].connect(self.regex_valid)
for src, slot in (('test', 'do'), ('previous', 'goto'), ('next',
'goto')):
getattr(self, src).clicked.connect(getattr(self, '%s_%s'%(slot,
src)))
self.test.setDefault(True)

self.match_locs = []
geom = gprefs.get('regex_builder_geometry', None)
if geom is not None:
self.restoreGeometry(QByteArray(geom))
self.finished.connect(self.save_state)

def save_state(self, result):
geom = bytearray(self.saveGeometry())
gprefs['regex_builder_geometry'] = geom

def regex_valid(self):
regex = unicode(self.regex.text())

@ -129,6 +139,8 @@ class RegexBuilder(QDialog, Ui_RegexBuilder):
d.exec_()
if d.result() == QDialog.Accepted:
format = d.format()
else:
return False

if not format:
error_dialog(self, _('No formats available'),

@ -159,25 +171,27 @@ class RegexBuilder(QDialog, Ui_RegexBuilder):
return True

def open_book(self, pathtoebook):
self.iterator = EbookIterator(pathtoebook)
self.iterator.__enter__(only_input_plugin=True)
text = [u'']
preprocessor = HTMLPreProcessor(None, False)
for path in self.iterator.spine:
with open(path, 'rb') as f:
html = f.read().decode('utf-8', 'replace')
html = preprocessor(html, get_preprocess_html=True)
text.append(html)
self.preview.setPlainText('\n---\n'.join(text))
with TemporaryFile('_prepprocess_gui') as tf:
err_msg = _('Failed to generate markup for testing. Click '
'"Show Details" to learn more.')
try:
fork_job('calibre.ebooks.oeb.iterator', 'get_preprocess_html',
(pathtoebook, tf))
except WorkerError as e:
return error_dialog(self, _('Failed to generate preview'),
err_msg, det_msg=e.orig_tb, show=True)
except:
import traceback
return error_dialog(self, _('Failed to generate preview'),
err_msg, det_msg=traceback.format_exc(), show=True)
with open(tf, 'rb') as f:
self.preview.setPlainText(f.read().decode('utf-8'))

def button_clicked(self, button):
if button == self.button_box.button(QDialogButtonBox.Open):
def open_clicked(self):
files = choose_files(self, 'regexp tester dialog', _('Open book'),
select_only_single_file=True)
if files:
self.open_book(files[0])
if button == self.button_box.button(QDialogButtonBox.Ok):
self.accept()

def doc(self):
return unicode(self.preview.toPlainText())

@ -194,7 +208,7 @@ class RegexEdit(QWidget, Ui_Edit):
self.db = None
self.doc_cache = None

self.connect(self.button, SIGNAL('clicked()'), self.builder)
self.button.clicked.connect(self.builder)

def builder(self):
bld = RegexBuilder(self.db, self.book_id, self.edit.text(), self.doc_cache, self)

@ -226,6 +240,9 @@ class RegexEdit(QWidget, Ui_Edit):
def set_doc(self, doc):
self.doc_cache = doc

def set_regex(self, regex):
self.edit.setText(regex)

def break_cycles(self):
self.db = self.doc_cache = None

@ -237,5 +254,8 @@ class RegexEdit(QWidget, Ui_Edit):
def regex(self):
return self.text

def clear(self):
self.edit.clear()

def check(self):
return True
@ -6,8 +6,8 @@
<rect>
<x>0</x>
<y>0</y>
<width>580</width>
<height>503</height>
<width>882</width>
<height>605</height>
</rect>
</property>
<property name="windowTitle">
@ -1,14 +1,18 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>, 2012 Eli Algranti <idea00@hotmail.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re
|
||||
import re, codecs, json
|
||||
|
||||
from PyQt4.Qt import Qt, QTableWidgetItem
|
||||
|
||||
from calibre.gui2.convert.search_and_replace_ui import Ui_Form
|
||||
from calibre.gui2.convert import Widget
|
||||
from calibre.gui2 import error_dialog
|
||||
from calibre.gui2 import (error_dialog, question_dialog, choose_files,
|
||||
choose_save_file)
|
||||
from calibre import as_unicode
|
||||
|
||||
class SearchAndReplaceWidget(Widget, Ui_Form):
|
||||
|
||||
@ -19,26 +23,115 @@ class SearchAndReplaceWidget(Widget, Ui_Form):
|
||||
STRIP_TEXT_FIELDS = False
|
||||
|
||||
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
||||
# Dummy attributes to fool the Widget() option handler code. We handle
|
||||
# everything in our *handler methods.
|
||||
for i in range(1, 4):
|
||||
x = 'sr%d_'%i
|
||||
for y in ('search', 'replace'):
|
||||
z = x + y
|
||||
setattr(self, 'opt_'+z, z)
        self.opt_search_replace = 'search_replace'

        Widget.__init__(self, parent,
                ['sr1_search', 'sr1_replace',
                ['search_replace',
                 'sr1_search', 'sr1_replace',
                 'sr2_search', 'sr2_replace',
                 'sr3_search', 'sr3_replace']
                )
        self.db, self.book_id = db, book_id
        self.initialize_options(get_option, get_help, db, book_id)
        self.opt_sr1_search.set_msg(_('&Search Regular Expression'))
        self.opt_sr1_search.set_book_id(book_id)
        self.opt_sr1_search.set_db(db)
        self.opt_sr2_search.set_msg(_('&Search Regular Expression'))
        self.opt_sr2_search.set_book_id(book_id)
        self.opt_sr2_search.set_db(db)
        self.opt_sr3_search.set_msg(_('&Search Regular Expression'))
        self.opt_sr3_search.set_book_id(book_id)
        self.opt_sr3_search.set_db(db)

        self.opt_sr1_search.doc_update.connect(self.update_doc)
        self.opt_sr2_search.doc_update.connect(self.update_doc)
        self.opt_sr3_search.doc_update.connect(self.update_doc)
        self.sr_search.set_msg(_('&Search Regular Expression'))
        self.sr_search.set_book_id(book_id)
        self.sr_search.set_db(db)

        self.sr_search.doc_update.connect(self.update_doc)

        proto = QTableWidgetItem()
        proto.setFlags(Qt.ItemFlags(Qt.ItemIsSelectable + Qt.ItemIsEnabled))
        self.search_replace.setItemPrototype(proto)
        self.search_replace.setColumnCount(2)
        self.search_replace.setColumnWidth(0, 300)
        self.search_replace.setColumnWidth(1, 300)
        self.search_replace.setHorizontalHeaderLabels([
            _('Search Regular Expression'), _('Replacement Text')])

        self.sr_add.clicked.connect(self.sr_add_clicked)
        self.sr_change.clicked.connect(self.sr_change_clicked)
        self.sr_remove.clicked.connect(self.sr_remove_clicked)
        self.sr_load.clicked.connect(self.sr_load_clicked)
        self.sr_save.clicked.connect(self.sr_save_clicked)
        self.search_replace.currentCellChanged.connect(self.sr_currentCellChanged)

        self.initialize_options(get_option, get_help, db, book_id)

    def sr_add_clicked(self):
        if self.sr_search.regex:
            row = self.sr_add_row(self.sr_search.regex, self.sr_replace.text())
            self.search_replace.setCurrentCell(row, 0)

    def sr_add_row(self, search, replace):
        row = self.search_replace.rowCount()
        self.search_replace.setRowCount(row + 1)
        newItem = self.search_replace.itemPrototype().clone()
        newItem.setText(search)
        self.search_replace.setItem(row,0, newItem)
        newItem = self.search_replace.itemPrototype().clone()
        newItem.setText(replace)
        self.search_replace.setItem(row,1, newItem)
        return row

    def sr_change_clicked(self):
        row = self.search_replace.currentRow()
        if row >= 0:
            self.search_replace.item(row, 0).setText(self.sr_search.regex)
            self.search_replace.item(row, 1).setText(self.sr_replace.text())
            self.search_replace.setCurrentCell(row, 0)

    def sr_remove_clicked(self):
        row = self.search_replace.currentRow()
        if row >= 0:
            self.search_replace.removeRow(row)
            self.search_replace.setCurrentCell(row-1, 0)
            self.sr_search.clear()
            self.sr_replace.clear()

    def sr_load_clicked(self):
        files = choose_files(self, 'sr_saved_patterns',
                _('Load Calibre Search-Replace definitions file'),
                filters=[
                    (_('Calibre Search-Replace definitions file'), ['csr'])
                    ], select_only_single_file=True)
        if files:
            from calibre.ebooks.conversion.cli import read_sr_patterns
            try:
                self.set_value(self.opt_search_replace,
                        read_sr_patterns(files[0]))
            except Exception as e:
                error_dialog(self, _('Failed to read'),
                        _('Failed to load patterns from %s, click Show details'
                            ' to learn more.')%files[0], det_msg=as_unicode(e),
                        show=True)

    def sr_save_clicked(self):
        filename = choose_save_file(self, 'sr_saved_patterns',
                _('Save Calibre Search-Replace definitions file'),
                filters=[
                    (_('Calibre Search-Replace definitions file'), ['csr'])
                    ])
        if filename:
            with codecs.open(filename, 'w', 'utf-8') as f:
                for search, replace in self.get_definitions():
                    f.write(search + u'\n' + replace + u'\n\n')
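            # The resulting .csr file is plain UTF-8 text: each definition is
            # the search expression on one line and the replacement on the
            # next, followed by a blank line, e.g.:
            #
            #   <hr\s*/?>
            #   <br/>
            #
            # which is the same layout that read_sr_patterns() (used by
            # sr_load_clicked above) parses back in.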
    def sr_currentCellChanged(self, row, column, previousRow, previousColumn):
        if row >= 0:
            self.sr_change.setEnabled(True)
            self.sr_remove.setEnabled(True)
            self.sr_search.set_regex(self.search_replace.item(row, 0).text())
            self.sr_replace.setText(self.search_replace.item(row, 1).text())
        else:
            self.sr_change.setEnabled(False)
            self.sr_remove.setEnabled(False)

    def break_cycles(self):
        Widget.break_cycles(self)

@ -49,29 +142,128 @@ class SearchAndReplaceWidget(Widget, Ui_Form):
        except:
            pass

        d(self.opt_sr1_search)
        d(self.opt_sr2_search)
        d(self.opt_sr3_search)
        d(self.sr_search)

        self.opt_sr1_search.break_cycles()
        self.opt_sr2_search.break_cycles()
        self.opt_sr3_search.break_cycles()
        self.sr_search.break_cycles()

    def update_doc(self, doc):
        self.opt_sr1_search.set_doc(doc)
        self.opt_sr2_search.set_doc(doc)
        self.opt_sr3_search.set_doc(doc)
        self.sr_search.set_doc(doc)

    def pre_commit_check(self):
        for x in ('sr1_search', 'sr2_search', 'sr3_search'):
            x = getattr(self, 'opt_'+x)
        definitions = self.get_definitions()

        # Verify that the search/replace in the edit widgets has been
        # included in the list of search/replace definitions

        edit_search = self.sr_search.regex

        if edit_search:
            edit_replace = unicode(self.sr_replace.text())
            found = False
            for search, replace in definitions:
                if search == edit_search and replace == edit_replace:
                    found = True
                    break
            if not found and not question_dialog(self,
                    _('Unused Search & Replace definition'),
                    _('The search / replace definition being edited '
                        ' has not been added to the list of definitions. '
                        'Do you wish to continue with the conversion '
                        '(the definition will not be used)?')):
                return False

        # Verify all search expressions are valid
        for search, replace in definitions:
            try:
                pat = unicode(x.regex)
                re.compile(pat)
                re.compile(search)
            except Exception as err:
                error_dialog(self, _('Invalid regular expression'),
                        _('Invalid regular expression: %s')%err, show=True)
                return False

        return True

    # Options handling

    def connect_gui_obj_handler(self, g, slot):
        if g is self.opt_search_replace:
            self.search_replace.cellChanged.connect(slot)

    def get_value_handler(self, g):
        if g is self.opt_search_replace:
            return json.dumps(self.get_definitions())
        return None

    def get_definitions(self):
        ans = []
        for row in xrange(0, self.search_replace.rowCount()):
            colItems = []
            for col in xrange(0, self.search_replace.columnCount()):
                colItems.append(unicode(self.search_replace.item(row, col).text()))
            ans.append(colItems)
        return ans
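        # get_value_handler() above serializes these rows as a JSON list of
        # [search, replace] pairs, e.g. '[["<hr\\s*/?>", "<br/>"], ["\\s+$", ""]]',
        # and set_value_handler() below accepts the same shape.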
    def set_value_handler(self, g, val):
        if g is not self.opt_search_replace:
            return True

        try:
            rowItems = json.loads(val)
            if not isinstance(rowItems, list):
                rowItems = []
        except:
            rowItems = []

        if len(rowItems) == 0:
            self.search_replace.clearContents()

        self.search_replace.setRowCount(len(rowItems))

        for row, colItems in enumerate(rowItems):
            for col, cellValue in enumerate(colItems):
                newItem = self.search_replace.itemPrototype().clone()
                newItem.setText(cellValue)
                self.search_replace.setItem(row,col, newItem)
        return True

    def apply_recommendations(self, recs):
        '''
        Handle the legacy sr* options that may have been previously saved. They
        are applied only if the new search_replace option has not been set in
        recs.
        '''
        new_val = None
        legacy = {}
        rest = {}
        for name, val in recs.items():
            if name == 'search_replace':
                new_val = val
                if name in getattr(recs, 'disabled_options', []):
                    self.search_replace.setDisabled(True)
            elif name.startswith('sr'):
                legacy[name] = val if val else ''
            else:
                rest[name] = val

        if rest:
            super(SearchAndReplaceWidget, self).apply_recommendations(rest)

        self.set_value(self.opt_search_replace, None)
        if new_val is None and legacy:
            for i in range(1, 4):
                x = 'sr%d'%i
                s, r = x+'_search', x+'_replace'
                s, r = legacy.get(s, ''), legacy.get(r, '')
                if s:
                    self.sr_add_row(s, r)
        if new_val is not None:
            self.set_value(self.opt_search_replace, new_val)

    def setup_help_handler(self, g, help):
        if g is self.opt_search_replace:
            self.search_replace._help = _(
                'The list of search/replace definitions that will be applied '
                'to this conversion.')
            self.setup_widget_help(self.search_replace)
        return True

@ -6,7 +6,7 @@
<rect>
<x>0</x>
<y>0</y>
<width>468</width>
<width>667</width>
<height>451</height>
</rect>
</property>

@ -32,14 +32,14 @@
</sizepolicy>
</property>
<property name="title">
<string>First expression</string>
<string>Search/Replace Definition Edit</string>
</property>
<layout class="QGridLayout" name="gridLayout_2">
<property name="sizeConstraint">
<enum>QLayout::SetMinimumSize</enum>
</property>
<item row="0" column="0">
<widget class="RegexEdit" name="opt_sr1_search" native="true">
<widget class="RegexEdit" name="sr_search" native="true">
<property name="sizePolicy">
<sizepolicy hsizetype="Minimum" vsizetype="Preferred">
<horstretch>0</horstretch>

@ -60,12 +60,12 @@
<string>&Replacement Text</string>
</property>
<property name="buddy">
<cstring>opt_sr1_replace</cstring>
<cstring>sr_replace</cstring>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QLineEdit" name="opt_sr1_replace">
<widget class="QLineEdit" name="sr_replace">
<property name="sizePolicy">
<sizepolicy hsizetype="Minimum" vsizetype="Fixed">
<horstretch>0</horstretch>

@ -78,117 +78,98 @@
</widget>
</item>
<item row="2" column="0">
<widget class="QGroupBox" name="groupBox_2">
<property name="sizePolicy">
<sizepolicy hsizetype="Minimum" vsizetype="Preferred">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
<layout class="QHBoxLayout" name="horizontalLayout">
<property name="spacing">
<number>-1</number>
</property>
<property name="title">
<string>Second Expression</string>
<property name="leftMargin">
<number>0</number>
</property>
<layout class="QGridLayout" name="gridLayout">
<property name="sizeConstraint">
<enum>QLayout::SetMinimumSize</enum>
</property>
<item row="0" column="0">
<widget class="RegexEdit" name="opt_sr2_search" native="true">
<property name="sizePolicy">
<sizepolicy hsizetype="Minimum" vsizetype="Preferred">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label_5">
<property name="sizePolicy">
<sizepolicy hsizetype="Minimum" vsizetype="Preferred">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
<item>
<widget class="QPushButton" name="sr_add">
<property name="toolTip">
<string>Add the current expression to the list of expressions that will be applied</string>
</property>
<property name="text">
<string>&Replacement Text</string>
</property>
<property name="buddy">
<cstring>opt_sr2_replace</cstring>
<string>&Add</string>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QLineEdit" name="opt_sr2_replace">
<property name="sizePolicy">
<sizepolicy hsizetype="Minimum" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
<item>
<widget class="QPushButton" name="sr_change">
<property name="enabled">
<bool>false</bool>
</property>
<property name="toolTip">
<string>Edit the currently selected expression</string>
</property>
<property name="text">
<string>&Change</string>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="sr_remove">
<property name="enabled">
<bool>false</bool>
</property>
<property name="toolTip">
<string>Remove the currently selected expression</string>
</property>
<property name="text">
<string>&Remove</string>
</property>
</widget>
</item>
<item>
<widget class="QFrame" name="frame">
<property name="frameShape">
<enum>QFrame::VLine</enum>
</property>
<property name="frameShadow">
<enum>QFrame::Raised</enum>
</property>
<property name="lineWidth">
<number>3</number>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="sr_load">
<property name="toolTip">
<string>Load a list of expressions from a previously saved file</string>
</property>
<property name="text">
<string>&Load</string>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="sr_save">
<property name="toolTip">
<string>Save this list of expressions so that you can re-use it easily</string>
</property>
<property name="text">
<string>&Save</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item row="3" column="0">
<widget class="QGroupBox" name="groupBox_3">
<property name="sizePolicy">
<sizepolicy hsizetype="Minimum" vsizetype="Preferred">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
<widget class="QTableWidget" name="search_replace">
<property name="selectionMode">
<enum>QAbstractItemView::SingleSelection</enum>
</property>
<property name="title">
<string>Third expression</string>
<property name="selectionBehavior">
<enum>QAbstractItemView::SelectRows</enum>
</property>
<layout class="QGridLayout" name="gridLayout_3">
<property name="sizeConstraint">
<enum>QLayout::SetMinimumSize</enum>
</property>
<item row="0" column="0">
<widget class="RegexEdit" name="opt_sr3_search" native="true">
<property name="sizePolicy">
<sizepolicy hsizetype="Minimum" vsizetype="Preferred">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label_6">
<property name="sizePolicy">
<sizepolicy hsizetype="Minimum" vsizetype="Preferred">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="text">
<string>&Replacement Text</string>
</property>
<property name="buddy">
<cstring>opt_sr3_replace</cstring>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QLineEdit" name="opt_sr3_replace">
<property name="sizePolicy">
<sizepolicy hsizetype="Minimum" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item row="0" column="0">
<widget class="QLabel" name="label">
<property name="text">
<string><p>Search and replace uses <i>regular expressions</i>. See the <a href="http://manual.calibre-ebook.com/regexp.html">regular expressions tutorial</a> to get started with regular expressions. Also clicking the wizard buttons below will allow you to test your regular expression against the current input document.</string>
<string><p>Search and replace uses <i>regular expressions</i>. See the <a href="http://manual.calibre-ebook.com/regexp.html">regular expressions tutorial</a> to get started with regular expressions. Also clicking the wizard button below will allow you to test your regular expression against the current input document. When you are happy with an expression, click the Add button to add it to the list of expressions.</string>
</property>
<property name="wordWrap">
<bool>true</bool>

@ -173,7 +173,7 @@ class SearchDialog(QDialog, Ui_Dialog):
        self.box_last_values['tags_box'] = tags
        tags = [t.strip() for t in tags.split(',') if t.strip()]
        if tags:
            tags = ['tags:"=' + t + '"' for t in tags]
            tags = ['tags:"' + self.mc + t + '"' for t in tags]
            ans.append('(' + ' or '.join(tags) + ')')
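            # self.mc presumably holds the prefix for the selected match kind
            # ('=' exact, '~' regex, '' contains), so the generated query now
            # honours it: e.g. (tags:"~fict.*" or tags:"~hist.*") in regex
            # mode instead of always the exact form (tags:"=Fiction").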
        general = unicode(self.general_box.text())
        self.box_last_values['general_box'] = general

@ -232,8 +232,8 @@ def download(all_ids, tf, db, do_identify, covers, ensure_fields,
            metadata.iteritems()}
    try:
        ret = fork_job('calibre.ebooks.metadata.sources.worker', 'main',
                (do_identify, covers, metadata, ensure_fields),
                cwd=tdir, abort=abort, heartbeat=heartbeat, no_output=True)
                (do_identify, covers, metadata, ensure_fields, tdir),
                abort=abort, heartbeat=heartbeat, no_output=True)
    except WorkerError as e:
        if e.orig_tb:
            raise Exception('Failed to download metadata. Original '

@ -573,8 +573,9 @@ class CoverWorker(Thread): # {{{
        try:
            res = fork_job('calibre.ebooks.metadata.sources.worker',
                'single_covers',
                (self.title, self.authors, self.identifiers, self.caches),
                cwd=tdir, no_output=True, abort=self.abort)
                (self.title, self.authors, self.identifiers, self.caches,
                    tdir),
                no_output=True, abort=self.abort)
            self.log.append_dump(res['result'])
        finally:
            self.keep_going = False

@ -2653,6 +2653,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
    def rename_author(self, old_id, new_name):
        # Make sure that any commas in new_name are changed to '|'!
        new_name = new_name.replace(',', '|').strip()
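        # e.g. 'Tolkien, J. R. R.' is stored as 'Tolkien| J. R. R.'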
        if not new_name:
            new_name = _('Unknown')

        # Get the list of books we must fix up, one way or the other
        # Save the list so we can use it twice

@ -112,8 +112,11 @@ def html_to_lxml(raw):
    for a in remove:
        del x.attrib[a]
    raw = etree.tostring(root, encoding=None)
    try:
        return etree.fromstring(raw)
    except:
        from calibre.ebooks.oeb.parse_utils import _html4_parse
        return _html4_parse(raw)
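    # i.e. the serialized markup is first tried as well-formed XML; anything
    # the strict parser rejects falls back to the more forgiving HTML 4 parser.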

def CATALOG_ENTRY(item, item_kind, base_href, version, updated,
                  ignore_count=False, add_kind=False):
@ -4,9 +4,9 @@
#
msgid ""
msgstr ""
"Project-Id-Version: calibre 0.8.46\n"
"POT-Creation-Date: 2012-04-08 15:08+IST\n"
"PO-Revision-Date: 2012-04-08 15:08+IST\n"
"Project-Id-Version: calibre 0.8.47\n"
"POT-Creation-Date: 2012-04-13 09:24+IST\n"
"PO-Revision-Date: 2012-04-13 09:24+IST\n"
"Last-Translator: Automatically generated\n"
"Language-Team: LANGUAGE\n"
"MIME-Version: 1.0\n"

@ -38,7 +38,7 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/devices/prs505/sony_cache.py:661
#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:337
#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:338
#: /home/kovid/work/calibre/src/calibre/devices/usbms/driver.py:489
#: /home/kovid/work/calibre/src/calibre/devices/usbms/driver.py:493
#: /home/kovid/work/calibre/src/calibre/ebooks/chm/metadata.py:57
#: /home/kovid/work/calibre/src/calibre/ebooks/conversion/plugins/chm_input.py:109
#: /home/kovid/work/calibre/src/calibre/ebooks/conversion/plugins/chm_input.py:112

@ -78,7 +78,7 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/mobi.py:472
#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/opf2.py:1134
#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/opf2.py:1245
#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/pdb.py:41
#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/pdb.py:44
#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/pdf.py:29
#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/plucker.py:25
#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/pml.py:23

@ -4010,6 +4010,10 @@ msgstr ""
msgid "Generating %s catalog..."
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/actions/catalog.py:71
msgid "Catalog generation complete, with warnings."
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/actions/catalog.py:86
msgid "Catalog generated."
msgstr ""

@ -4128,7 +4132,7 @@ msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/actions/choose_library.py:283
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/plugin_updater.py:726
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:201
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:204
#: /home/kovid/work/calibre/src/calibre/gui2/preferences/plugins.py:308
msgid "Already exists"
msgstr ""

@ -4343,7 +4347,7 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/plugin_updater.py:674
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/restore_library.py:93
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/tag_list_editor.py:216
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:371
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:374
#: /home/kovid/work/calibre/src/calibre/gui2/jobs.py:597
#: /home/kovid/work/calibre/src/calibre/gui2/jobs.py:607
#: /home/kovid/work/calibre/src/calibre/gui2/preferences/columns.py:102

@ -4590,7 +4594,7 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/actions/edit_metadata.py:101
#: /home/kovid/work/calibre/src/calibre/gui2/dnd.py:84
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:507
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:817
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:818
msgid "Download failed"
msgstr ""

@ -4618,7 +4622,7 @@ msgid "Download complete"
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/actions/edit_metadata.py:121
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:879
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:880
msgid "Download log"
msgstr ""

@ -8107,15 +8111,15 @@ msgid "Copied"
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:141
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:872
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:873
#: /home/kovid/work/calibre/src/calibre/gui2/viewer/main_ui.py:205
msgid "Copy to clipboard"
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:189
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:244
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:936
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:1042
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:937
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:1043
msgid "View log"
msgstr ""

@ -9708,90 +9712,98 @@ msgstr ""
msgid "&Preview {0}"
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:141
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:112
msgid "No recipes"
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:113
msgid "No custom recipes created."
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:144
msgid "No recipe selected"
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:146
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:149
#, python-format
msgid "The attached file: %(fname)s is a recipe to download %(title)s."
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:149
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:152
msgid "Recipe for "
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:166
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:177
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:169
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:180
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles_ui.py:265
msgid "Switch to Advanced mode"
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:172
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:180
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:175
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:183
msgid "Switch to Basic mode"
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:190
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:193
msgid "Feed must have a title"
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:191
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:194
msgid "The feed must have a title"
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:195
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:198
msgid "Feed must have a URL"
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:196
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:199
#, python-format
msgid "The feed %s must have a URL"
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:202
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:205
msgid "This feed has already been added to the recipe"
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:244
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:253
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:340
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:247
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:256
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:343
msgid "Invalid input"
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:245
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:254
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:341
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:248
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:257
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:344
#, python-format
msgid "<p>Could not create recipe. Error:<br>%s"
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:258
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:317
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:344
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:261
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:320
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:347
msgid "Replace recipe?"
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:259
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:318
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:345
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:262
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:321
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:348
#, python-format
msgid "A custom recipe named %s already exists. Do you want to replace it?"
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:285
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:288
msgid "Choose builtin recipe"
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:331
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:334
msgid "Choose a recipe file"
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:332
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:335
msgid "Recipes"
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:372
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles.py:375
msgid "You will lose any unsaved changes. To save your changes, click the Add/Update recipe button. Continue?"
msgstr ""

@ -10524,7 +10536,7 @@ msgid "Previous Page"
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/lrf_renderer/main_ui.py:133
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:933
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:934
#: /home/kovid/work/calibre/src/calibre/gui2/store/web_store_dialog_ui.py:62
#: /home/kovid/work/calibre/src/calibre/gui2/viewer/main_ui.py:193
msgid "Back"

@ -10975,7 +10987,7 @@ msgid "Edit Metadata"
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single.py:66
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:926
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:927
#: /home/kovid/work/calibre/src/calibre/library/server/browse.py:107
#: /home/kovid/work/calibre/src/calibre/web/feeds/templates.py:219
#: /home/kovid/work/calibre/src/calibre/web/feeds/templates.py:410

@ -11143,38 +11155,38 @@ msgstr ""
msgid "Failed to find any books that match your search. Try making the search <b>less specific</b>. For example, use only the author's last name and a single distinctive word from the title.<p>To see the full log, click Show Details."
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:624
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:625
msgid "Current cover"
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:627
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:628
msgid "Searching..."
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:787
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:788
#, python-format
msgid "Downloading covers for <b>%s</b>, please wait..."
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:818
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:819
msgid "Failed to download any covers, click \"Show details\" for details."
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:824
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:825
#, python-format
msgid "Could not find any covers for <b>%s</b>"
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:826
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:827
#, python-format
msgid "Found <b>%(num)d</b> covers of %(title)s. Pick the one you like best."
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:915
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:916
msgid "Downloading metadata..."
msgstr ""

#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:1026
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:1027
msgid "Downloading cover..."
msgstr ""

@ -16461,7 +16473,7 @@ msgid "When searching for text without using lookup prefixes, as for example, Re
msgstr ""

#: /home/kovid/work/calibre/src/calibre/utils/config_base.py:420
msgid "Choose columns to be searched when not using prefixes, as for example, when searching for Redd instead of title:Red. Enter a list of search/lookup names separated by commas. Only takes effect if you set the option to limit search columns above."
msgid "Choose columns to be searched when not using prefixes, as for example, when searching for Red instead of title:Red. Enter a list of search/lookup names separated by commas. Only takes effect if you set the option to limit search columns above."
msgstr ""

#: /home/kovid/work/calibre/src/calibre/utils/formatter.py:31