mirror of https://github.com/kovidgoyal/calibre.git, synced 2025-07-09 03:04:10 -04:00

commit ed2930712d (parent 41cee6f02d)

    various whitespace (extra-edit)
    !partial 'E203,E222,E241,E271,E272'
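The codes in the commit message are pycodestyle (flake8) whitespace rules: E203 (whitespace before ':', ',' or ';'), E222 (multiple spaces after operator), E241 (multiple spaces after comma), E271 (multiple spaces after keyword) and E272 (multiple spaces before keyword). A short sketch of the patterns this commit cleans up; the snippets are illustrative examples of the rule classes, not lines taken from the diff:

    # Each flagged form is followed by the corrected form.
    d = {'host' : 'example'}     # E203: whitespace before ':'
    d = {'host': 'example'}

    x =  1                       # E222: multiple spaces after operator
    x = 1

    t = ('World',  'rss')        # E241: multiple spaces after comma
    t = ('World', 'rss')

    y = 1 if  x else 2           # E271: multiple spaces after keyword
    y = 1 if x  else 2           # E272: multiple spaces before keyword
    y = 1 if x else 2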
@@ -61,7 +61,7 @@ if use_archive:
 data = json.loads(raw)
 body = root.xpath('//body')[0]
 article = E(body, 'article')
-E(article, 'div', data['flyTitle'] , style='color: red; font-size:small; font-weight:bold;')
+E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;')
 E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '')
 E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
 try:

@@ -96,7 +96,7 @@ else:
 for child in tuple(body):
 body.remove(child)
 article = E(body, 'article')
-E(article, 'div', replace_entities(data['subheadline']) , style='color: red; font-size:small; font-weight:bold;')
+E(article, 'div', replace_entities(data['subheadline']), style='color: red; font-size:small; font-weight:bold;')
 E(article, 'h1', replace_entities(data['headline']))
 E(article, 'div', replace_entities(data['description']), style='font-style: italic; color:#202020;')
 if data['dateline'] is None:

@@ -58,7 +58,7 @@ class AM730(BasicNewsRecipe):
 articles = []
 for aTag in soup.findAll('a',attrs={'class':'newsimglink'}):
 href = aTag.get('href',False)
-if not href.encode('utf-8').startswith(url.encode('utf-8')) :
+if not href.encode('utf-8').startswith(url.encode('utf-8')):
 continue # not in same section
 
 title = href.split('/')[-1].split('-')[0]

@@ -214,7 +214,7 @@ class CanWestPaper(BasicNewsRecipe):
 divtags = soup.findAll('div', attrs={'id': ''})
 if divtags:
 for div in divtags:
-del (div['id'])
+del div['id']
 
 pgall = soup.find('div', attrs={'id': 'storyphoto'})
 if pgall is not None: # photo gallery perhaps
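An aside on the `del (div['id'])` to `del div['id']` change in the hunk above (it recurs in several CanWestPaper hunks below): `del` is a statement, not a function, so parenthesizing its target is legal but redundant and reads like a call. A minimal stand-alone illustration; the dict here is a stand-in, not calibre code:

    # 'del' is a statement; the parentheses add nothing.
    div = {'id': 'story', 'class': 'news'}
    del (div['id'])   # works, but looks like a function call
    assert 'id' not in div

    div = {'id': 'story', 'class': 'news'}
    del div['id']     # the idiomatic form this commit standardizes on
    assert 'id' not in div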
|
@@ -85,11 +85,11 @@ class Clarin(BasicNewsRecipe):
 self.oldest_article = float(d)
 
 keep_only_tags = [
-dict(name='p' , attrs={'class' : 'volanta'}),
-dict(name='h1' , attrs={'id': 'title'}),
-dict(name='div', attrs={'class' : 'bajada'}),
-dict(name='div', attrs={'id' : 'galeria-trigger'}),
-dict(name='div', attrs={'class' : 'body-nota'})
+dict(name='p', attrs={'class': 'volanta'}),
+dict(name='h1', attrs={'id': 'title'}),
+dict(name='div', attrs={'class': 'bajada'}),
+dict(name='div', attrs={'id': 'galeria-trigger'}),
+dict(name='div', attrs={'class': 'body-nota'})
 
 ]
 

@@ -25,7 +25,7 @@ class ComputerWeekly(BasicNewsRecipe):
 ('Financial services IT news', 'https://www.computerweekly.com/rss/Financial-services-IT-news.xml'),
 ('Public sector IT news', 'https://www.computerweekly.com/rss/Public-sector-IT-news.xml'),
 ('Enterprise software', 'https://www.computerweekly.com/rss/Enterprise-software.xml'),
-('SME IT news' , 'https://www.computerweekly.com/rss/SME-IT-news.xml'),
+('SME IT news', 'https://www.computerweekly.com/rss/SME-IT-news.xml'),
 ('Datacenter and cloud computing', 'https://www.computerweekly.com/rss/Datacentre-and-cloud-computing.xml'),
 ('Storage', 'https://www.computerweekly.com/rss/Storage.xml'),
 ('Information Management', 'https://www.computerweekly.com/rss/Information-management.xml'),

@@ -27,25 +27,18 @@ class AdvancedUserRecipe(BasicNewsRecipe):
 remove_tags_after = dict(name='p', attrs={'class': ['firma-redazione']})
 
 feeds = [
-(u'Politica',
-u'http://contropiano.org/news/politica-news/feed'),
-(u'Internazionale',
-u'http://contropiano.org/news/internazionale-news/feed'),
+(u'Politica', u'http://contropiano.org/news/politica-news/feed'),
+(u'Internazionale', u'http://contropiano.org/news/internazionale-news/feed'),
 (u'Aggiornamenti in breve', u'http://contropiano.org/news/aggiornamenti-in-breve/feed'),
-(u'Economia',
-u'http://contropiano.org/news/news-economia/feed'),
-(u'Ambiente',
-u'http://contropiano.org/news/ambiente-news/feed'),
-(u'Scienza',
-u'http://contropiano.org/news/scienza-news/feed'),
-(u'Cultura',
-u'http://contropiano.org/news/cultura-news/feed'),
+(u'Economia', u'http://contropiano.org/news/news-economia/feed'),
+(u'Ambiente', u'http://contropiano.org/news/ambiente-news/feed'),
+(u'Scienza', u'http://contropiano.org/news/scienza-news/feed'),
+(u'Cultura', u'http://contropiano.org/news/cultura-news/feed'),
 (u'Locali', u'http://contropiano.org/regionali/feed'),
 (u'Lavoro', u'http://contropiano.org/news/lavoro-conflitto-news/feed'),
 (u'Malapolizia', u'http://contropiano.org/news/malapolizia-news/feed'),
 (u'Interventi', u'http://contropiano.org/interventi/feed'),
 (u'Documenti', u'http://contropiano.org/documenti/feed'),
 (u'Vignette', u'http://contropiano.org/vignette/feed'),
-(u'Altro',
-u'http://contropiano.org/altro/feed')
+(u'Altro', u'http://contropiano.org/altro/feed')
 ]
@@ -57,7 +57,7 @@ def load_article_from_json(raw, root):
 data = json.loads(raw)
 body = root.xpath('//body')[0]
 article = E(body, 'article')
-E(article, 'div', data['flyTitle'] , style='color: red; font-size:small; font-weight:bold;')
+E(article, 'div', data['flyTitle'], style='color: red; font-size:small; font-weight:bold;')
 E(article, 'h1', data['title'], title=safe_dict(data, 'url', 'canonical') or '')
 E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
 E(article, 'div', data['byline'], style='font-style: italic; color:#202020;')

@@ -214,7 +214,7 @@ class CanWestPaper(BasicNewsRecipe):
 divtags = soup.findAll('div', attrs={'id': ''})
 if divtags:
 for div in divtags:
-del (div['id'])
+del div['id']
 
 pgall = soup.find('div', attrs={'id': 'storyphoto'})
 if pgall is not None: # photo gallery perhaps

@@ -125,7 +125,7 @@ img { background: none !important; float: none; margin: 0px; }
 break
 elif strpost.startswith('<a href'):
 url = post['href']
-if url.startswith(('http://www1.folha.uol.com.br/', 'https://www1.folha.uol.com.br/')) :
+if url.startswith(('http://www1.folha.uol.com.br/', 'https://www1.folha.uol.com.br/')):
 title = self.tag_to_string(post)
 self.log()
 self.log('--> post: ', post)

@@ -188,7 +188,7 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
 for by in soup.findAll(**classes('topper__byline topper__date font-style-italic')):
 by.name = 'div'
 for img in soup.find_all('img', attrs={'srcset': True}):
-img['src'] = re.sub(r'_webp_small_\dx', '_webp_large_1x',img['srcset'].split()[0])
+img['src'] = re.sub(r'_webp_small_\dx', '_webp_large_1x', img['srcset'].split()[0])
 return soup
 
 def get_browser(self):
@@ -28,7 +28,7 @@ class LiveHindustan(BasicNewsRecipe):
 remove_tags_after = [classes('stry-bdy')]
 
 feeds = [
-('प्रमुख खबरें' ,'https://feed.livehindustan.com/rss/3127'),
+('प्रमुख खबरें', 'https://feed.livehindustan.com/rss/3127'),
 ('देश', 'https://feed.livehindustan.com/rss/4911'),
 ('विदेश', 'https://feed.livehindustan.com/rss/4913'),
 ('ओपिनियन', 'https://feed.livehindustan.com/rss/5165'),

@@ -27,8 +27,7 @@ class iHeuteRecipe(BasicNewsRecipe):
 
 remove_attributes = ['width', 'height']
 remove_tags = [dict(name='div', attrs={'id': ['zooming']}),
-dict(name='div', attrs={
-'class': ['related', 'mapa-wrapper']}),
+dict(name='div', attrs={'class': ['related', 'mapa-wrapper']}),
 dict(name='table', attrs={'id': ['opener-img', 'portal']}),
 dict(name='table', attrs={'class': ['video-16ku9']})]
 remove_tags_after = [

@@ -10,29 +10,21 @@ class AdvancedUserRecipe1286477122(BasicNewsRecipe):
 __author__ = 'egilh'
 
 feeds = [
-(u'Politica & Palazzo',
-u'http://www.ilfattoquotidiano.it/category/politica-palazzo/feed/'),
-(u'Giustizia & impunit\xe0',
-u'http://www.ilfattoquotidiano.it/category/giustizia-impunita/feed/'),
+(u'Politica & Palazzo', u'http://www.ilfattoquotidiano.it/category/politica-palazzo/feed/'),
+(u'Giustizia & impunit\xe0', u'http://www.ilfattoquotidiano.it/category/giustizia-impunita/feed/'),
 (u'Media & regime', u'http://www.ilfattoquotidiano.it/category/media-regime/feed/'),
-(u'Economia & Lobby',
-u'http://www.ilfattoquotidiano.it/category/economia-lobby/feed/'),
-(u'Lavoro & precari',
-u'http://www.ilfattoquotidiano.it/category/lavoro-precari/feed/'),
-(u'Ambiente & Veleni',
-u'http://www.ilfattoquotidiano.it/category/ambiente-veleni/feed/'),
-(u'Sport & miliardi',
-u'http://www.ilfattoquotidiano.it/category/sport-miliardi/feed/'),
+(u'Economia & Lobby', u'http://www.ilfattoquotidiano.it/category/economia-lobby/feed/'),
+(u'Lavoro & precari', u'http://www.ilfattoquotidiano.it/category/lavoro-precari/feed/'),
+(u'Ambiente & Veleni', u'http://www.ilfattoquotidiano.it/category/ambiente-veleni/feed/'),
+(u'Sport & miliardi', u'http://www.ilfattoquotidiano.it/category/sport-miliardi/feed/'),
 (u'Cronaca', u'http://www.ilfattoquotidiano.it/category/cronaca/feed/'),
 (u'Mondo', u'http://www.ilfattoquotidiano.it/category/mondo/feed/'),
 (u'Societ\xe0', u'http://www.ilfattoquotidiano.it/category/societa/feed/'),
 (u'Scuola', u'http://www.ilfattoquotidiano.it/category/scuola/feed/'),
 (u'Tecno', u'http://www.ilfattoquotidiano.it/category/tecno/feed/'),
 (u'Terza pagina', u'http://www.ilfattoquotidiano.it/category/terza-pagina/feed/'),
-(u'Piacere quotidiano',
-u'http://www.ilfattoquotidiano.it/category/piacere-quotidiano/feed/'),
-(u'Cervelli in fuga',
-u'http://www.ilfattoquotidiano.it/category/cervelli-in-fuga/feed/'),
+(u'Piacere quotidiano', u'http://www.ilfattoquotidiano.it/category/piacere-quotidiano/feed/'),
+(u'Cervelli in fuga', u'http://www.ilfattoquotidiano.it/category/cervelli-in-fuga/feed/'),
 (u'Documentati!', u'http://www.ilfattoquotidiano.it/category/documentati/feed/'),
 (u'Misfatto', u'http://www.ilfattoquotidiano.it/category/misfatto/feed/')
 ]
@@ -34,7 +34,7 @@ class IlManifesto(BasicNewsRecipe):
 startSoup = self.index_to_soup(startUrl)
 lastEdition = startSoup.findAll('div', id='accordion_inedicola')[
 1].find('a')['href']
-del (startSoup)
+del startSoup
 self.manifesto_index = MANIFESTO_BASEURL + lastEdition
 urlsplit = lastEdition.split('/')
 self.manifesto_datestr = urlsplit[-1]

@@ -106,5 +106,5 @@ class IlManifesto(BasicNewsRecipe):
 summary = sommNode
 
 template = "<html><head><title>%(title)s</title></head><body><h1>%(title)s</h1><h2>%(subtitle)s</h2><h3>%(author)s</h3><div style='font-size: x-large;'>%(summary)s</div><div>%(content)s</div></body></html>" # noqa: E501
-del (bs)
+del bs
 return template % dict(title=title, subtitle=subtitle, author=author, summary=summary, content=content)

@@ -47,7 +47,7 @@ class jotdown(BasicNewsRecipe):
 dict(name='div', attrs={'id':'respond'})
 ]
 
-remove_tags_after = dict(name='div' , attrs={'id':'respond'})
+remove_tags_after = dict(name='div', attrs={'id':'respond'})
 
 preprocess_regexps = [
 # To change the small size of the text

@@ -214,7 +214,7 @@ class CanWestPaper(BasicNewsRecipe):
 divtags = soup.findAll('div', attrs={'id': ''})
 if divtags:
 for div in divtags:
-del (div['id'])
+del div['id']
 
 pgall = soup.find('div', attrs={'id': 'storyphoto'})
 if pgall is not None: # photo gallery perhaps
@@ -80,7 +80,7 @@ class NikkeiNet_paper_subscription(BasicNewsRecipe):
 sections = soup.findAll(attrs={'class': re.compile(r'.*cmn-article_title.*')})
 
 for sect in sections:
-sect_title = sect.find(attrs={'class' : re.compile(r'.*cmnc-((large)|(middle)|(small)).*')})
+sect_title = sect.find(attrs={'class': re.compile(r'.*cmnc-((large)|(middle)|(small)).*')})
 if sect_title is None:
 continue
 sect_title = sect_title.contents[0]

@@ -214,7 +214,7 @@ class CanWestPaper(BasicNewsRecipe):
 divtags = soup.findAll('div', attrs={'id': ''})
 if divtags:
 for div in divtags:
-del (div['id'])
+del div['id']
 
 pgall = soup.find('div', attrs={'id': 'storyphoto'})
 if pgall is not None: # photo gallery perhaps

@@ -76,22 +76,22 @@ class Pagina12(BasicNewsRecipe):
 return br
 
 feeds = [
-(u'Diario de hoy' , u'https://www.pagina12.com.ar/rss/edicion-impresa'),
-(u'Espectaculos' , u'https://www.pagina12.com.ar/rss/suplementos/cultura-y-espectaculos/notas'),
-(u'Radar' , u'https://www.pagina12.com.ar/rss/suplementos/radar/notas'),
-(u'Radar libros' , u'https://www.pagina12.com.ar/rss/suplementos/radar-libros/notas'),
-(u'Cash' , u'https://www.pagina12.com.ar/rss/suplementos/cash/notas'),
-(u'NO' , u'https://www.pagina12.com.ar/rss/suplementos/no/notas'),
-(u'Las 12' , u'https://www.pagina12.com.ar/rss/suplementos/las12/notas'),
-(u'Soy' , u'https://www.pagina12.com.ar/rss/suplementos/soy/notas'),
-(u'M2' , u'https://www.pagina12.com.ar/rss/suplementos/m2/notas'),
-(u'Rosario 12' , u'https://www.pagina12.com.ar/rss/suplementos/rosario12/notas')
+(u'Diario de hoy', u'https://www.pagina12.com.ar/rss/edicion-impresa'),
+(u'Espectaculos', u'https://www.pagina12.com.ar/rss/suplementos/cultura-y-espectaculos/notas'),
+(u'Radar', u'https://www.pagina12.com.ar/rss/suplementos/radar/notas'),
+(u'Radar libros', u'https://www.pagina12.com.ar/rss/suplementos/radar-libros/notas'),
+(u'Cash', u'https://www.pagina12.com.ar/rss/suplementos/cash/notas'),
+(u'NO', u'https://www.pagina12.com.ar/rss/suplementos/no/notas'),
+(u'Las 12', u'https://www.pagina12.com.ar/rss/suplementos/las12/notas'),
+(u'Soy', u'https://www.pagina12.com.ar/rss/suplementos/soy/notas'),
+(u'M2', u'https://www.pagina12.com.ar/rss/suplementos/m2/notas'),
+(u'Rosario 12', u'https://www.pagina12.com.ar/rss/suplementos/rosario12/notas')
 ]
 
 def get_cover_url(self):
 lurl = strftime('https://www.pagina12.com.ar/edicion-impresa/%d-%m-%Y')
 soup = self.index_to_soup(lurl)
-mydiv = soup.find('div', {'class' : lambda x: x and 'printed-edition-cover' in x.split()})
+mydiv = soup.find('div', {'class': lambda x: x and 'printed-edition-cover' in x.split()})
 if mydiv:
 for image in mydiv.findAll('img'):
 if image['src'].startswith('https://images.pagina12.com.ar/styles/width700/public/'):
@@ -56,8 +56,7 @@ class RealClear(BasicNewsRecipe):
 printhints = [['realclear', '', '', 'printpage'],
 ['billoreilly.com', 'Print this entry', 'a', ''],
 ['billoreilly.com', 'Print This Article', 'a', ''],
-['politico.com', 'Print',
-'a', 'share-print'],
+['politico.com', 'Print', 'a', 'share-print'],
 ['nationalreview.com', '>Print<', 'a', ''],
 ['reason.com', '', 'a', 'printer']
 # The following are not supported due to JavaScripting, and would require obfuscated_article to handle

@@ -171,4 +171,4 @@ class respektRecipe(BasicNewsRecipe):
 o.getparent().replace(o,e)
 except:
 pass
-return (BeautifulSoup(lxml.etree.tostring(root,encoding='unicode')))
+return BeautifulSoup(lxml.etree.tostring(root,encoding='unicode'))

@@ -54,7 +54,7 @@ def load_article_from_json(raw, root):
 for child in tuple(body):
 body.remove(child)
 article = E(body, 'article')
-E(article, 'div', replace_entities(data['firstTopic']['name']) , style='color: gray; font-size:small; font-weight:bold;')
+E(article, 'div', replace_entities(data['firstTopic']['name']), style='color: gray; font-size:small; font-weight:bold;')
 E(article, 'h1', replace_entities(data['headline']))
 # E(article, 'p', replace_entities(data['subHeadline']['text']), style='font-style: italic; color:#202020;')
 for subh in data['subHeadline']['json']:

@@ -39,16 +39,16 @@ class StraitsTimes(BasicNewsRecipe):
 ]
 
 feeds = [
-(u'World' , u'https://www.straitstimes.com/news/world/rss.xml')
-(u'Business' , u'https://www.straitstimes.com/news/business/rss.xml'),
-(u'Life' , u'https://www.straitstimes.com/news/life/rss.xml'),
-(u'Tech' , u'https://www.straitstimes.com/news/tech/rss.xml'),
-(u'Opinion' , u'https://www.straitstimes.com/news/opinion/rss.xml'),
-(u'Life' , u'https://www.straitstimes.com/news/life/rss.xml'),
-(u'Singapore' , u'https://www.straitstimes.com/news/singapore/rss.xml'),
-(u'Asia' , u'https://www.straitstimes.com/news/asia/rss.xml'),
-(u'Multimedia' , u'https://www.straitstimes.com/news/multimedia/rss.xml'),
-(u'Sport' , u'https://www.straitstimes.com/news/sport/rss.xml'),
+(u'World', u'https://www.straitstimes.com/news/world/rss.xml'),
+(u'Business', u'https://www.straitstimes.com/news/business/rss.xml'),
+(u'Life', u'https://www.straitstimes.com/news/life/rss.xml'),
+(u'Tech', u'https://www.straitstimes.com/news/tech/rss.xml'),
+(u'Opinion', u'https://www.straitstimes.com/news/opinion/rss.xml'),
+(u'Life', u'https://www.straitstimes.com/news/life/rss.xml'),
+(u'Singapore', u'https://www.straitstimes.com/news/singapore/rss.xml'),
+(u'Asia', u'https://www.straitstimes.com/news/asia/rss.xml'),
+(u'Multimedia', u'https://www.straitstimes.com/news/multimedia/rss.xml'),
+(u'Sport', u'https://www.straitstimes.com/news/sport/rss.xml'),
 ]
 
 def preprocess_html(self, soup):
@@ -52,72 +52,79 @@ class PhilippineDailyInquirer(BasicNewsRecipe):
 feeds = [
 
 ('Headlines', 'http://newsinfo.inquirer.net/category/inquirer-headlines/feed'),
-('Latest Stories' , 'http://newsinfo.inquirer.net/category/latest-stories/feed'),
-('Nation' , 'http://newsinfo.inquirer.net/category/nation/feed'),
-('Nation - Latest Stories' , 'http://newsinfo.inquirer.net/category/latest-stories/nation-latest-stories/feed'),
-('Metro' , 'http://newsinfo.inquirer.net/category/metro/feed'),
-('Metro - Latest Stories' , 'http://newsinfo.inquirer.net/category/latest-stories/metro-latest-stories/feed'),
-('Regions' , 'http://newsinfo.inquirer.net/category/regions/feed'),
-('Regions - Latest Stories' , 'http://newsinfo.inquirer.net/category/latest-stories/regions-latest-stories/feed'),
-('News' , 'http://www.inquirer.net/fullfeed'),
-('More News' , 'http://newsinfo.inquirer.net/feed')
-,
-('Global Nation' , 'http://globalnation.inquirer.net/feed'),
+('Latest Stories', 'http://newsinfo.inquirer.net/category/latest-stories/feed'),
+('Nation', 'http://newsinfo.inquirer.net/category/nation/feed'),
+('Nation - Latest Stories', 'http://newsinfo.inquirer.net/category/latest-stories/nation-latest-stories/feed'),
+('Metro', 'http://newsinfo.inquirer.net/category/metro/feed'),
+('Metro - Latest Stories', 'http://newsinfo.inquirer.net/category/latest-stories/metro-latest-stories/feed'),
+('Regions', 'http://newsinfo.inquirer.net/category/regions/feed'),
+('Regions - Latest Stories', 'http://newsinfo.inquirer.net/category/latest-stories/regions-latest-stories/feed'),
+('News', 'http://www.inquirer.net/fullfeed'),
+('More News', 'http://newsinfo.inquirer.net/feed'),
+
+('Global Nation', 'http://globalnation.inquirer.net/feed'),
 ('Global Nation - Latest Stories', 'http://globalnation.inquirer.net/category/latest-stories/feed'),
 ('Global Nation - Philippines', 'http://globalnation.inquirer.net/category/news/philippines/feed'),
 ('Global Nation - Asia & Pacific', 'http://globalnation.inquirer.net/category/news/asiaaustralia/feed'),
 ('Global Nation - Americas', 'http://globalnation.inquirer.net/category/news/uscanada/feed'),
 ('Global Nation - Middle East & Africa', 'http://globalnation.inquirer.net/category/news/middle-eastafrica/feed'),
-('Global Nation - Europe' , 'http://globalnation.inquirer.net/category/news/europe/feed'),
+('Global Nation - Europe', 'http://globalnation.inquirer.net/category/news/europe/feed'),
 ('Global Nation - Global Pinoy', 'http://globalnation.inquirer.net/category/global-pinoy/feed'),
-('Global Nation - Events' , 'http://globalnation.inquirer.net/category/events/feed'),
-('Business' , 'http://business.inquirer.net/feed'),
-('Business - Latest Stories' , 'http://business.inquirer.net/category/latest-stories/feed'),
-('Business - Money' , 'http://business.inquirer.net/category/money/feed'),
+('Global Nation - Events', 'http://globalnation.inquirer.net/category/events/feed'),
+
+('Business', 'http://business.inquirer.net/feed'),
+('Business - Latest Stories', 'http://business.inquirer.net/category/latest-stories/feed'),
+('Business - Money', 'http://business.inquirer.net/category/money/feed'),
 ('Business - Science & Health', 'http://business.inquirer.net/category/science-and-health/feed'),
-('Business - Motoring' , 'http://business.inquirer.net/category/motoring/feed'),
-('Business - Property Guide' , 'http://business.inquirer.net/category/property-guide/feed'),
-('Business - Columnists' , 'http://business.inquirer.net/category/columnists/feed'),
-('Sports' , 'http://sports.inquirer.net/feed'),
-('Sports - Latest Stories' , 'http://sports.inquirer.net/category/latest-stories/feed'),
-('Sports - Basketball' , 'http://sports.inquirer.net/category/section/basketball/feed'),
+('Business - Motoring', 'http://business.inquirer.net/category/motoring/feed'),
+('Business - Property Guide', 'http://business.inquirer.net/category/property-guide/feed'),
+('Business - Columnists', 'http://business.inquirer.net/category/columnists/feed'),
+
+('Sports', 'http://sports.inquirer.net/feed'),
+('Sports - Latest Stories', 'http://sports.inquirer.net/category/latest-stories/feed'),
+('Sports - Basketball', 'http://sports.inquirer.net/category/section/basketball/feed'),
 ('Sports - Boxing & MMA', 'http://sports.inquirer.net/category/section/boxing-mma/feed'),
-('Sports - Golf' , 'http://sports.inquirer.net/category/section/golf/feed'),
-('Sports - Football' , 'http://sports.inquirer.net/category/section/other-sports/football/feed'),
-('Sports - Other Sports' , 'http://sports.inquirer.net/category/section/other-sports/feed'),
-('Technology' , 'http://technology.inquirer.net/feed'),
+('Sports - Golf', 'http://sports.inquirer.net/category/section/golf/feed'),
+('Sports - Football', 'http://sports.inquirer.net/category/section/other-sports/football/feed'),
+('Sports - Other Sports', 'http://sports.inquirer.net/category/section/other-sports/feed'),
+
+('Technology', 'http://technology.inquirer.net/feed'),
 ('Technology Latest Stories', 'http://technology.inquirer.net/category/latest-stories/feed'),
-('Entertainment' , 'http://entertainment.inquirer.net/feed'),
+
+('Entertainment', 'http://entertainment.inquirer.net/feed'),
 ('Entertainment - Headlines', 'http://entertainment.inquirer.net/category/headlines/feed'),
 ('Entertainment - Latest Stories', 'http://entertainment.inquirer.net/category/latest-stories/feed'),
-('Entertainment - Movies' , 'http://movies.inquirer.net/feed'),
-('Lifestyle' , 'http://lifestyle.inquirer.net/feed'),
+('Entertainment - Movies', 'http://movies.inquirer.net/feed'),
+
+('Lifestyle', 'http://lifestyle.inquirer.net/feed'),
 ('Lifestyle - Latest Stories', 'http://lifestyle.inquirer.net/category/latest-stories/feed'),
-('Lifestyle - Arts & Books' , 'http://lifestyle.inquirer.net/category/arts-and-books/feed'),
-('Lifestyle - Wellness' , 'http://lifestyle.inquirer.net/category/wellness/feed'),
+('Lifestyle - Arts & Books', 'http://lifestyle.inquirer.net/category/arts-and-books/feed'),
+('Lifestyle - Wellness', 'http://lifestyle.inquirer.net/category/wellness/feed'),
 ('Lifestyle - Home & Entertaining', 'http://lifestyle.inquirer.net/category/home-and-entertaining/feed'),
-('Lifestyle - Parenting' , 'http://lifestyle.inquirer.net/category/parenting/feed'),
-('Lifestyle - Food' , 'http://lifestyle.inquirer.net/category/food/feed'),
+('Lifestyle - Parenting', 'http://lifestyle.inquirer.net/category/parenting/feed'),
+('Lifestyle - Food', 'http://lifestyle.inquirer.net/category/food/feed'),
 ('Lifestyle - Fashion & Beauty', 'http://lifestyle.inquirer.net/category/fashion-and-beauty/feed'),
-('Lifestyle - Super' , 'http://lifestyle.inquirer.net/category/super/feed'),
-('Lifestyle - 2BU' , 'http://lifestyle.inquirer.net/category/2bu/feed'),
+('Lifestyle - Super', 'http://lifestyle.inquirer.net/category/super/feed'),
+('Lifestyle - 2BU', 'http://lifestyle.inquirer.net/category/2bu/feed'),
 ('Lifestyle - Sunday Lifestyle', 'http://lifestyle.inquirer.net/category/sunday-lifestyle/feed'),
-('Lifestyle - Wedding' , 'http://lifestyle.inquirer.net/category/sunday-lifestyle/wedding/feed'),
-('Lifestyle - Travel' , 'http://lifestyle.inquirer.net/category/sunday-lifestyle/travel/feed'),
-('Lifestyle - Relationship' , 'http://lifestyle.inquirer.net/category/sunday-lifestyle/relationship/feed'),
-('Opinion' , 'http://opinion.inquirer.net/feed'),
-('Opinion - Viewpoints' , 'http://opinion.inquirer.net/category/viewpoints/feed'),
+('Lifestyle - Wedding', 'http://lifestyle.inquirer.net/category/sunday-lifestyle/wedding/feed'),
+('Lifestyle - Travel', 'http://lifestyle.inquirer.net/category/sunday-lifestyle/travel/feed'),
+('Lifestyle - Relationship', 'http://lifestyle.inquirer.net/category/sunday-lifestyle/relationship/feed'),
+
+('Opinion', 'http://opinion.inquirer.net/feed'),
+('Opinion - Viewpoints', 'http://opinion.inquirer.net/category/viewpoints/feed'),
 ('Opinion - Talk of the Town', 'http://opinion.inquirer.net/category/inquirer-opinion/talk-of-the-town/feed'),
-('Editorial' , 'http://opinion.inquirer.net/category/editorial/feed'),
-('Letters to the Editor' , 'http://opinion.inquirer.net/category/letters-to-the-editor/feed'),
-('Columns' , 'http://opinion.inquirer.net/category/columns/feed'),
-('Citizens Journalism' , 'http://newsinfo.inquirer.net/category/citizens-journalism/feed'),
-('Cebu - Daily News' , 'http://newsinfo.inquirer.net/category/cdn/feed'),
-('Cebu - More News' , 'http://newsinfo.inquirer.net/category/cdn/cdn-news/feed'),
-('Cebu - Community' , 'http://newsinfo.inquirer.net/category/cdn/cdn-community/feed'),
-('Cebu - Metro' , 'http://newsinfo.inquirer.net/category/cdn/cdn-metro/feed'),
-('Cebu - Business' , 'http://newsinfo.inquirer.net/category/cdn/cdn-enterprise/feed'),
-('Cebu - Sports' , 'http://newsinfo.inquirer.net/category/cdn/cdn-sports/feed'),
-('Cebu - Visayas' , 'http://newsinfo.inquirer.net/category/cdn/cdn-visayas/feed'),
-('Cebu - Opinion' , 'http://newsinfo.inquirer.net/category/cdn/cdn-opinion/feed')
+('Editorial', 'http://opinion.inquirer.net/category/editorial/feed'),
+('Letters to the Editor', 'http://opinion.inquirer.net/category/letters-to-the-editor/feed'),
+('Columns', 'http://opinion.inquirer.net/category/columns/feed'),
+
+('Citizens Journalism', 'http://newsinfo.inquirer.net/category/citizens-journalism/feed'),
+('Cebu - Daily News', 'http://newsinfo.inquirer.net/category/cdn/feed'),
+('Cebu - More News', 'http://newsinfo.inquirer.net/category/cdn/cdn-news/feed'),
+('Cebu - Community', 'http://newsinfo.inquirer.net/category/cdn/cdn-community/feed'),
+('Cebu - Metro', 'http://newsinfo.inquirer.net/category/cdn/cdn-metro/feed'),
+('Cebu - Business', 'http://newsinfo.inquirer.net/category/cdn/cdn-enterprise/feed'),
+('Cebu - Sports', 'http://newsinfo.inquirer.net/category/cdn/cdn-sports/feed'),
+('Cebu - Visayas', 'http://newsinfo.inquirer.net/category/cdn/cdn-visayas/feed'),
+('Cebu - Opinion', 'http://newsinfo.inquirer.net/category/cdn/cdn-opinion/feed'),
 ]
@@ -227,7 +227,7 @@ class CanWestPaper(BasicNewsRecipe):
 divtags = soup.findAll('div', attrs={'id': ''})
 if divtags:
 for div in divtags:
-del (div['id'])
+del div['id']
 
 pgall = soup.find('div', attrs={'id': 'storyphoto'})
 if pgall is not None: # photo gallery perhaps

@@ -215,7 +215,7 @@ class CanWestPaper(BasicNewsRecipe):
 divtags = soup.findAll('div', attrs={'id': ''})
 if divtags:
 for div in divtags:
-del (div['id'])
+del div['id']
 
 pgall = soup.find('div', attrs={'id': 'storyphoto'})
 if pgall is not None: # photo gallery perhaps

@@ -38,7 +38,7 @@ class version2(BasicNewsRecipe):
 dict(name='span', attrs={'class': 'article-link-id'}),
 dict(name='section', attrs={'class': 'social-tools-pane'}),
 dict(name='section', attrs={'class': 'article-timeline'}),
-dict(name='div', attrs={'id' : 'mini-panel-comments_and_form'}),
+dict(name='div', attrs={'id': 'mini-panel-comments_and_form'}),
 dict(name='div', attrs={'class': 'related-articles top-three'}),
 dict(name='div', attrs={'id': 'mini-panel-jobfinder_1'}),
 dict(name='section', attrs={'id': 'mini-panel-frontpage_debat_zone'}),

@@ -53,7 +53,7 @@ class version2(BasicNewsRecipe):
 dict(name='section', attrs={'class': 'jobs-list'}),
 dict(name='footer', attrs={'id': 'footer'}),
 dict(name='section', attrs={'class': 'banner'}),
-dict(name='div', attrs={'class' : 'fast-track-frontpage'}),
+dict(name='div', attrs={'class': 'fast-track-frontpage'}),
 dict(name='a', attrs={'class': 'byline-comments'})
 ]
 

@@ -101,7 +101,7 @@ class weblogssl(BasicNewsRecipe):
 dict(name='div', attrs={'id':'comments'})
 ]
 
-remove_tags_after = dict(name='div' , attrs={'id':'comments'})
+remove_tags_after = dict(name='div', attrs={'id':'comments'})
 
 def print_version(self, url):
 if url.startswith('http://www'):

@@ -57,7 +57,7 @@ class CanWestPaper(BasicNewsRecipe):
 divtags = soup.findAll('div', attrs={'id': ''})
 if divtags:
 for div in divtags:
-del (div['id'])
+del div['id']
 return soup
 
 def parse_index(self):
@@ -57,7 +57,7 @@ class WiredDailyNews(BasicNewsRecipe):
 self.log('Parsing index page', currenturl)
 soup = self.index_to_soup(currenturl)
 baseurl = 'https://www.wired.com'
-for a in soup.find('div', {'class' : 'multi-packages'}).findAll('a', href=True):
+for a in soup.find('div', {'class': 'multi-packages'}).findAll('a', href=True):
 url = a['href']
 if url.startswith('/story') and url.endswith('/'):
 title = self.tag_to_string(a.parent.find('h3'))

@@ -115,7 +115,7 @@ class ZeitDe(BasicNewsRecipe):
 def print_version(self, url):
 # If there is a complete page, use that one
 req = Request(url=url+'/komplettansicht')
-req.get_method = lambda : 'HEAD'
+req.get_method = lambda: 'HEAD'
 try:
 urlopen(req)
 return url + '/komplettansicht'
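The ZeitDe hunk above also shows a urllib idiom worth calling out: overriding `Request.get_method` with a lambda so that `urlopen` issues a HEAD request, used here to probe whether a '/komplettansicht' (single-page) variant of an article exists. A self-contained sketch of the pattern, assuming Python 3's urllib; `has_full_view` is an illustrative helper, not calibre's API:

    from urllib.request import Request, urlopen
    from urllib.error import HTTPError, URLError

    def has_full_view(url):
        # Override get_method so urlopen sends HEAD instead of GET; this is
        # the stock urllib trick for non-GET verbs without extra libraries.
        req = Request(url=url + '/komplettansicht')
        req.get_method = lambda: 'HEAD'
        try:
            urlopen(req)
            return True
        except (HTTPError, URLError):
            return False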
|
@@ -326,7 +326,7 @@ def parse_plugin(raw, names, zf):
 
 def get_plugin_init(zf):
 metadata = None
-names = {x.decode('utf-8') if isinstance(x, bytes) else x : x for x in zf.namelist()}
+names = {x.decode('utf-8') if isinstance(x, bytes) else x: x for x in zf.namelist()}
 inits = [x for x in names if x.rpartition('/')[-1] == '__init__.py']
 inits.sort(key=lambda x:x.count('/'))
 if inits and inits[0] == '__init__.py':

@@ -378,7 +378,7 @@ def fetch_plugin(old_index, entry):
 # Previously downloaded plugin
 lm = datetime(*tuple(map(int, re.split(r'\D', plugin['last_modified'])))[:6])
 request = Request(url)
-request.get_method = lambda : 'HEAD'
+request.get_method = lambda: 'HEAD'
 with closing(urlopen(request)) as response:
 info = response.info()
 slm = datetime(*parsedate(info.get('Last-Modified'))[:6])

@@ -566,7 +566,7 @@ class Translations(POT): # {{{
 srcbase = self.j(self.d(self.SRC), 'translations', 'manual')
 destbase = self.j(self.d(self.SRC), 'manual', 'locale')
 complete = {}
-all_stats = defaultdict(lambda : {'translated': 0, 'untranslated': 0})
+all_stats = defaultdict(lambda: {'translated': 0, 'untranslated': 0})
 files = []
 for x in os.listdir(srcbase):
 q = self.j(srcbase, x)

@@ -266,10 +266,10 @@ def get_parsed_proxy(typ='http', debug=True):
 if match:
 try:
 ans = {
-'host' : match.group('host'),
-'port' : match.group('port'),
-'user' : match.group('user'),
-'pass' : match.group('pass')
+'host': match.group('host'),
+'port': match.group('port'),
+'user': match.group('user'),
+'pass': match.group('pass')
 }
 if ans['port']:
 ans['port'] = int(ans['port'])
@@ -452,11 +452,11 @@ def my_unichr(num):
 
 
 XML_ENTITIES = {
-'"' : '&quot;',
-"'" : '&apos;',
-'<' : '&lt;',
-'>' : '&gt;',
-'&' : '&amp;'
+'"': '&quot;',
+"'": '&apos;',
+'<': '&lt;',
+'>': '&gt;',
+'&': '&amp;'
 }
 
 
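For context on the XML_ENTITIES hunk above (whose entity strings the page extraction had decoded back into bare characters): the table maps the five characters XML reserves to their named entities. A minimal sketch of how such a table is typically applied; `escape_xml` is an illustrative helper, not calibre's API:

    XML_ENTITIES = {
        '"': '&quot;',
        "'": '&apos;',
        '<': '&lt;',
        '>': '&gt;',
        '&': '&amp;',
    }

    def escape_xml(text):
        # '&' must be replaced first, or the '&' inside freshly produced
        # entities would itself get escaped.
        for ch in '&<>"\'':
            text = text.replace(ch, XML_ENTITIES[ch])
        return text

    assert escape_xml('a < b & "c"') == 'a &lt; b &amp; &quot;c&quot;'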
|
@@ -1171,7 +1171,7 @@ class ActionPluginUpdater(InterfaceActionBase):
 plugins += [ActionAdd, ActionAllActions, ActionFetchAnnotations, ActionGenerateCatalog,
 ActionConvert, ActionDelete, ActionEditMetadata, ActionView,
 ActionFetchNews, ActionSaveToDisk, ActionQuickview, ActionPolish,
-ActionShowBookDetails,ActionRestart, ActionOpenFolder, ActionConnectShare,
+ActionShowBookDetails, ActionRestart, ActionOpenFolder, ActionConnectShare,
 ActionSendToDevice, ActionHelp, ActionPreferences, ActionSimilarBooks,
 ActionAddToLibrary, ActionEditCollections, ActionMatchBooks, ActionShowMatchedBooks, ActionChooseLibrary,
 ActionCopyToLibrary, ActionTweakEpub, ActionUnpackBook, ActionNextMatch, ActionStore,

@@ -1605,7 +1605,7 @@ class StoreBNStore(StoreBase):
 class StoreBeamEBooksDEStore(StoreBase):
 name = 'Beam EBooks DE'
 author = 'Charles Haley'
-description = 'Bei uns finden Sie: Tausende deutschsprachige e-books; Alle e-books ohne hartes DRM; PDF, ePub und Mobipocket Format; Sofortige Verfügbarkeit - 24 Stunden am Tag; Günstige Preise; e-books für viele Lesegeräte, PC,Mac und Smartphones; Viele Gratis e-books' # noqa: E501
+description = 'Bei uns finden Sie: Tausende deutschsprachige e-books; Alle e-books ohne hartes DRM; PDF, ePub und Mobipocket Format; Sofortige Verfügbarkeit - 24 Stunden am Tag; Günstige Preise; e-books für viele Lesegeräte, PC, Mac und Smartphones; Viele Gratis e-books' # noqa: E501
 actual_plugin = 'calibre.gui2.store.stores.beam_ebooks_de_plugin:BeamEBooksDEStore'
 
 drm_free_only = True

@@ -97,7 +97,7 @@ class Field:
 self._default_sort_key = ''
 
 if self.name == 'languages':
-self._sort_key = lambda x:sort_key(calibre_langcode_to_name(x))
+self._sort_key = lambda x: sort_key(calibre_langcode_to_name(x))
 self.is_multiple = (bool(self.metadata['is_multiple']) or self.name ==
 'formats')
 self.sort_sort_key = True
@@ -224,8 +224,8 @@ class OneToOneField(Field):
 ans = dk
 return ans
 return none_safe_key
-return lambda book_id:bcmg(book_id, dk)
-return lambda book_id:sk(bcmg(book_id, dk))
+return lambda book_id: bcmg(book_id, dk)
+return lambda book_id: sk(bcmg(book_id, dk))
 
 def iter_searchable_values(self, get_metadata, candidates, default_value=None):
 cbm = self.table.book_col_map

@@ -339,8 +339,8 @@ class CompositeField(OneToOneField):
 gv = self.get_value_with_cache
 sk = self._sort_key
 if sk is IDENTITY:
-return lambda book_id:gv(book_id, get_metadata)
-return lambda book_id:sk(gv(book_id, get_metadata))
+return lambda book_id: gv(book_id, get_metadata)
+return lambda book_id: sk(gv(book_id, get_metadata))
 
 def iter_searchable_values(self, get_metadata, candidates, default_value=None):
 val_map = defaultdict(set)

@@ -517,7 +517,7 @@ class ManyToOneField(Field):
 def sort_keys_for_books(self, get_metadata, lang_map):
 sk_map = LazySortMap(self._default_sort_key, self._sort_key, self.table.id_map)
 bcmg = self.table.book_col_map.get
-return lambda book_id:sk_map(bcmg(book_id, None))
+return lambda book_id: sk_map(bcmg(book_id, None))
 
 def iter_searchable_values(self, get_metadata, candidates, default_value=None):
 cbm = self.table.col_book_map

@@ -514,7 +514,7 @@ class LibraryDatabase:
 with self.new_api.safe_read_lock:
 book_ids = self.new_api._books_for_field('series', series_id)
 ff = self.new_api._field_for
-return sorted(book_ids, key=lambda x:ff('series_index', x))
+return sorted(book_ids, key=lambda x: ff('series_index', x))
 
 def books_in_series_of(self, index, index_is_id=False):
 book_id = index if index_is_id else self.id(index)
@@ -892,10 +892,10 @@ LibraryDatabase.get_tags = lambda self, book_id:set(self.new_api.field_for('tags
 LibraryDatabase.get_categories = lambda self, sort='name', ids=None:self.new_api.get_categories(sort=sort, book_ids=ids)
 LibraryDatabase.get_identifiers = lambda self, index, index_is_id=False: self.new_api.field_for('identifiers', index if index_is_id else self.id(index))
 LibraryDatabase.isbn = lambda self, index, index_is_id=False: self.get_identifiers(index, index_is_id=index_is_id).get('isbn', None)
-LibraryDatabase.get_books_for_category = lambda self, category, id_:self.new_api.get_books_for_category(category, id_)
+LibraryDatabase.get_books_for_category = lambda self, category, id_: self.new_api.get_books_for_category(category, id_)
 LibraryDatabase.get_data_as_dict = get_data_as_dict
-LibraryDatabase.find_identical_books = lambda self, mi:self.new_api.find_identical_books(mi)
-LibraryDatabase.get_top_level_move_items = lambda self:self.new_api.get_top_level_move_items()
+LibraryDatabase.find_identical_books = lambda self, mi: self.new_api.find_identical_books(mi)
+LibraryDatabase.get_top_level_move_items = lambda self: self.new_api.get_top_level_move_items()
 # }}}
 
 # Legacy setter API {{{

@@ -955,7 +955,7 @@ for field in ('authors', 'tags', 'publisher', 'series'):
 return func
 name = field[:-1] if field in {'authors', 'tags'} else field
 setattr(LibraryDatabase, 'all_%s_names' % name, getter(field))
-LibraryDatabase.all_formats = lambda self:self.new_api.all_field_names('formats')
+LibraryDatabase.all_formats = lambda self: self.new_api.all_field_names('formats')
 LibraryDatabase.all_custom = lambda self, label=None, num=None:self.new_api.all_field_names(self.custom_field_name(label, num))
 
 for func, field in iteritems({'all_authors':'authors', 'all_titles':'title', 'all_tags2':'tags', 'all_series':'series', 'all_publishers':'publisher'}):

@@ -1030,12 +1030,12 @@ for meth in ('get_next_series_num_for', 'has_book',):
 return func
 setattr(LibraryDatabase, meth, getter(meth))
 
-LibraryDatabase.saved_search_names = lambda self:self.new_api.saved_search_names()
-LibraryDatabase.saved_search_lookup = lambda self, x:self.new_api.saved_search_lookup(x)
-LibraryDatabase.saved_search_set_all = lambda self, smap:self.new_api.saved_search_set_all(smap)
-LibraryDatabase.saved_search_delete = lambda self, x:self.new_api.saved_search_delete(x)
-LibraryDatabase.saved_search_add = lambda self, x, y:self.new_api.saved_search_add(x, y)
-LibraryDatabase.saved_search_rename = lambda self, x, y:self.new_api.saved_search_rename(x, y)
+LibraryDatabase.saved_search_names = lambda self: self.new_api.saved_search_names()
+LibraryDatabase.saved_search_lookup = lambda self, x: self.new_api.saved_search_lookup(x)
+LibraryDatabase.saved_search_set_all = lambda self, smap: self.new_api.saved_search_set_all(smap)
+LibraryDatabase.saved_search_delete = lambda self, x: self.new_api.saved_search_delete(x)
+LibraryDatabase.saved_search_add = lambda self, x, y: self.new_api.saved_search_add(x, y)
+LibraryDatabase.saved_search_rename = lambda self, x, y: self.new_api.saved_search_rename(x, y)
 LibraryDatabase.commit_dirty_cache = lambda self: self.new_api.commit_dirty_cache()
 LibraryDatabase.author_sort_from_authors = lambda self, x: self.new_api.author_sort_from_authors(x)
 # Cleaning is not required anymore
@@ -416,7 +416,7 @@ class SavedSearchQueries: # {{{
 self._db = weakref.ref(db)
 except TypeError:
 # db could be None
-self._db = lambda : None
+self._db = lambda: None
 self.load_from_db()
 
 def load_from_db(self):

@@ -26,12 +26,12 @@ class ReadingTest(BaseTest):
 'Test the reading of data from the database'
 cache = self.init_cache(self.library_path)
 tests = {
-3 : {
+3: {
 'title': 'Unknown',
 'sort': 'Unknown',
 'authors': ('Unknown',),
 'author_sort': 'Unknown',
-'series' : None,
+'series': None,
 'series_index': 1.0,
 'rating': None,
 'tags': (),
@ -58,12 +58,12 @@ class ReadingTest(BaseTest):
|
|||||||
'size':None,
|
'size':None,
|
||||||
},
|
},
|
||||||
|
|
||||||
2 : {
|
2: {
|
||||||
'title': 'Title One',
|
'title': 'Title One',
|
||||||
'sort': 'One',
|
'sort': 'One',
|
||||||
'authors': ('Author One',),
|
'authors': ('Author One',),
|
||||||
'author_sort': 'One, Author',
|
'author_sort': 'One, Author',
|
||||||
'series' : 'A Series One',
|
'series': 'A Series One',
|
||||||
'series_index': 1.0,
|
'series_index': 1.0,
|
||||||
'tags':('Tag One', 'Tag Two'),
|
'tags':('Tag One', 'Tag Two'),
|
||||||
'formats': ('FMT1',),
|
'formats': ('FMT1',),
|
||||||
@ -88,12 +88,12 @@ class ReadingTest(BaseTest):
|
|||||||
'#comments': '<div>My Comments One<p></p></div>',
|
'#comments': '<div>My Comments One<p></p></div>',
|
||||||
'size':9,
|
'size':9,
|
||||||
},
|
},
|
||||||
1 : {
|
1: {
|
||||||
'title': 'Title Two',
|
'title': 'Title Two',
|
||||||
'sort': 'Title Two',
|
'sort': 'Title Two',
|
||||||
'authors': ('Author Two', 'Author One'),
|
'authors': ('Author Two', 'Author One'),
|
||||||
'author_sort': 'Two, Author & One, Author',
|
'author_sort': 'Two, Author & One, Author',
|
||||||
'series' : 'A Series One',
|
'series': 'A Series One',
|
||||||
'series_index': 2.0,
|
'series_index': 2.0,
|
||||||
'rating': 6.0,
|
'rating': 6.0,
|
||||||
'tags': ('Tag One', 'News'),
|
'tags': ('Tag One', 'News'),
|
||||||
@ -217,7 +217,7 @@ class ReadingTest(BaseTest):
|
|||||||
cache.set_field('#two', {10:2})
|
cache.set_field('#two', {10:2})
|
||||||
cache.set_field('#three', {i:i for i in range(1, 11)})
|
cache.set_field('#three', {i:i for i in range(1, 11)})
|
||||||
ae(list(range(1, 11)), cache.multisort([('#one', True), ('#two', True)], ids_to_sort=sorted(cache.all_book_ids())))
|
ae(list(range(1, 11)), cache.multisort([('#one', True), ('#two', True)], ids_to_sort=sorted(cache.all_book_ids())))
|
||||||
ae([4, 5, 1, 2, 3, 7,8, 9, 10, 6], cache.multisort([('#one', True), ('#two', False)], ids_to_sort=sorted(cache.all_book_ids())))
|
ae([4, 5, 1, 2, 3, 7, 8, 9, 10, 6], cache.multisort([('#one', True), ('#two', False)], ids_to_sort=sorted(cache.all_book_ids())))
|
||||||
ae([5, 4, 3, 2, 1, 10, 9, 8, 7, 6], cache.multisort([('#one', True), ('#two', False), ('#three', False)], ids_to_sort=sorted(cache.all_book_ids())))
|
ae([5, 4, 3, 2, 1, 10, 9, 8, 7, 6], cache.multisort([('#one', True), ('#two', False), ('#three', False)], ids_to_sort=sorted(cache.all_book_ids())))
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
@ -225,7 +225,7 @@ class ThumbnailCache:
|
|||||||
except OSError as err:
|
except OSError as err:
|
||||||
self.log('Failed to read thumbnail cache dir:', as_unicode(err))
|
self.log('Failed to read thumbnail cache dir:', as_unicode(err))
|
||||||
|
|
||||||
self.items = OrderedDict(sorted(items, key=lambda x:order.get(x[0], 0)))
|
self.items = OrderedDict(sorted(items, key=lambda x: order.get(x[0], 0)))
|
||||||
self._apply_size()
|
self._apply_size()
|
||||||
|
|
||||||
def _invalidate_sizes(self):
|
def _invalidate_sizes(self):
|
||||||
|
@ -26,7 +26,8 @@ class ANDROID(USBMS):
|
|||||||
|
|
||||||
VENDOR_ID = {
|
VENDOR_ID = {
|
||||||
# HTC
|
# HTC
|
||||||
0x0bb4 : {0xc02 : HTC_BCDS,
|
0x0bb4: {
|
||||||
|
0xc02 : HTC_BCDS,
|
||||||
0xc01 : HTC_BCDS,
|
0xc01 : HTC_BCDS,
|
||||||
0xff9 : HTC_BCDS,
|
0xff9 : HTC_BCDS,
|
||||||
0xc86 : HTC_BCDS,
|
0xc86 : HTC_BCDS,
|
||||||
@@ -46,131 +47,132 @@ class ANDROID(USBMS):
 0xcd6 : HTC_BCDS,
 0xce5 : HTC_BCDS,
 0xcec : HTC_BCDS,
-0x0cf5 : HTC_BCDS,
+0x0cf5: HTC_BCDS,
-0x2910 : HTC_BCDS,
+0x2910: HTC_BCDS,
 0xe77 : HTC_BCDS,
-0x0001 : [0x255],
+0x0001: [0x255],
 },
 
 # Eken
-0x040d : {0x8510 : [0x0001], 0x0851 : [0x1]},
+0x040d: {0x8510: [0x0001], 0x0851: [0x1]},
 
 # Trekstor
-0x1e68 : {
+0x1e68: {
-0x006a : [0x0231],
+0x006a: [0x0231],
-0x0062 : [0x222], # Surftab ventos https://bugs.launchpad.net/bugs/1204885
+0x0062: [0x222], # Surftab ventos https://bugs.launchpad.net/bugs/1204885
 },
 
 # Motorola
-0x22b8 : {
+0x22b8: {
-0x41d9 : [0x216], 0x2d61 : [0x100], 0x2d67 : [0x100],
+0x41d9: [0x216], 0x2d61: [0x100], 0x2d67: [0x100],
-0x2de8 : [0x229],
+0x2de8: [0x229],
-0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216],
+0x41db: [0x216], 0x4285: [0x216], 0x42a3: [0x216],
-0x4286 : [0x216], 0x42b3 : [0x216], 0x42b4 : [0x216],
+0x4286: [0x216], 0x42b3: [0x216], 0x42b4: [0x216],
-0x7086 : [0x0226], 0x70a8: [0x9999], 0x42c4 : [0x216],
+0x7086: [0x0226], 0x70a8: [0x9999], 0x42c4: [0x216],
-0x70c6 : [0x226], 0x70c7: [0x226],
+0x70c6: [0x226], 0x70c7: [0x226],
-0x4316 : [0x216],
+0x4316: [0x216],
-0x4317 : [0x216],
+0x4317: [0x216],
-0x42d6 : [0x216],
+0x42d6: [0x216],
-0x42d7 : [0x216],
+0x42d7: [0x216],
-0x42f7 : [0x216],
+0x42f7: [0x216],
-0x4365 : [0x216],
+0x4365: [0x216],
-0x4366 : [0x216],
+0x4366: [0x216],
-0x4371 : [0x216],
+0x4371: [0x216],
 },
 # Freescale
-0x15a2 : {
+0x15a2: {
-0x0c01 : [0x226]
+0x0c01: [0x226]
 },
 
 # Alcatel
-0x05c6 : {
+0x05c6: {
-0x9018 : [0x0226],
+0x9018: [0x0226],
 },
 
 # Sony Ericsson
 0xfce : {
-0xa173 : [0x216],
+0xa173: [0x216],
-0xd12e : [0x0100],
+0xd12e: [0x0100],
-0xe156 : [0x226],
+0xe156: [0x226],
-0xe15d : [0x226],
+0xe15d: [0x226],
-0xe14f : [0x0226],
+0xe14f: [0x0226],
-0x614f : [0x0226, 0x100],
+0x614f: [0x0226, 0x100],
-0x6156 : [0x0226, 0x100],
+0x6156: [0x0226, 0x100],
 },
 
 # Google
-0x18d1 : {
+0x18d1: {
-0x0001 : [0x0222, 0x0223, 0x230, 0x255, 0x9999],
+0x0001: [0x0222, 0x0223, 0x230, 0x255, 0x9999],
-0x0002 : [0x9999],
+0x0002: [0x9999],
-0x0003 : [0x0230, 0x9999],
+0x0003: [0x0230, 0x9999],
-0x4e11 : [0x0100, 0x226, 0x227],
+0x4e11: [0x0100, 0x226, 0x227],
-0x4e12 : [0x0100, 0x226, 0x227],
+0x4e12: [0x0100, 0x226, 0x227],
-0x4e21 : [0x0100, 0x226, 0x227, 0x231],
+0x4e21: [0x0100, 0x226, 0x227, 0x231],
-0x4e22 : [0x0100, 0x226, 0x227, 0x231],
+0x4e22: [0x0100, 0x226, 0x227, 0x231],
-0xb058 : [0x0222, 0x226, 0x227],
+0xb058: [0x0222, 0x226, 0x227],
-0x0ff9 : [0x0226],
+0x0ff9: [0x0226],
 0xc91 : HTC_BCDS,
-0xdddd : [0x216],
+0xdddd: [0x216],
-0x0d01 : [0x9999],
+0x0d01: [0x9999],
-0x0d02 : [0x9999],
+0x0d02: [0x9999],
-0x2d01 : [0x9999],
+0x2d01: [0x9999],
-0xdeed : [0x231, 0x226],
+0xdeed: [0x231, 0x226],
 },
 
 # Samsung
-0x04e8 : {0x681d : [0x0222, 0x0223, 0x0224, 0x0400],
-0x681c : [0x0222, 0x0223, 0x0224, 0x0400],
-0x6640 : [0x0100],
-0x685b : [0x0400, 0x0226],
-0x685e : [0x0400, 0x226],
-0x6860 : [0x0400],
-0x6863 : [0x226],
-0x6877 : [0x0400],
-0x689e : [0x0400],
-0xdeed : [0x0222],
-0x1234 : [0x0400],
+0x04e8: {
+0x681d: [0x0222, 0x0223, 0x0224, 0x0400],
+0x681c: [0x0222, 0x0223, 0x0224, 0x0400],
+0x6640: [0x0100],
+0x685b: [0x0400, 0x0226],
+0x685e: [0x0400, 0x226],
+0x6860: [0x0400],
+0x6863: [0x226],
+0x6877: [0x0400],
+0x689e: [0x0400],
+0xdeed: [0x0222],
+0x1234: [0x0400],
 },
 
 # Viewsonic/Vizio
-0x0489 : {
+0x0489: {
-0xc000 : [0x0226],
+0xc000: [0x0226],
-0xc001 : [0x0226],
+0xc001: [0x0226],
-0xc004 : [0x0226],
+0xc004: [0x0226],
-0x8801 : [0x0226, 0x0227],
+0x8801: [0x0226, 0x0227],
-0xe115 : [0x0216], # PocketBook A10
+0xe115: [0x0216], # PocketBook A10
 },
 
 # Another Viewsonic
-0x0bb0 : {
+0x0bb0: {
-0x2a2b : [0x0226, 0x0227],
+0x2a2b: [0x0226, 0x0227],
 },
 
 # Acer
-0x502 : {0x3203 : [0x0100, 0x224]},
+0x502 : {0x3203: [0x0100, 0x224]},
 
 # Dell
-0x413c : {0xb007 : [0x0100, 0x0224, 0x0226]},
+0x413c: {0xb007: [0x0100, 0x0224, 0x0226]},
 
 # LG
-0x1004 : {
+0x1004: {
-0x61c5 : [0x100, 0x226, 0x227, 0x229, 0x9999],
+0x61c5: [0x100, 0x226, 0x227, 0x229, 0x9999],
-0x61cc : [0x226, 0x227, 0x9999, 0x100],
+0x61cc: [0x226, 0x227, 0x9999, 0x100],
-0x61ce : [0x226, 0x227, 0x9999, 0x100],
+0x61ce: [0x226, 0x227, 0x9999, 0x100],
-0x618e : [0x226, 0x227, 0x9999, 0x100],
+0x618e: [0x226, 0x227, 0x9999, 0x100],
-0x6205 : [0x226, 0x227, 0x9999, 0x100],
+0x6205: [0x226, 0x227, 0x9999, 0x100],
-0x6234 : [0x231],
+0x6234: [0x231],
 },
 
 # Archos
-0x0e79 : {
+0x0e79: {
-0x1400 : [0x0222, 0x0216],
+0x1400: [0x0222, 0x0216],
-0x1408 : [0x0222, 0x0216],
+0x1408: [0x0222, 0x0216],
-0x1411 : [0x216],
+0x1411: [0x216],
-0x1417 : [0x0216],
+0x1417: [0x0216],
-0x1419 : [0x0216],
+0x1419: [0x0216],
-0x1420 : [0x0216],
+0x1420: [0x0216],
-0x1422 : [0x0216]
+0x1422: [0x0216]
 },
 
 # Huawei
@@ -178,35 +180,35 @@ class ANDROID(USBMS):
 # 0x45e : { 0x00e1 : [0x007], },
 
 # T-Mobile
-0x0408 : {0x03ba : [0x0109], },
+0x0408: {0x03ba: [0x0109]},
 
 # Xperia
-0x13d3 : {0x3304 : [0x0001, 0x0002]},
+0x13d3: {0x3304: [0x0001, 0x0002]},
 
 # ZTE
-0x19d2 : {0x1353 : [0x226], 0x1351 : [0x227]},
+0x19d2: {0x1353: [0x226], 0x1351: [0x227]},
 
 # Advent
-0x0955 : {0x7100 : [0x9999]}, # This is the same as the Notion Ink Adam
+0x0955: {0x7100: [0x9999]}, # This is the same as the Notion Ink Adam
 
 # Kobo
-0x2237: {0x2208 : [0x0226]},
+0x2237: {0x2208: [0x0226]},
 
 # Lenovo
-0x17ef : {
+0x17ef: {
-0x7421 : [0x0216],
+0x7421: [0x0216],
-0x741b : [0x9999],
+0x741b: [0x9999],
-0x7640 : [0x0255],
+0x7640: [0x0255],
 },
 
 # Pantech
-0x10a9 : {0x6050 : [0x227]},
+0x10a9: {0x6050: [0x227]},
 
 # Prestigio and Teclast
-0x2207 : {0 : [0x222], 0x10 : [0x222]},
+0x2207: {0: [0x222], 0x10: [0x222]},
 
 # OPPO
-0x22d9 : {0x2768: [0x228]},
+0x22d9: {0x2768: [0x228]},
 
 }
 EBOOK_DIR_MAIN = ['eBooks/import', 'wordplayer/calibretransfer', 'Books',
@@ -312,7 +312,7 @@ class KINDLE(USBMS):
 divTag.insert(dtc, annot)
 dtc += 1
 
-ka_soup.insert(0,divTag)
+ka_soup.insert(0, divTag)
 return ka_soup
 
 def add_annotation_to_library(self, db, db_id, annotation):
@@ -135,7 +135,7 @@ class Bookmark: # {{{
 'chapter_title': chapter_title,
 'chapter_progress': chapter_progress}
 previous_chapter = current_chapter
-# debug_print("e_type:" , e_type, '\t', 'loc: ', note_id, 'text: ', text,
+# debug_print("e_type:", e_type, '\t', 'loc: ', note_id, 'text: ', text,
 # 'annotation: ', annotation, 'chapter_title: ', chapter_title,
 # 'chapter_progress: ', chapter_progress, 'date: ')
 
@@ -1,5 +1,5 @@
 __license__ = 'GPL v3'
-__copyright__ = '2010-2012, , Timothy Legge <timlegge at gmail.com> and David Forrester <davidfor@internode.on.net>'
+__copyright__ = '2010-2012, Timothy Legge <timlegge at gmail.com> and David Forrester <davidfor@internode.on.net>'
 __docformat__ = 'restructuredtext en'
 
 import os
@@ -292,7 +292,7 @@ class KOBO(USBMS):
 if lpath.startswith(os.sep):
 lpath = lpath[len(os.sep):]
 lpath = lpath.replace('\\', '/')
-# debug_print("LPATH: ", lpath, " - Title: " , title)
+# debug_print("LPATH: ", lpath, " - Title: ", title)
 
 playlist_map = {}
 
@@ -1241,7 +1241,7 @@ class KOBO(USBMS):
 myBookmark = Bookmark(connection, ContentID, path_map[book_id], book_id, book_ext[book_id], bookmark_ext)
 bookmarked_books[book_id] = self.UserAnnotation(type='kobo_bookmark', value=myBookmark)
 
-# This returns as job.result in gui2.ui.annotations_fetched(self,job)
+# This returns as job.result in gui2.ui.annotations_fetched(self, job)
 return bookmarked_books
 
 def generate_annotation_html(self, bookmark):
@@ -1348,7 +1348,7 @@ class KOBO(USBMS):
 divTag.insert(dtc, annot)
 dtc += 1
 
-ka_soup.insert(0,divTag)
+ka_soup.insert(0, divTag)
 return ka_soup
 
 def add_annotation_to_library(self, db, db_id, annotation):
@@ -1749,7 +1749,7 @@ class KOBOTOUCH(KOBO):
 if lpath.startswith(os.sep):
 lpath = lpath[len(os.sep):]
 lpath = lpath.replace('\\', '/')
-# debug_print("KoboTouch:update_booklist - LPATH: ", lpath, " - Title: " , title)
+# debug_print("KoboTouch:update_booklist - LPATH: ", lpath, " - Title: ", title)
 
 playlist_map = {}
 
@@ -177,7 +177,7 @@ class FileOrFolder:
 line = '%s%s %s [id:%s %s]'%(prefix, c, self.name, self.object_id, data)
 prints(line, file=out)
 for c in (self.folders, self.files):
-for e in sorted(c, key=lambda x:sort_key(x.name)):
+for e in sorted(c, key=lambda x: sort_key(x.name)):
 e.dump(prefix=prefix+' ', out=out)
 
 def list(self, recurse=False):
@@ -51,7 +51,7 @@ MIME_MAP = {
 'lrx' : 'application/x-sony-bbeb',
 'rtf' : 'application/rtf',
 'pdf' : 'application/pdf',
-'txt' : 'text/plain' ,
+'txt' : 'text/plain',
 'epub': 'application/epub+zip',
 }
 
@@ -482,7 +482,7 @@ class XMLCache:
 
 try:
 pubdate = strftime(book.pubdate.utctimetuple(),
-zone=lambda x : x)
+zone=lambda x: x)
 record.set('publicationDate', pubdate)
 except:
 pass
@@ -705,8 +705,8 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
 'metadata': book_metadata, 'thisBook': this_book,
 'totalBooks': total_books,
 'willStreamBooks': True,
-'willStreamBinary' : True,
+'willStreamBinary': True,
-'wantsSendOkToSendbook' : self.can_send_ok_to_sendbook,
+'wantsSendOkToSendbook': self.can_send_ok_to_sendbook,
 'canSupportLpathChanges': True},
 print_debug_info=False,
 wait_for_response=self.can_send_ok_to_sendbook)
@@ -744,7 +744,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
 lastmod = parse_date(lastmod)
 if key in self.device_book_cache and self.device_book_cache[key]['book'].last_modified == lastmod:
 self.device_book_cache[key]['last_used'] = now()
-return self.device_book_cache[key]['book'].deepcopy(lambda : SDBook('', ''))
+return self.device_book_cache[key]['book'].deepcopy(lambda: SDBook('', ''))
 except:
 traceback.print_exc()
 return None
@@ -1247,8 +1247,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
 {'location_code': 'main', 'name':name})
 
 @synchronous('sync_lock')
-def reset(self, key='-1', log_packets=False, report_progress=None,
-detected_device=None) :
+def reset(self, key='-1', log_packets=False, report_progress=None, detected_device=None):
 self._debug()
 self.set_progress_reporter(report_progress)
 
@@ -1573,7 +1572,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
 position = 0
 while not eof:
 opcode, result = self._call_client('GET_BOOK_FILE_SEGMENT',
-{'lpath' : path, 'position': position,
+{'lpath': path, 'position': position,
 'thisBook': this_book, 'totalBooks': total_books,
 'canStream':True, 'canStreamBinary': True},
 print_debug_info=False)
@@ -1614,7 +1613,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
 other_info['id_link_rules'] = msprefs.get('id_link_rules', {})
 
 self._call_client('SET_LIBRARY_INFO',
-{'libraryName' : library_name,
+{'libraryName': library_name,
 'libraryUuid': library_uuid,
 'fieldMetadata': field_metadata.all_metadata(),
 'otherInfo': other_info},
@@ -100,7 +100,7 @@ def find_declared_encoding(raw, limit=50*1024):
 return ans
 
 
-_CHARSET_ALIASES = {'macintosh' : 'mac-roman', 'x-sjis' : 'shift-jis', 'mac-centraleurope': 'cp1250'}
+_CHARSET_ALIASES = {'macintosh': 'mac-roman', 'x-sjis': 'shift-jis', 'mac-centraleurope': 'cp1250'}
 
 
 def detect(bytestring):
@@ -198,13 +198,13 @@ def add_input_output_options(parser, plumber):
 
 def add_pipeline_options(parser, plumber):
 groups = OrderedDict((
-('' , ('',
+('', ('',
 [
 'input_profile',
 'output_profile',
 ]
 )),
-(_('LOOK AND FEEL') , (
+(_('LOOK AND FEEL'), (
 _('Options to control the look and feel of the output'),
 [
 'base_font_size', 'disable_font_rescaling',
@@ -223,7 +223,7 @@ def add_pipeline_options(parser, plumber):
 ]
 )),
 
-(_('HEURISTIC PROCESSING') , (
+(_('HEURISTIC PROCESSING'), (
 _('Modify the document text and structure using common'
 ' patterns. Disabled by default. Use %(en)s to enable. '
 ' Individual actions can be disabled with the %(dis)s options.')
@@ -231,7 +231,7 @@ def add_pipeline_options(parser, plumber):
 ['enable_heuristics'] + HEURISTIC_OPTIONS
 )),
 
-(_('SEARCH AND REPLACE') , (
+(_('SEARCH AND REPLACE'), (
 _('Modify the document text and structure using user defined patterns.'),
 [
 'sr1_search', 'sr1_replace',
@@ -241,7 +241,7 @@ def add_pipeline_options(parser, plumber):
 ]
 )),
 
-(_('STRUCTURE DETECTION') , (
+(_('STRUCTURE DETECTION'), (
 _('Control auto-detection of document structure.'),
 [
 'chapter', 'chapter_mark',
@@ -251,7 +251,7 @@ def add_pipeline_options(parser, plumber):
 ]
 )),
 
-(_('TABLE OF CONTENTS') , (
+(_('TABLE OF CONTENTS'), (
 _('Control the automatic generation of a Table of Contents. By '
 'default, if the source file has a Table of Contents, it will '
 'be used in preference to the automatically generated one.'),
@@ -262,7 +262,7 @@ def add_pipeline_options(parser, plumber):
 ]
 )),
 
-(_('METADATA') , (_('Options to set metadata in the output'),
+(_('METADATA'), (_('Options to set metadata in the output'),
 plumber.metadata_option_names + ['read_metadata_from_opf'],
 )),
 (_('DEBUG'), (_('Options to help with debugging the conversion'),
@ -13,7 +13,7 @@ from calibre.customize.conversion import OptionRecommendation, OutputFormatPlugi
|
|||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
from polyglot.builtins import iteritems
|
from polyglot.builtins import iteritems
|
||||||
|
|
||||||
UNITS = ('millimeter', 'centimeter', 'point', 'inch' , 'pica' , 'didot',
|
UNITS = ('millimeter', 'centimeter', 'point', 'inch', 'pica', 'didot',
|
||||||
'cicero', 'devicepixel')
|
'cicero', 'devicepixel')
|
||||||
|
|
||||||
PAPER_SIZES = ('a0', 'a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'b0', 'b1',
|
PAPER_SIZES = ('a0', 'a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'b0', 'b1',
|
||||||
|
@ -11,8 +11,8 @@ from calibre.utils.resources import get_path as P
|
|||||||
from polyglot.builtins import as_bytes, iteritems
|
from polyglot.builtins import as_bytes, iteritems
|
||||||
|
|
||||||
border_style_map = {
|
border_style_map = {
|
||||||
'single' : 'solid',
|
'single': 'solid',
|
||||||
'double-thickness-border' : 'double',
|
'double-thickness-border': 'double',
|
||||||
'shadowed-border': 'outset',
|
'shadowed-border': 'outset',
|
||||||
'double-border': 'double',
|
'double-border': 'double',
|
||||||
'dotted-border': 'dotted',
|
'dotted-border': 'dotted',
|
||||||
@ -288,7 +288,7 @@ class RTFInput(InputFormatPlugin):
|
|||||||
self.log('Converting XML to HTML...')
|
self.log('Converting XML to HTML...')
|
||||||
inline_class = InlineClass(self.log)
|
inline_class = InlineClass(self.log)
|
||||||
styledoc = safe_xml_fromstring(P('templates/rtf.xsl', data=True), recover=False)
|
styledoc = safe_xml_fromstring(P('templates/rtf.xsl', data=True), recover=False)
|
||||||
extensions = {('calibre', 'inline-class') : inline_class}
|
extensions = {('calibre', 'inline-class'): inline_class}
|
||||||
transform = etree.XSLT(styledoc, extensions=extensions)
|
transform = etree.XSLT(styledoc, extensions=extensions)
|
||||||
result = transform(doc)
|
result = transform(doc)
|
||||||
html = 'index.xhtml'
|
html = 'index.xhtml'
|
||||||
|
@ -49,12 +49,14 @@ class SNBInput(InputFormatPlugin):
|
|||||||
meta = snbFile.GetFileStream('snbf/book.snbf')
|
meta = snbFile.GetFileStream('snbf/book.snbf')
|
||||||
if meta is not None:
|
if meta is not None:
|
||||||
meta = safe_xml_fromstring(meta)
|
meta = safe_xml_fromstring(meta)
|
||||||
l = {'title' : './/head/name',
|
l = {
|
||||||
|
'title' : './/head/name',
|
||||||
'creator' : './/head/author',
|
'creator' : './/head/author',
|
||||||
'language' : './/head/language',
|
'language' : './/head/language',
|
||||||
'generator': './/head/generator',
|
'generator': './/head/generator',
|
||||||
'publisher': './/head/publisher',
|
'publisher': './/head/publisher',
|
||||||
'cover' : './/head/cover', }
|
'cover' : './/head/cover',
|
||||||
|
}
|
||||||
d = {}
|
d = {}
|
||||||
for item in l:
|
for item in l:
|
||||||
node = meta.find(l[item])
|
node = meta.find(l[item])
|
||||||
|
@ -480,7 +480,7 @@ class HTMLPreProcessor:
|
|||||||
start_rules = []
|
start_rules = []
|
||||||
|
|
||||||
if not getattr(self.extra_opts, 'keep_ligatures', False):
|
if not getattr(self.extra_opts, 'keep_ligatures', False):
|
||||||
html = _ligpat.sub(lambda m:LIGATURES[m.group()], html)
|
html = _ligpat.sub(lambda m: LIGATURES[m.group()], html)
|
||||||
|
|
||||||
user_sr_rules = {}
|
user_sr_rules = {}
|
||||||
# Function for processing search and replace
|
# Function for processing search and replace
|
||||||
|
@ -449,7 +449,7 @@ class HeuristicProcessor:
|
|||||||
for i in range(2):
|
for i in range(2):
|
||||||
html = re.sub(r'\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*', ' ', html)
|
html = re.sub(r'\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*', ' ', html)
|
||||||
html = re.sub(
|
html = re.sub(
|
||||||
r'\s*{open}\s*({open}\s*{close}\s*){{0,2}}\s*{close}'.format(open=open_fmt_pat, close=close_fmt_pat) , ' ', html)
|
r'\s*{open}\s*({open}\s*{close}\s*){{0,2}}\s*{close}'.format(open=open_fmt_pat, close=close_fmt_pat), ' ', html)
|
||||||
# delete surrounding divs from empty paragraphs
|
# delete surrounding divs from empty paragraphs
|
||||||
html = re.sub(r'<div[^>]*>\s*<p[^>]*>\s*</p>\s*</div>', '<p> </p>', html)
|
html = re.sub(r'<div[^>]*>\s*<p[^>]*>\s*</p>\s*</div>', '<p> </p>', html)
|
||||||
# Empty heading tags
|
# Empty heading tags
|
||||||
|
@ -109,7 +109,7 @@ class DJVUFile:
|
|||||||
def __init__(self, instream, verbose=0):
|
def __init__(self, instream, verbose=0):
|
||||||
self.instream = instream
|
self.instream = instream
|
||||||
buf = self.instream.read(4)
|
buf = self.instream.read(4)
|
||||||
assert (buf == b'AT&T')
|
assert buf == b'AT&T'
|
||||||
buf = self.instream.read()
|
buf = self.instream.read()
|
||||||
self.dc = DjvuChunk(buf, 0, len(buf), verbose=verbose)
|
self.dc = DjvuChunk(buf, 0, len(buf), verbose=verbose)
|
||||||
|
|
||||||
|
@ -416,7 +416,7 @@ class BZZDecoder:
|
|||||||
# Create machine independent ffz table
|
# Create machine independent ffz table
|
||||||
for i in range(256):
|
for i in range(256):
|
||||||
j = i
|
j = i
|
||||||
while (j & 0x80):
|
while j & 0x80:
|
||||||
self.ffzt[i] += 1
|
self.ffzt[i] += 1
|
||||||
j <<= 1
|
j <<= 1
|
||||||
# Initialize table
|
# Initialize table
|
||||||
|
@ -88,7 +88,7 @@ LINE_STYLES = { # {{{
|
|||||||
'thick': 'solid',
|
'thick': 'solid',
|
||||||
'thickThinLargeGap': 'double',
|
'thickThinLargeGap': 'double',
|
||||||
'thickThinMediumGap': 'double',
|
'thickThinMediumGap': 'double',
|
||||||
'thickThinSmallGap' : 'double',
|
'thickThinSmallGap': 'double',
|
||||||
'thinThickLargeGap': 'double',
|
'thinThickLargeGap': 'double',
|
||||||
'thinThickMediumGap': 'double',
|
'thinThickMediumGap': 'double',
|
||||||
'thinThickSmallGap': 'double',
|
'thinThickSmallGap': 'double',
|
||||||
|
@ -106,7 +106,7 @@ def process_index(field, index, xe_fields, log, XPath, expand):
|
|||||||
if styles:
|
if styles:
|
||||||
heading_style = styles[0]
|
heading_style = styles[0]
|
||||||
else:
|
else:
|
||||||
items = sorted(xe_fields, key=lambda x:sort_key(x['text']))
|
items = sorted(xe_fields, key=lambda x: sort_key(x['text']))
|
||||||
|
|
||||||
hyperlinks = []
|
hyperlinks = []
|
||||||
blocks = []
|
blocks = []
|
||||||
|
@ -19,7 +19,7 @@ TRANSITIONAL_NAMES = {
|
|||||||
'STYLES' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles',
|
'STYLES' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles',
|
||||||
'NUMBERING' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering',
|
'NUMBERING' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering',
|
||||||
'FONTS' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable',
|
'FONTS' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable',
|
||||||
'EMBEDDED_FONT' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/font',
|
'EMBEDDED_FONT': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/font',
|
||||||
'IMAGES' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image',
|
'IMAGES' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image',
|
||||||
'LINKS' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink',
|
'LINKS' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink',
|
||||||
'FOOTNOTES' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes',
|
'FOOTNOTES' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes',
|
||||||
|
@ -39,7 +39,7 @@ def alphabet(val, lower=True):
|
|||||||
|
|
||||||
alphabet_map = {
|
alphabet_map = {
|
||||||
'lower-alpha':alphabet, 'upper-alpha':partial(alphabet, lower=False),
|
'lower-alpha':alphabet, 'upper-alpha':partial(alphabet, lower=False),
|
||||||
'lower-roman':lambda x:roman(x).lower(), 'upper-roman':roman,
|
'lower-roman':lambda x: roman(x).lower(), 'upper-roman':roman,
|
||||||
'decimal-leading-zero': lambda x: '0%d' % x
|
'decimal-leading-zero': lambda x: '0%d' % x
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -361,7 +361,7 @@ class Numbering:
|
|||||||
if child.tag == 'li':
|
if child.tag == 'li':
|
||||||
if current_run:
|
if current_run:
|
||||||
last = current_run[-1]
|
last = current_run[-1]
|
||||||
if (last.get('list-id') , last.get('list-lvl')) != (child.get('list-id'), child.get('list-lvl')):
|
if (last.get('list-id'), last.get('list-lvl')) != (child.get('list-id'), child.get('list-lvl')):
|
||||||
commit(current_run)
|
commit(current_run)
|
||||||
current_run.append(child)
|
current_run.append(child)
|
||||||
else:
|
else:
|
||||||
|
@ -36,7 +36,7 @@ class PRS500_PROFILE:
|
|||||||
|
|
||||||
def find_custom_fonts(options, logger):
|
def find_custom_fonts(options, logger):
|
||||||
from calibre.utils.fonts.scanner import font_scanner
|
from calibre.utils.fonts.scanner import font_scanner
|
||||||
fonts = {'serif' : None, 'sans' : None, 'mono' : None}
|
fonts = {'serif': None, 'sans': None, 'mono': None}
|
||||||
|
|
||||||
def family(cmd):
|
def family(cmd):
|
||||||
return cmd.split(',')[-1].strip()
|
return cmd.split(',')[-1].strip()
|
||||||
@ -106,7 +106,7 @@ def Book(options, logger, font_delta=0, header=None,
|
|||||||
|
|
||||||
for family in ['serif', 'sans', 'mono']:
|
for family in ['serif', 'sans', 'mono']:
|
||||||
if not fonts[family]:
|
if not fonts[family]:
|
||||||
fonts[family] = {'normal' : (None, profile.default_fonts[family])}
|
fonts[family] = {'normal': (None, profile.default_fonts[family])}
|
||||||
elif 'normal' not in fonts[family]:
|
elif 'normal' not in fonts[family]:
|
||||||
raise ConversionError('Could not find the normal version of the ' + family + ' font')
|
raise ConversionError('Could not find the normal version of the ' + family + ' font')
|
||||||
return book, fonts
|
return book, fonts
|
||||||
|
@ -10,7 +10,7 @@ Default fonts used in the PRS500
|
|||||||
|
|
||||||
LIBERATION_FONT_MAP = {
|
LIBERATION_FONT_MAP = {
|
||||||
'Swis721 BT Roman' : 'LiberationSans-Regular',
|
'Swis721 BT Roman' : 'LiberationSans-Regular',
|
||||||
'Dutch801 Rm BT Roman' : 'LiberationSerif-Regular',
|
'Dutch801 Rm BT Roman': 'LiberationSerif-Regular',
|
||||||
'Courier10 BT Roman' : 'LiberationMono-Regular',
|
'Courier10 BT Roman' : 'LiberationMono-Regular',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1706,7 +1706,7 @@ class HTMLConverter:
|
|||||||
|
|
||||||
self.process_children(tag, tag_css, tag_pseudo_css)
|
self.process_children(tag, tag_css, tag_pseudo_css)
|
||||||
|
|
||||||
if self.current_para.contents :
|
if self.current_para.contents:
|
||||||
self.current_block.append(self.current_para)
|
self.current_block.append(self.current_para)
|
||||||
self.current_para = Paragraph()
|
self.current_para = Paragraph()
|
||||||
if tagname.startswith('h') or self.blank_after_para:
|
if tagname.startswith('h') or self.blank_after_para:
|
||||||
|
@ -338,11 +338,11 @@ class Page(LRFStream):
|
|||||||
}
|
}
|
||||||
tag_map.update(PageAttr.tag_map)
|
tag_map.update(PageAttr.tag_map)
|
||||||
tag_map.update(LRFStream.tag_map)
|
tag_map.update(LRFStream.tag_map)
|
||||||
style = property(fget=lambda self : self._document.objects[self.style_id])
|
style = property(fget=lambda self: self._document.objects[self.style_id])
|
||||||
evenheader = property(fget=lambda self : self._document.objects[self.style.evenheaderid])
|
evenheader = property(fget=lambda self: self._document.objects[self.style.evenheaderid])
|
||||||
evenfooter = property(fget=lambda self : self._document.objects[self.style.evenfooterid])
|
evenfooter = property(fget=lambda self: self._document.objects[self.style.evenfooterid])
|
||||||
oddheader = property(fget=lambda self : self._document.objects[self.style.oddheaderid])
|
oddheader = property(fget=lambda self: self._document.objects[self.style.oddheaderid])
|
||||||
oddfooter = property(fget=lambda self : self._document.objects[self.style.oddfooterid])
|
oddfooter = property(fget=lambda self: self._document.objects[self.style.oddfooterid])
|
||||||
|
|
||||||
class Content(LRFContentObject):
|
class Content(LRFContentObject):
|
||||||
tag_map = {
|
tag_map = {
|
||||||
@ -531,7 +531,7 @@ class TextCSS:
|
|||||||
|
|
||||||
class TextAttr(StyleObject, LRFObject, TextCSS):
|
class TextAttr(StyleObject, LRFObject, TextCSS):
|
||||||
|
|
||||||
FONT_MAP = collections.defaultdict(lambda : 'serif')
|
FONT_MAP = collections.defaultdict(lambda: 'serif')
|
||||||
for key, value in PRS500_PROFILE.default_fonts.items():
|
for key, value in PRS500_PROFILE.default_fonts.items():
|
||||||
FONT_MAP[value] = key
|
FONT_MAP[value] = key
|
||||||
|
|
||||||
@ -571,8 +571,8 @@ class Block(LRFStream, TextCSS):
|
|||||||
extra_attrs = [i[0] for i in BlockAttr.tag_map.values()]
|
extra_attrs = [i[0] for i in BlockAttr.tag_map.values()]
|
||||||
extra_attrs.extend([i[0] for i in TextAttr.tag_map.values()])
|
extra_attrs.extend([i[0] for i in TextAttr.tag_map.values()])
|
||||||
|
|
||||||
style = property(fget=lambda self : self._document.objects[self.style_id])
|
style = property(fget=lambda self: self._document.objects[self.style_id])
|
||||||
textstyle = property(fget=lambda self : self._document.objects[self.textstyle_id])
|
textstyle = property(fget=lambda self: self._document.objects[self.textstyle_id])
|
||||||
|
|
||||||
def initialize(self):
|
def initialize(self):
|
||||||
self.attrs = {}
|
self.attrs = {}
|
||||||
@ -638,7 +638,7 @@ class Text(LRFStream):
|
|||||||
tag_map.update(TextAttr.tag_map)
|
tag_map.update(TextAttr.tag_map)
|
||||||
tag_map.update(LRFStream.tag_map)
|
tag_map.update(LRFStream.tag_map)
|
||||||
|
|
||||||
style = property(fget=lambda self : self._document.objects[self.style_id])
|
style = property(fget=lambda self: self._document.objects[self.style_id])
|
||||||
|
|
||||||
text_map = {0x22: '"', 0x26: '&', 0x27: "'", 0x3c: '<', 0x3e: '>'}
|
text_map = {0x22: '"', 0x26: '&', 0x27: "'", 0x3c: '<', 0x3e: '>'}
|
||||||
entity_pattern = re.compile(r'&(\S+?);')
|
entity_pattern = re.compile(r'&(\S+?);')
|
||||||
@ -931,8 +931,8 @@ class Image(LRFObject):
|
|||||||
def parse_image_size(self, tag, f):
|
def parse_image_size(self, tag, f):
|
||||||
self.xsize, self.ysize = struct.unpack('<HH', tag.contents)
|
self.xsize, self.ysize = struct.unpack('<HH', tag.contents)
|
||||||
|
|
||||||
encoding = property(fget=lambda self : self._document.objects[self.refstream].encoding)
|
encoding = property(fget=lambda self: self._document.objects[self.refstream].encoding)
|
||||||
data = property(fget=lambda self : self._document.objects[self.refstream].stream)
|
data = property(fget=lambda self: self._document.objects[self.refstream].stream)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return '<Image objid="%s" x0="%d" y0="%d" x1="%d" y1="%d" xsize="%d" ysize="%d" refstream="%d" />\n'%\
|
return '<Image objid="%s" x0="%d" y0="%d" x1="%d" y1="%d" xsize="%d" ysize="%d" refstream="%d" />\n'%\
|
||||||
@ -1021,7 +1021,7 @@ class ImageStream(LRFStream):
|
|||||||
|
|
||||||
tag_map.update(LRFStream.tag_map)
|
tag_map.update(LRFStream.tag_map)
|
||||||
|
|
||||||
encoding = property(fget=lambda self : self.imgext[self.stream_flags & 0xFF].upper())
|
encoding = property(fget=lambda self: self.imgext[self.stream_flags & 0xFF].upper())
|
||||||
|
|
||||||
def end_stream(self, *args):
|
def end_stream(self, *args):
|
||||||
LRFStream.end_stream(self, *args)
|
LRFStream.end_stream(self, *args)
|
||||||
@ -1122,8 +1122,8 @@ class Button(LRFObject):
|
|||||||
s += '</Button>\n'
|
s += '</Button>\n'
|
||||||
return s
|
return s
|
||||||
|
|
||||||
refpage = property(fget=lambda self : self.jump_action(2)[0])
|
refpage = property(fget=lambda self: self.jump_action(2)[0])
|
||||||
refobj = property(fget=lambda self : self.jump_action(2)[1])
|
refobj = property(fget=lambda self: self.jump_action(2)[1])
|
||||||
|
|
||||||
|
|
||||||
class Window(LRFObject):
|
class Window(LRFObject):
|
||||||
@ -1173,7 +1173,7 @@ class BookAttr(StyleObject, LRFObject):
|
|||||||
}
|
}
|
||||||
tag_map.update(ruby_tags)
|
tag_map.update(ruby_tags)
|
||||||
tag_map.update(LRFObject.tag_map)
|
tag_map.update(LRFObject.tag_map)
|
||||||
binding_map = {1: 'Lr', 16 : 'Rl'}
|
binding_map = {1: 'Lr', 16: 'Rl'}
|
||||||
|
|
||||||
def __init__(self, document, stream, id, scramble_key, boundary):
|
def __init__(self, document, stream, id, scramble_key, boundary):
|
||||||
self.font_link_list = []
|
self.font_link_list = []
|
||||||
|
@ -243,7 +243,7 @@ def get_comic_images(path, tdir, first=1, last=0): # first and last use 1 based
|
|||||||
if fmt == 'rar':
|
if fmt == 'rar':
|
||||||
from calibre.utils.unrar import headers
|
from calibre.utils.unrar import headers
|
||||||
for h in headers(path):
|
for h in headers(path):
|
||||||
items[h['filename']] = lambda : partial(h.get, 'file_time', 0)
|
items[h['filename']] = lambda: partial(h.get, 'file_time', 0)
|
||||||
else:
|
else:
|
||||||
from zipfile import ZipFile
|
from zipfile import ZipFile
|
||||||
with ZipFile(path) as zf:
|
with ZipFile(path) as zf:
|
||||||
|
@ -213,7 +213,7 @@ class Metadata:
|
|||||||
if f['datatype'] == 'composite' and f['#value#'] is None:
|
if f['datatype'] == 'composite' and f['#value#'] is None:
|
||||||
self.get(field)
|
self.get(field)
|
||||||
|
|
||||||
def deepcopy(self, class_generator=lambda : Metadata(None)):
|
def deepcopy(self, class_generator=lambda: Metadata(None)):
|
||||||
''' Do not use this method unless you know what you are doing, if you
|
''' Do not use this method unless you know what you are doing, if you
|
||||||
want to create a simple clone of this object, use :meth:`deepcopy_metadata`
|
want to create a simple clone of this object, use :meth:`deepcopy_metadata`
|
||||||
instead. Class_generator must be a function that returns an instance
|
instead. Class_generator must be a function that returns an instance
|
||||||
|
@ -43,7 +43,7 @@ COMMENT_NAMES = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
META_NAMES = {
|
META_NAMES = {
|
||||||
'title' : ('dc.title', 'dcterms.title', 'title'),
|
'title': ('dc.title', 'dcterms.title', 'title'),
|
||||||
'authors': ('author', 'dc.creator.aut', 'dcterms.creator.aut', 'dc.creator'),
|
'authors': ('author', 'dc.creator.aut', 'dcterms.creator.aut', 'dc.creator'),
|
||||||
'publisher': ('publisher', 'dc.publisher', 'dcterms.publisher'),
|
'publisher': ('publisher', 'dc.publisher', 'dcterms.publisher'),
|
||||||
'isbn': ('isbn',),
|
'isbn': ('isbn',),
|
||||||
|
@ -181,7 +181,7 @@ class MetadataUpdater:
|
|||||||
def patch(self, off, new_record0):
|
def patch(self, off, new_record0):
|
||||||
# Save the current size of each record
|
# Save the current size of each record
|
||||||
record_sizes = [len(new_record0)]
|
record_sizes = [len(new_record0)]
|
||||||
for i in range(1,self.nrecs-1):
|
for i in range(1, self.nrecs-1):
|
||||||
record_sizes.append(self.pdbrecords[i+1][0]-self.pdbrecords[i][0])
|
record_sizes.append(self.pdbrecords[i+1][0]-self.pdbrecords[i][0])
|
||||||
# And the last one
|
# And the last one
|
||||||
record_sizes.append(self.data.stop - self.pdbrecords[self.nrecs-1][0])
|
record_sizes.append(self.data.stop - self.pdbrecords[self.nrecs-1][0])
|
||||||
@ -192,7 +192,7 @@ class MetadataUpdater:
|
|||||||
record0_offset = self.pdbrecords[0][0]
|
record0_offset = self.pdbrecords[0][0]
|
||||||
updated_offset = record0_offset + len(new_record0)
|
updated_offset = record0_offset + len(new_record0)
|
||||||
|
|
||||||
for i in range(1,self.nrecs-1):
|
for i in range(1, self.nrecs-1):
|
||||||
updated_pdbrecords.append(updated_offset)
|
updated_pdbrecords.append(updated_offset)
|
||||||
updated_offset += record_sizes[i]
|
updated_offset += record_sizes[i]
|
||||||
# Update the last pdbrecord
|
# Update the last pdbrecord
|
||||||
@ -200,7 +200,7 @@ class MetadataUpdater:
|
|||||||
|
|
||||||
# Read in current records 1 to last
|
# Read in current records 1 to last
|
||||||
data_blocks = [new_record0]
|
data_blocks = [new_record0]
|
||||||
for i in range(1,self.nrecs):
|
for i in range(1, self.nrecs):
|
||||||
data_blocks.append(self.data[self.pdbrecords[i][0]:self.pdbrecords[i][0] + record_sizes[i]])
|
data_blocks.append(self.data[self.pdbrecords[i][0]:self.pdbrecords[i][0] + record_sizes[i]])
|
||||||
|
|
||||||
# Rewrite the stream
|
# Rewrite the stream
|
||||||
@ -272,7 +272,7 @@ class MetadataUpdater:
|
|||||||
new_record0.write(b'\0'*(1024*8))
|
new_record0.write(b'\0'*(1024*8))
|
||||||
|
|
||||||
# Rebuild the stream, update the pdbrecords pointers
|
# Rebuild the stream, update the pdbrecords pointers
|
||||||
self.patchSection(0,new_record0.getvalue())
|
self.patchSection(0, new_record0.getvalue())
|
||||||
|
|
||||||
# Update record0
|
# Update record0
|
||||||
self.record0 = self.record(0)
|
self.record0 = self.record(0)
|
||||||
@ -283,7 +283,7 @@ class MetadataUpdater:
|
|||||||
N=0
|
N=0
|
||||||
result=''
|
result=''
|
||||||
while src:
|
while src:
|
||||||
s,src = src[:length],src[length:]
|
s, src = src[:length],src[length:]
|
||||||
hexa = ' '.join(['%02X'%ord(x) for x in s])
|
hexa = ' '.join(['%02X'%ord(x) for x in s])
|
||||||
s = s.translate(FILTER)
|
s = s.translate(FILTER)
|
||||||
result += '%04X %-*s %s\n' % (N, length*3, hexa, s)
|
result += '%04X %-*s %s\n' % (N, length*3, hexa, s)
|
||||||
|
@ -568,7 +568,7 @@ def dump_dict(cats):
|
|||||||
|
|
||||||
|
|
||||||
XPATH_NS = {
|
XPATH_NS = {
|
||||||
'dc': 'http://purl.org/dc/elements/1.1/',
|
'dc' : 'http://purl.org/dc/elements/1.1/',
|
||||||
'opf': 'http://www.idpf.org/2007/opf',
|
'opf': 'http://www.idpf.org/2007/opf',
|
||||||
're' : 'http://exslt.org/regular-expressions'
|
're' : 'http://exslt.org/regular-expressions'
|
||||||
}
|
}
|
||||||
|
@ -15,18 +15,18 @@ from calibre.ebooks.metadata.plucker import get_metadata as get_plucker
|
|||||||
from calibre.ebooks.pdb.header import PdbHeaderReader
|
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||||
|
|
||||||
MREADER = {
|
MREADER = {
|
||||||
'PNPdPPrs' : get_eReader,
|
'PNPdPPrs': get_eReader,
|
||||||
'PNRdPPrs' : get_eReader,
|
'PNRdPPrs': get_eReader,
|
||||||
'DataPlkr' : get_plucker,
|
'DataPlkr': get_plucker,
|
||||||
'BOOKMTIT' : get_Haodoo,
|
'BOOKMTIT': get_Haodoo,
|
||||||
'BOOKMTIU' : get_Haodoo,
|
'BOOKMTIU': get_Haodoo,
|
||||||
}
|
}
|
||||||
|
|
||||||
from calibre.ebooks.metadata.ereader import set_metadata as set_eReader
|
from calibre.ebooks.metadata.ereader import set_metadata as set_eReader
|
||||||
|
|
||||||
MWRITER = {
|
MWRITER = {
|
||||||
'PNPdPPrs' : set_eReader,
|
'PNPdPPrs': set_eReader,
|
||||||
'PNRdPPrs' : set_eReader,
|
'PNRdPPrs': set_eReader,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -158,7 +158,7 @@ class MetadataUpdater:
|
|||||||
N=0
|
N=0
|
||||||
result=''
|
result=''
|
||||||
while src:
|
while src:
|
||||||
s,src = src[:length],src[length:]
|
s, src = src[:length],src[length:]
|
||||||
hexa = ' '.join(['%02X'%ord(x) for x in s])
|
hexa = ' '.join(['%02X'%ord(x) for x in s])
|
||||||
s = s.translate(FILTER)
|
s = s.translate(FILTER)
|
||||||
result += '%04X %-*s %s\n' % (N, length*3, hexa, s)
|
result += '%04X %-*s %s\n' % (N, length*3, hexa, s)
|
||||||
@ -170,7 +170,7 @@ class MetadataUpdater:
|
|||||||
for tag in self.metadata:
|
for tag in self.metadata:
|
||||||
print(f'{tag}: {self.metadata[tag]!r}')
|
print(f'{tag}: {self.metadata[tag]!r}')
|
||||||
|
|
||||||
def encode_vwi(self,value):
|
def encode_vwi(self, value):
|
||||||
ans = []
|
ans = []
|
||||||
multi_byte = (value > 0x7f)
|
multi_byte = (value > 0x7f)
|
||||||
while value:
|
while value:
|
||||||
@ -333,7 +333,7 @@ class MetadataUpdater:
|
|||||||
self.original_md_len = original_md_len
|
self.original_md_len = original_md_len
|
||||||
return ths.getvalue().encode('iso-8859-1')
|
return ths.getvalue().encode('iso-8859-1')
|
||||||
|
|
||||||
def update(self,mi):
|
def update(self, mi):
|
||||||
# Collect the original metadata
|
# Collect the original metadata
|
||||||
self.get_original_metadata()
|
self.get_original_metadata()
|
||||||
|
|
||||||
|
@ -301,15 +301,15 @@ class MOBIHeader: # {{{
|
|||||||
2 : 'Mobipocket book',
|
2 : 'Mobipocket book',
|
||||||
3 : 'PalmDOC book',
|
3 : 'PalmDOC book',
|
||||||
4 : 'Audio',
|
4 : 'Audio',
|
||||||
257 : 'News',
|
257: 'News',
|
||||||
258 : 'News Feed',
|
258: 'News Feed',
|
||||||
259 : 'News magazine',
|
259: 'News magazine',
|
||||||
513 : 'PICS',
|
513: 'PICS',
|
||||||
514 : 'Word',
|
514: 'Word',
|
||||||
515 : 'XLS',
|
515: 'XLS',
|
||||||
516 : 'PPT',
|
516: 'PPT',
|
||||||
517 : 'TEXT',
|
517: 'TEXT',
|
||||||
518 : 'HTML',
|
518: 'HTML',
|
||||||
}.get(self.type_raw, repr(self.type_raw))
|
}.get(self.type_raw, repr(self.type_raw))
|
||||||
|
|
||||||
self.encoding_raw, = struct.unpack(b'>I', self.raw[28:32])
|
self.encoding_raw, = struct.unpack(b'>I', self.raw[28:32])
|
||||||
|
@ -242,12 +242,12 @@ class Tag: # {{{
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
TAG_MAP = {
|
TAG_MAP = {
|
||||||
1: ('offset', 'Offset in HTML'),
|
1 : ('offset', 'Offset in HTML'),
|
||||||
2: ('size', 'Size in HTML'),
|
2 : ('size', 'Size in HTML'),
|
||||||
3: ('label_offset', 'Label offset in CNCX'),
|
3 : ('label_offset', 'Label offset in CNCX'),
|
||||||
4: ('depth', 'Depth of this entry in TOC'),
|
4 : ('depth', 'Depth of this entry in TOC'),
|
||||||
5: ('class_offset', 'Class offset in CNCX'),
|
5 : ('class_offset', 'Class offset in CNCX'),
|
||||||
6: ('pos_fid', 'File Index'),
|
6 : ('pos_fid', 'File Index'),
|
||||||
|
|
||||||
11: ('secondary', '[unknown, unknown, '
|
11: ('secondary', '[unknown, unknown, '
|
||||||
'tag type from TAGX in primary index header]'),
|
'tag type from TAGX in primary index header]'),
|
||||||
@ -256,14 +256,14 @@ class Tag: # {{{
|
|||||||
22: ('first_child_index', 'First child'),
|
22: ('first_child_index', 'First child'),
|
||||||
23: ('last_child_index', 'Last child'),
|
23: ('last_child_index', 'Last child'),
|
||||||
|
|
||||||
69 : ('image_index', 'Offset from first image record to the'
|
69: ('image_index', 'Offset from first image record to the'
|
||||||
' image record associated with this entry'
|
' image record associated with this entry'
|
||||||
' (masthead for periodical or thumbnail for'
|
' (masthead for periodical or thumbnail for'
|
||||||
' article entry).'),
|
' article entry).'),
|
||||||
70 : ('desc_offset', 'Description offset in cncx'),
|
70: ('desc_offset', 'Description offset in cncx'),
|
||||||
71 : ('author_offset', 'Author offset in cncx'),
|
71: ('author_offset', 'Author offset in cncx'),
|
||||||
72 : ('image_caption_offset', 'Image caption offset in cncx'),
|
72: ('image_caption_offset', 'Image caption offset in cncx'),
|
||||||
73 : ('image_attr_offset', 'Image attribution offset in cncx'),
|
73: ('image_attr_offset', 'Image attribution offset in cncx'),
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -305,7 +305,7 @@ class MobiMLizer:
|
|||||||
inline = etree.SubElement(inline, XHTML('i'))
|
inline = etree.SubElement(inline, XHTML('i'))
|
||||||
if istate.bold:
|
if istate.bold:
|
||||||
inline = etree.SubElement(inline, XHTML('b'))
|
inline = etree.SubElement(inline, XHTML('b'))
|
||||||
if istate.bgcolor is not None and istate.bgcolor != 'transparent' :
|
if istate.bgcolor is not None and istate.bgcolor != 'transparent':
|
||||||
inline = etree.SubElement(inline, XHTML('span'),
|
inline = etree.SubElement(inline, XHTML('span'),
|
||||||
bgcolor=convert_color_for_font_tag(istate.bgcolor))
|
bgcolor=convert_color_for_font_tag(istate.bgcolor))
|
||||||
if istate.fgcolor != 'black':
|
if istate.fgcolor != 'black':
|
||||||
|
@ -45,7 +45,7 @@ class EXTHHeader: # {{{
|
|||||||
self.page_progression_direction = None
|
self.page_progression_direction = None
|
||||||
self.primary_writing_mode = None
|
self.primary_writing_mode = None
|
||||||
|
|
||||||
self.decode = lambda x : clean_ascii_chars(x.decode(codec, 'replace'))
|
self.decode = lambda x: clean_ascii_chars(x.decode(codec, 'replace'))
|
||||||
|
|
||||||
while left > 0:
|
while left > 0:
|
||||||
left -= 1
|
left -= 1
|
||||||
|
@ -24,10 +24,10 @@ tag_fieldname_map = {
|
|||||||
22: ['child1',0],
|
22: ['child1',0],
|
||||||
23: ['childn',0],
|
23: ['childn',0],
|
||||||
69: ['image_index',0],
|
69: ['image_index',0],
|
||||||
70 : ['desc_offset', 0], # 'Description offset in cncx'
|
70: ['desc_offset', 0], # 'Description offset in cncx'
|
||||||
71 : ['author_offset', 0], # 'Author offset in cncx'
|
71: ['author_offset', 0], # 'Author offset in cncx'
|
||||||
72 : ['image_caption_offset', 0], # 'Image caption offset in cncx',
|
72: ['image_caption_offset', 0], # 'Image caption offset in cncx',
|
||||||
73 : ['image_attr_offset', 0], # 'Image attribution offset in cncx',
|
73: ['image_attr_offset', 0], # 'Image attribution offset in cncx',
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -35,13 +35,13 @@ default_entry = {
|
|||||||
'pos': -1,
|
'pos': -1,
|
||||||
'len': 0,
|
'len': 0,
|
||||||
'noffs': -1,
|
'noffs': -1,
|
||||||
'text' : 'Unknown Text',
|
'text': 'Unknown Text',
|
||||||
'hlvl' : -1,
|
'hlvl': -1,
|
||||||
'kind' : 'Unknown Class',
|
'kind': 'Unknown Class',
|
||||||
'pos_fid' : None,
|
'pos_fid': None,
|
||||||
'parent' : -1,
|
'parent': -1,
|
||||||
'child1' : -1,
|
'child1': -1,
|
||||||
'childn' : -1,
|
'childn': -1,
|
||||||
'description': None,
|
'description': None,
|
||||||
'author': None,
|
'author': None,
|
||||||
'image_caption': None,
|
'image_caption': None,
|
||||||
|
@ -37,7 +37,7 @@ class TAGX: # {{{
|
|||||||
BITMASKS.update({x:(1 << i) for i, x in enumerate([1, 2, 3, 4, 5, 21, 22, 23])})
|
BITMASKS.update({x:(1 << i) for i, x in enumerate([1, 2, 3, 4, 5, 21, 22, 23])})
|
||||||
BITMASKS.update({x:(1 << i) for i, x in enumerate([69, 70, 71, 72, 73])})
|
BITMASKS.update({x:(1 << i) for i, x in enumerate([69, 70, 71, 72, 73])})
|
||||||
|
|
||||||
NUM_VALUES = defaultdict(lambda :1)
|
NUM_VALUES = defaultdict(lambda:1)
|
||||||
NUM_VALUES[11] = 3
|
NUM_VALUES[11] = 3
|
||||||
NUM_VALUES[0] = 0
|
NUM_VALUES[0] = 0
|
||||||
|
|
||||||
@ -64,7 +64,7 @@ class TAGX: # {{{
|
|||||||
'''
|
'''
|
||||||
TAGX block for the Primary index header of a periodical
|
TAGX block for the Primary index header of a periodical
|
||||||
'''
|
'''
|
||||||
for i in (1, 2, 3, 4, 5, 21, 22, 23, 0, 69, 70, 71, 72,73, 0):
|
for i in (1, 2, 3, 4, 5, 21, 22, 23, 0, 69, 70, 71, 72, 73, 0):
|
||||||
self.add_tag(i)
|
self.add_tag(i)
|
||||||
return self.header(2) + bytes(self.byts)
|
return self.header(2) + bytes(self.byts)
|
||||||
|
|
||||||
|
@ -163,7 +163,7 @@ class Serializer:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
buf.write(b'<reference type="')
|
buf.write(b'<reference type="')
|
||||||
if ref.type.startswith('other.') :
|
if ref.type.startswith('other.'):
|
||||||
self.serialize_text(ref.type.replace('other.',''), quot=True)
|
self.serialize_text(ref.type.replace('other.',''), quot=True)
|
||||||
else:
|
else:
|
||||||
self.serialize_text(ref.type, quot=True)
|
self.serialize_text(ref.type, quot=True)
|
||||||
|
@ -73,7 +73,7 @@ def tostring(raw, **kwargs):
|
|||||||
ans = etree.tostring(raw, **kwargs)
|
ans = etree.tostring(raw, **kwargs)
|
||||||
if xml_declaration:
|
if xml_declaration:
|
||||||
ans = '<?xml version="1.0" encoding="%s"?>\n'%encoding + ans
|
ans = '<?xml version="1.0" encoding="%s"?>\n'%encoding + ans
|
||||||
return re.sub(r'&#x([0-9A-Fa-f]+);', lambda m:my_unichr(int(m.group(1), 16)),
|
return re.sub(r'&#x([0-9A-Fa-f]+);', lambda m: my_unichr(int(m.group(1), 16)),
|
||||||
ans).encode(encoding)
|
ans).encode(encoding)
|
||||||
|
|
||||||
|
|
||||||
|
@ -343,7 +343,7 @@ def test_normalization(return_tests=False): # {{{
|
|||||||
|
|
||||||
def test_edge_normalization(self):
|
def test_edge_normalization(self):
|
||||||
def edge_dict(prefix, expected):
|
def edge_dict(prefix, expected):
|
||||||
return {f'{prefix}-{edge}' : x for edge, x in zip(EDGES, expected)}
|
return {f'{prefix}-{edge}': x for edge, x in zip(EDGES, expected)}
|
||||||
for raw, expected in iteritems({
|
for raw, expected in iteritems({
|
||||||
'2px': ('2px', '2px', '2px', '2px'),
|
'2px': ('2px', '2px', '2px', '2px'),
|
||||||
'1em 2em': ('1em', '2em', '1em', '2em'),
|
'1em 2em': ('1em', '2em', '1em', '2em'),
|
||||||
@ -356,7 +356,7 @@ def test_normalization(return_tests=False): # {{{

 def test_list_style_normalization(self):
 def ls_dict(expected):
-ans = {'list-style-%s' % x : DEFAULTS['list-style-%s' % x] for x in ('type', 'image', 'position')}
+ans = {'list-style-%s' % x: DEFAULTS['list-style-%s' % x] for x in ('type', 'image', 'position')}
 for k, v in iteritems(expected):
 ans['list-style-%s' % k] = v
 return ans
@ -385,17 +385,17 @@ def test_normalization(return_tests=False): # {{{

 def test_edge_condensation(self):
 for s, v in iteritems({
-(1, 1, 3) : None,
+(1, 1, 3): None,
-(1, 2, 3, 4) : '2pt 3pt 4pt 1pt',
+(1, 2, 3, 4): '2pt 3pt 4pt 1pt',
-(1, 2, 3, 2) : '2pt 3pt 2pt 1pt',
+(1, 2, 3, 2): '2pt 3pt 2pt 1pt',
-(1, 2, 1, 3) : '2pt 1pt 3pt',
+(1, 2, 1, 3): '2pt 1pt 3pt',
-(1, 2, 1, 2) : '2pt 1pt',
+(1, 2, 1, 2): '2pt 1pt',
-(1, 1, 1, 1) : '1pt',
+(1, 1, 1, 1): '1pt',
-('2%', '2%', '2%', '2%') : '2%',
+('2%', '2%', '2%', '2%'): '2%',
-tuple('0 0 0 0'.split()) : '0',
+tuple('0 0 0 0'.split()): '0',
 }):
 for prefix in ('margin', 'padding'):
-css = {f'{prefix}-{x}' : str(y)+'pt' if isinstance(y, numbers.Number) else y
+css = {f'{prefix}-{x}': str(y)+'pt' if isinstance(y, numbers.Number) else y
 for x, y in zip(('left', 'top', 'right', 'bottom'), s)}
 css = '; '.join((f'{k}:{v}' for k, v in iteritems(css)))
 style = parseStyle(css)
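The test table above encodes the standard CSS shorthand-condensation rules: the input tuples are (left, top, right, bottom) values and the expected strings are top/right/bottom/left shorthands. A rough re-implementation of just the rule set, assuming nothing about calibre's internals:

    def condense_edges(left, top, right, bottom):
        # Emit the shortest margin/padding shorthand, CSS order: T R B L.
        t, r, b, l = top, right, bottom, left
        if t == r == b == l:
            return t                   # (1, 1, 1, 1) -> '1pt'
        if t == b and r == l:
            return f'{t} {r}'          # (1, 2, 1, 2) -> '2pt 1pt'
        if r == l:
            return f'{t} {r} {b}'      # (1, 2, 1, 3) -> '2pt 1pt 3pt'
        return f'{t} {r} {b} {l}'      # (1, 2, 3, 4) -> '2pt 3pt 4pt 1pt'

    print(condense_edges('1pt', '2pt', '3pt', '4pt'))  # -> 2pt 3pt 4pt 1pt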
@ -198,7 +198,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
 user_entities[match.group(1)] = val
 if user_entities:
 pat = re.compile(r'&(%s);'%('|'.join(list(user_entities.keys()))))
-data = pat.sub(lambda m:user_entities[m.group(1)], data)
+data = pat.sub(lambda m: user_entities[m.group(1)], data)

 if preprocessor is not None:
 data = preprocessor(data)
@ -105,7 +105,7 @@ class NamedEntities(BaseError):
 for name, mt in iteritems(container.mime_map):
 if mt in check_types:
 raw = container.raw_data(name)
-nraw = replace_pat.sub(lambda m:html5_entities[m.group(1)], raw)
+nraw = replace_pat.sub(lambda m: html5_entities[m.group(1)], raw)
 if raw != nraw:
 changed = True
 with container.open(name, 'wb') as f:
@ -57,7 +57,7 @@ def handle_private_entities(data):
 if user_entities:
 data = ('\n' * num_of_nl_in_pre) + data[idx:]
 pat = re.compile(r'&(%s);'%('|'.join(user_entities.keys())))
-data = pat.sub(lambda m:user_entities[m.group(1)], data)
+data = pat.sub(lambda m: user_entities[m.group(1)], data)
 return data
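This is the third hunk touching the same idiom: build one alternation regex from the declared entity names, then substitute each reference through a lambda. A standalone sketch with a made-up entity table (`re.escape` is added here defensively; the calibre code joins the names directly):

    import re

    user_entities = {'co': 'Example Corp', 'ver': '1.0'}  # hypothetical

    pat = re.compile(r'&(%s);' % '|'.join(map(re.escape, user_entities)))
    data = pat.sub(lambda m: user_entities[m.group(1)], 'Built by &co;, v&ver;')
    print(data)  # -> Built by Example Corp, v1.0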
@ -270,7 +270,7 @@ def css_data(container, book_locale, result_data, *args):
 html_sheets[name].append(
 css_rules(name, parser.parse_stylesheet(force_unicode(style.text, 'utf-8')).rules, style.sourceline - 1))

-rule_map = defaultdict(lambda : defaultdict(list))
+rule_map = defaultdict(lambda: defaultdict(list))

 def rules_in_sheet(sheet):
 for rule in sheet:
@ -323,11 +323,11 @@ def css_data(container, book_locale, result_data, *args):

 return (MatchLocation(tag_text(elem), elem.sourceline) for elem in matches)

-class_map = defaultdict(lambda : defaultdict(list))
+class_map = defaultdict(lambda: defaultdict(list))

 for name, inline_sheets in iteritems(html_sheets):
 root = container.parsed(name)
-cmap = defaultdict(lambda : defaultdict(list))
+cmap = defaultdict(lambda: defaultdict(list))
 for elem in root.xpath('//*[@class]'):
 for cls in elem.get('class', '').split():
 cmap[cls][elem] = []
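The repeated `defaultdict(lambda: defaultdict(list))` gives a two-level autovivifying map, here class name -> element -> list of matches. The pattern in miniature:

    from collections import defaultdict

    cmap = defaultdict(lambda: defaultdict(list))
    cmap['chapter']['elem1'].append('rule 12')  # both levels spring into being
    cmap['chapter']['elem2']                    # touched, stays an empty list
    print(len(cmap['chapter']))  # -> 2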
@ -93,7 +93,7 @@ class Structure(BaseTest):
 self.assertTrue(len(get_toc(c))) # detect NCX toc even in epub 3 files
 c.add_file('nav.html', b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">'
 b'<body><nav epub:type="toc"><ol><li><a href="start.xhtml">EPUB 3 nav</a></li></ol></nav></body></html>',
-process_manifest_item=lambda item:item.set('properties', 'nav'))
+process_manifest_item=lambda item: item.set('properties', 'nav'))
 toc = get_toc(c)
 self.assertTrue(len(toc))
 self.assertEqual(toc.as_dict['children'][0]['title'], 'EPUB 3 nav')
@ -133,7 +133,7 @@ class Structure(BaseTest):
 b'<body><nav epub:type="landmarks"><ol><li><a epub:type="x" href="../xxx.html#moo">XXX </a></li>'
 b'<li><a href="../a.html"> YYY </a></li>'
 b'</ol></nav></body></html>',
-process_manifest_item=lambda item:item.set('properties', 'nav'))
+process_manifest_item=lambda item: item.set('properties', 'nav'))
 self.assertEqual([
 {'dest':'xxx.html', 'frag':'moo', 'type':'x', 'title':'XXX'}, {'dest':'a.html', 'frag':'', 'type':'', 'title':'YYY'}
 ], get_landmarks(c))
@ -696,9 +696,9 @@ class CSSFlattener:
 fsize = self.context.dest.fbase
 self.flatten_node(html, stylizer, names, styles, pseudo_styles, fsize, item.id, recurse=False)
 self.flatten_node(html.find(XHTML('body')), stylizer, names, styles, pseudo_styles, fsize, item.id)
-items = sorted(((key, val) for (val, key) in iteritems(styles)), key=lambda x:numeric_sort_key(x[0]))
+items = sorted(((key, val) for (val, key) in iteritems(styles)), key=lambda x: numeric_sort_key(x[0]))
 # :hover must come after link and :active must come after :hover
-psels = sorted(pseudo_styles, key=lambda x :
+psels = sorted(pseudo_styles, key=lambda x:
 {'hover':1, 'active':2}.get(x, 0))
 for psel in psels:
 styles = pseudo_styles[psel]
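The second change above is the interesting one: the sort key ranks unknown pseudo-classes 0, `hover` 1 and `active` 2, which enforces the link/visited/hover/active ordering the comment asks for. In isolation:

    pseudo_styles = ['active', 'visited', 'hover', 'link']
    psels = sorted(pseudo_styles,
                   key=lambda x: {'hover': 1, 'active': 2}.get(x, 0))
    print(psels)  # -> ['visited', 'link', 'hover', 'active'] (stable sort)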
@ -24,17 +24,17 @@ def meta_info_to_oeb_metadata(mi, m, log, override_input_metadata=False):
 m.clear('title_sort')
 m.add('title_sort', mi.title_sort)
 if not mi.is_null('authors'):
-m.filter('creator', lambda x : x.role.lower() in ['aut', ''])
+m.filter('creator', lambda x: x.role.lower() in ['aut', ''])
 for a in mi.authors:
 attrib = {'role':'aut'}
 if mi.author_sort:
 attrib[OPF('file-as')] = mi.author_sort
 m.add('creator', a, attrib=attrib)
 if not mi.is_null('book_producer'):
-m.filter('contributor', lambda x : x.role.lower() == 'bkp')
+m.filter('contributor', lambda x: x.role.lower() == 'bkp')
 m.add('contributor', mi.book_producer, role='bkp')
 elif override_input_metadata:
-m.filter('contributor', lambda x : x.role.lower() == 'bkp')
+m.filter('contributor', lambda x: x.role.lower() == 'bkp')
 if not mi.is_null('comments'):
 m.clear('description')
 m.add('description', mi.comments)
@ -131,7 +131,7 @@ class Split:
 continue

 page_breaks = list(page_breaks)
-page_breaks.sort(key=lambda x:int(x.get('pb_order')))
+page_breaks.sort(key=lambda x: int(x.get('pb_order')))
 page_break_ids, page_breaks_ = [], []
 for i, x in enumerate(page_breaks):
 x.set('id', x.get('id', 'calibre_pb_%d'%i))
@ -426,7 +426,7 @@ class FlowSplitter:
 '''
 if not self.was_split:
 return
-self.anchor_map = collections.defaultdict(lambda :self.base%0)
+self.anchor_map = collections.defaultdict(lambda: self.base%0)
 self.files = []

 for i, tree in enumerate(self.trees):
@ -170,7 +170,7 @@ class Text(Element):
 ('top', 'left', 'width', 'height'))))
 # This does nothing, as expected,
 # but somewhere left (at least) is changed sometimes to not .0
-if self.left != round(self.left) :
+if self.left != round(self.left):
 self.left = round(self.left)
 self.bottom = self.top + self.height
 self.right = self.left + self.width
@ -274,7 +274,7 @@ class Text(Element):
 and False \
 and self.font.id == other.font.id \
 and re.match(r'<span style="font-size:', self.raw) is not None \
-and re.match(r'<span style="font-size:', other.raw) is not None :
+and re.match(r'<span style="font-size:', other.raw) is not None:
 # We have the same class, so merge
 m_self = re.match(r'^(.+)</span>$', self.raw)
 m_other = re.match(r'^<span style="font-size:.+em">(.+</span>)$', other.raw)
@ -282,12 +282,12 @@ class Text(Element):
 self.raw = m_self.group(1)
 other.raw = m_other.group(1)
 elif self.font_size_em != other.font_size_em \
-and self.font_size_em != 1.00 :
+and self.font_size_em != 1.00:
-if re.match(r'<span', self.raw) is None :
+if re.match(r'<span', self.raw) is None:
 self.raw = '<span style="font-size:%sem">%s</span>'%(str(self.font_size_em),self.raw)
 # Try to allow for a very large initial character
 elif len(self.text_as_string) <= 2 \
-and self.font_size_em >= other.font_size_em * 2.0 :
+and self.font_size_em >= other.font_size_em * 2.0:
 # Insert 'float: left' etc. into current font info
 # Unfortunately, processing to generate the .epub file changes things.
 # The line height gets set to the same as other parts of the file
@ -351,10 +351,10 @@ class Text(Element):
 # Are there problems if self.raw does not end </a>?
 # Note that the 2 parts could have different font sizes
 matchObj = re.match(r'^([^<]*)(<span[^>]*>)*(<a href[^>]+>)(.*)</a>(</span>)*(\s*)$', self.raw)
-if matchObj is not None :
+if matchObj is not None:
 otherObj = re.match(r'^([^<]*)(<span[^>]*>)*(<a href[^>]+>)(.*)(</a>)(</span>)*(.*)$', other.raw)
 # There is another href, but is it for the same place?
-if otherObj is not None and matchObj.group(3) == otherObj.group(3) :
+if otherObj is not None and matchObj.group(3) == otherObj.group(3):
 m2 = matchObj.group(2)
 if m2 is None:
 m2 = ''
@ -740,15 +740,15 @@ class Page:
 if (frst.top <= secnd.top and frst.bottom >= secnd.bottom-BOTTOM_FACTOR) \
 or (secnd.top <= frst.top and secnd.bottom >= frst.bottom-BOTTOM_FACTOR):
 # Overlap = same line
-if frst.left < secnd.left :
+if frst.left < secnd.left:
 return -1
-elif frst.left == secnd.left :
+elif frst.left == secnd.left:
 return 0
 return 1
 # Different line so sort into line number
-if frst.bottom < secnd.bottom :
+if frst.bottom < secnd.bottom:
 return -1
-elif frst.bottom == secnd.bottom :
+elif frst.bottom == secnd.bottom:
 return 0
 return 1
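The hunk sits inside an old-style three-way comparator (-1/0/1) that orders PDF text fragments line by line, then left to right. If the same ordering is ever needed through `sorted`, Python 3 wants the comparator wrapped; a compressed variant (it skips the BOTTOM_FACTOR overlap test, and the fragment objects are assumed to expose `left` and `bottom`):

    from functools import cmp_to_key

    def cmp_fragments(frst, secnd):
        if frst.bottom != secnd.bottom:  # different lines: by baseline
            return -1 if frst.bottom < secnd.bottom else 1
        if frst.left != secnd.left:      # same line: by x position
            return -1 if frst.left < secnd.left else 1
        return 0

    # fragments.sort(key=cmp_to_key(cmp_fragments))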
@ -862,7 +862,7 @@ class Page:
 # Approximate the line spacing for checking overlapped lines
 line_height = frag.bottom - frag.top
 for t in self.texts:
-if t is not frag :
+if t is not frag:
 # Do the parts of a line overlap?
 # Some files can have separate lines overlapping slightly
 # BOTTOM_FACTOR allows for this
@ -1100,7 +1100,7 @@ class Page:
 for i in range(LINE_SCAN_COUNT):
 if len(self.texts) < 1:
 break
-if re.match(opts.pdf_header_regex, self.texts[0].text_as_string) is not None :
+if re.match(opts.pdf_header_regex, self.texts[0].text_as_string) is not None:
 # There could be fragments which are spread out, so join_fragments has not coalesced them
 # Not sure that this would work as it relies on the first fragment matching regex
 t = self.texts[0]
@ -1116,7 +1116,7 @@ class Page:
 for i in range(LINE_SCAN_COUNT):
 if len(self.texts) < 1:
 break
-if re.match(opts.pdf_footer_regex, self.texts[-1].text_as_string) is not None :
+if re.match(opts.pdf_footer_regex, self.texts[-1].text_as_string) is not None:
 # There could be fragments which are spread out, so join_fragments has not coalesced them
 t = self.texts[-1]
 # match = self.find_match(t)
@ -1154,10 +1154,10 @@ class Page:
 for text in self.texts:
 top = text.top
 left = text.left
-if round(left) != left :
+if round(left) != left:
 text.left = left = round(left)
 right = text.right
-if round(right) != right :
+if round(right) != right:
 text.right = right = round(right)
 if first:
 tops[top] = tops.get(top, 0) + 1
@ -1969,7 +1969,7 @@ class PDFDocument:
 merged_len = 0 # No merge
 # Allow where the last line ends with or next line starts with lower case.
 if re.match(r'.*[a-z,-]\s*$', last_line.text_as_string) is not None \
-or re.match(r'^\s*[a-z,-]', merged_text.text_as_string) is not None :
+or re.match(r'^\s*[a-z,-]', merged_text.text_as_string) is not None:
 merged_len = merged_text.right

 # To use merged_len etc.
@ -1984,7 +1984,7 @@ class PDFDocument:
 merge_done = True
 # We don't want to merge partial pages
 # i.e. if this is the last line, preserve its top/bottom till after merge
-if len(page.texts) == 1 :
+if len(page.texts) == 1:
 save_bottom = merged_text.bottom
 else:
 save_bottom = 0.0
@ -205,7 +205,7 @@ class Font:
 widths = {g:w for g, w in iteritems(widths) if w != most_common}

 groups = Array()
-for k, g in groupby(enumerate(widths), lambda i_x:i_x[0]-i_x[1]):
+for k, g in groupby(enumerate(widths), lambda i_x: i_x[0]-i_x[1]):
 group = list(map(itemgetter(1), g))
 gwidths = [widths[g] for g in group]
 if len(set(gwidths)) == 1 and len(group) > 1:
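The `enumerate`/`groupby` key in this hunk is the classic trick for splitting an ascending sequence of glyph ids into runs of consecutive integers: index minus value is constant inside a run. Standalone:

    from itertools import groupby
    from operator import itemgetter

    glyphs = [3, 4, 5, 9, 10, 14]
    runs = [list(map(itemgetter(1), g))
            for _, g in groupby(enumerate(glyphs), lambda i_x: i_x[0] - i_x[1])]
    print(runs)  # -> [[3, 4, 5], [9, 10], [14]]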
@ -260,14 +260,14 @@ class PDFStream:

 PATH_OPS = {
 # stroke fill fill-rule
-(False, False, 'winding') : 'n',
+(False, False, 'winding'): 'n',
-(False, False, 'evenodd') : 'n',
+(False, False, 'evenodd'): 'n',
-(False, True, 'winding') : 'f',
+(False, True, 'winding'): 'f',
-(False, True, 'evenodd') : 'f*',
+(False, True, 'evenodd'): 'f*',
-(True, False, 'winding') : 'S',
+(True, False, 'winding'): 'S',
-(True, False, 'evenodd') : 'S',
+(True, False, 'evenodd'): 'S',
-(True, True, 'winding') : 'B',
+(True, True, 'winding'): 'B',
-(True, True, 'evenodd') : 'B*',
+(True, True, 'evenodd'): 'B*',
 }

 def __init__(self, stream, page_size, compress=False, mark_links=False,
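The keys are (stroke, fill, fill-rule) triples and the values are PDF path-painting operators (n is a no-op, f/f* fill, S stroke, B/B* fill and stroke, with the starred forms using the even-odd rule). A plausible lookup, with hypothetical draw-state variables:

    PATH_OPS = {
        (False, False, 'winding'): 'n', (False, False, 'evenodd'): 'n',
        (False, True,  'winding'): 'f', (False, True,  'evenodd'): 'f*',
        (True,  False, 'winding'): 'S', (True,  False, 'evenodd'): 'S',
        (True,  True,  'winding'): 'B', (True,  True,  'evenodd'): 'B*',
    }

    stroke, fill, fill_rule = True, True, 'evenodd'  # hypothetical state
    print(PATH_OPS[(stroke, fill, fill_rule)])       # -> B*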
@ -34,10 +34,10 @@ TAG_MAP = {
 }

 STYLES = [
-('font-weight', {'bold' : 'B', 'bolder' : 'B'}),
+('font-weight', {'bold': 'B', 'bolder': 'B'}),
-('font-style', {'italic' : 'i'}),
+('font-style', {'italic': 'i'}),
-('text-decoration', {'underline' : 'u'}),
+('text-decoration', {'underline': 'u'}),
-('text-align', {'right' : 'r', 'center' : 'c'}),
+('text-align', {'right': 'r', 'center': 'c'}),
 ]

 BLOCK_TAGS = [
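STYLES pairs a CSS property with the output codes its values map to. One plausible way such a table is consumed (the `css` dict and the comprehension are illustrative, not calibre's actual consumer code):

    STYLES = [
        ('font-weight', {'bold': 'B', 'bolder': 'B'}),
        ('font-style', {'italic': 'i'}),
        ('text-decoration', {'underline': 'u'}),
        ('text-align', {'right': 'r', 'center': 'c'}),
    ]

    css = {'font-weight': 'bold', 'text-align': 'center'}  # hypothetical input
    codes = [tag_map[css[prop]] for prop, tag_map in STYLES
             if css.get(prop) in tag_map]
    print(codes)  # -> ['B', 'c']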
@ -455,7 +455,7 @@ class Document:
 if j == x:
 break
 # self.debug(str(siblings))
-if siblings and sum(siblings) > 1000 :
+if siblings and sum(siblings) > 1000:
 to_remove = False
 self.debug('Allowing %s' % describe(el))
 for desnode in self.tags(el, 'table', 'ul', 'div'):
@ -321,7 +321,7 @@ class RtfTokenizer:
 break
 l = l + 1
 i = i + 1
-if l > 10 :
+if l > 10:
 raise Exception('Error (at:%d): Too many digits in control word numeric argument.'%[tokenStart])

 if not consumed:
@ -63,7 +63,7 @@ Here is an example script using the ParseRTF module directly

 def Handle_Main():
 # Handles options and creates a parse object
-parse_obj =ParseRtf.ParseRtf(
+parse_obj = ParseRtf.ParseRtf(
 in_file = 'in.rtf',
 # All values from here on are optional
 # determine the output file
@ -201,7 +201,7 @@ class ParseRtf:

 def __check_dir(self, the_dir):
 '''Check to see if directory exists'''
-if not the_dir :
+if not the_dir:
 return
 dir_exists = os.path.isdir(the_dir)
 if not dir_exists:
@ -590,7 +590,7 @@ class ParseRtf:
 if int(num) > self.__exit_level:
 self.__exit_level = num

-def __make_temp_file(self,file):
+def __make_temp_file(self, file):
 '''Make a temporary file to parse'''
 write_file='rtf_write_file'
 read_obj = file if hasattr(file, 'read') else open_for_read(file)
@ -53,37 +53,37 @@ class AddBrackets:
 self.__state_dict = {
 'before_body' : self.__before_body_func,
 'in_body' : self.__in_body_func,
-'after_control_word' : self.__after_control_word_func,
+'after_control_word': self.__after_control_word_func,
 'in_ignore' : self.__ignore_func,
 }
 self.__accept = [
-'cw<ci<bold______' ,
+'cw<ci<bold______',
-'cw<ci<annotation' ,
+'cw<ci<annotation',
-'cw<ci<blue______' ,
+'cw<ci<blue______',
-# 'cw<ci<bold______' ,
+# 'cw<ci<bold______',
-'cw<ci<caps______' ,
+'cw<ci<caps______',
-'cw<ci<char-style' ,
+'cw<ci<char-style',
-'cw<ci<dbl-strike' ,
+'cw<ci<dbl-strike',
-'cw<ci<emboss____' ,
+'cw<ci<emboss____',
-'cw<ci<engrave___' ,
+'cw<ci<engrave___',
-'cw<ci<font-color' ,
+'cw<ci<font-color',
-'cw<ci<font-down_' ,
+'cw<ci<font-down_',
-'cw<ci<font-size_' ,
+'cw<ci<font-size_',
-'cw<ci<font-style' ,
+'cw<ci<font-style',
-'cw<ci<font-up___' ,
+'cw<ci<font-up___',
-'cw<ci<footnot-mk' ,
+'cw<ci<footnot-mk',
-'cw<ci<green_____' ,
+'cw<ci<green_____',
-'cw<ci<hidden____' ,
+'cw<ci<hidden____',
-'cw<ci<italics___' ,
+'cw<ci<italics___',
-'cw<ci<outline___' ,
+'cw<ci<outline___',
-'cw<ci<red_______' ,
+'cw<ci<red_______',
-'cw<ci<shadow____' ,
+'cw<ci<shadow____',
-'cw<ci<small-caps' ,
+'cw<ci<small-caps',
-'cw<ci<strike-thr' ,
+'cw<ci<strike-thr',
-'cw<ci<subscript_' ,
+'cw<ci<subscript_',
-'cw<ci<superscrip' ,
+'cw<ci<superscrip',
-'cw<ci<underlined' ,
+'cw<ci<underlined',
-# 'cw<ul<underlined' ,
+# 'cw<ul<underlined',
 ]

 def __initiate_values(self):
@ -180,7 +180,7 @@ class AddBrackets:
 What is the interest as it is build to accept only accepted cw
 in __after_control_word_func?
 '''
-self.__inline = {line[:16] : line[20:-1]
+self.__inline = {line[:16]: line[20:-1]
 for line in self.__temp_group\
 # Is this really necessary?
 if line[:16] in self.__accept}
@ -21,57 +21,57 @@ class BorderParse:
 def __init__(self):
 # cw<bd<bor-t-r-hi<nu<true
 self.__border_dict = {
-'bor-t-r-hi' : 'border-table-row-horizontal-inside',
+'bor-t-r-hi': 'border-table-row-horizontal-inside',
-'bor-t-r-vi' : 'border-table-row-vertical-inside',
+'bor-t-r-vi': 'border-table-row-vertical-inside',
-'bor-t-r-to' : 'border-table-row-top',
+'bor-t-r-to': 'border-table-row-top',
-'bor-t-r-le' : 'border-table-row-left',
+'bor-t-r-le': 'border-table-row-left',
-'bor-t-r-bo' : 'border-table-row-bottom',
+'bor-t-r-bo': 'border-table-row-bottom',
-'bor-t-r-ri' : 'border-table-row-right',
+'bor-t-r-ri': 'border-table-row-right',
-'bor-cel-bo' : 'border-cell-bottom',
+'bor-cel-bo': 'border-cell-bottom',
-'bor-cel-to' : 'border-cell-top',
+'bor-cel-to': 'border-cell-top',
-'bor-cel-le' : 'border-cell-left',
+'bor-cel-le': 'border-cell-left',
-'bor-cel-ri' : 'border-cell-right',
+'bor-cel-ri': 'border-cell-right',
-'bor-par-bo' : 'border-paragraph-bottom',
+'bor-par-bo': 'border-paragraph-bottom',
-'bor-par-to' : 'border-paragraph-top',
+'bor-par-to': 'border-paragraph-top',
-'bor-par-le' : 'border-paragraph-left',
+'bor-par-le': 'border-paragraph-left',
-'bor-par-ri' : 'border-paragraph-right',
+'bor-par-ri': 'border-paragraph-right',
-'bor-par-bx' : 'border-paragraph-box',
+'bor-par-bx': 'border-paragraph-box',
-'bor-for-ev' : 'border-for-every-paragraph',
+'bor-for-ev': 'border-for-every-paragraph',
-'bor-outsid' : 'border-outside',
+'bor-outsid': 'border-outside',
-'bor-none__' : 'border',
+'bor-none__': 'border',
 # border type => bt
-'bdr-li-wid' : 'line-width',
+'bdr-li-wid': 'line-width',
-'bdr-sp-wid' : 'padding',
+'bdr-sp-wid': 'padding',
-'bdr-color_' : 'color',
+'bdr-color_': 'color',
 }
 self.__border_style_dict = {
-'bdr-single' : 'single',
+'bdr-single': 'single',
-'bdr-doubtb' : 'double-thickness-border',
+'bdr-doubtb': 'double-thickness-border',
-'bdr-shadow' : 'shadowed-border',
+'bdr-shadow': 'shadowed-border',
-'bdr-double' : 'double-border',
+'bdr-double': 'double-border',
-'bdr-dotted' : 'dotted-border',
+'bdr-dotted': 'dotted-border',
-'bdr-dashed' : 'dashed',
+'bdr-dashed': 'dashed',
-'bdr-hair__' : 'hairline',
+'bdr-hair__': 'hairline',
-'bdr-inset_' : 'inset',
+'bdr-inset_': 'inset',
-'bdr-das-sm' : 'dash-small',
+'bdr-das-sm': 'dash-small',
-'bdr-dot-sm' : 'dot-dash',
+'bdr-dot-sm': 'dot-dash',
-'bdr-dot-do' : 'dot-dot-dash',
+'bdr-dot-do': 'dot-dot-dash',
-'bdr-outset' : 'outset',
+'bdr-outset': 'outset',
-'bdr-trippl' : 'tripple',
+'bdr-trippl': 'tripple',
-'bdr-thsm__' : 'thick-thin-small',
+'bdr-thsm__': 'thick-thin-small',
-'bdr-htsm__' : 'thin-thick-small',
+'bdr-htsm__': 'thin-thick-small',
-'bdr-hthsm_' : 'thin-thick-thin-small',
+'bdr-hthsm_': 'thin-thick-thin-small',
-'bdr-thm___' : 'thick-thin-medium',
+'bdr-thm___': 'thick-thin-medium',
-'bdr-htm___' : 'thin-thick-medium',
+'bdr-htm___': 'thin-thick-medium',
-'bdr-hthm__' : 'thin-thick-thin-medium',
+'bdr-hthm__': 'thin-thick-thin-medium',
-'bdr-thl___' : 'thick-thin-large',
+'bdr-thl___': 'thick-thin-large',
-'bdr-hthl__' : 'thin-thick-thin-large',
+'bdr-hthl__': 'thin-thick-thin-large',
-'bdr-wavy__' : 'wavy',
+'bdr-wavy__': 'wavy',
-'bdr-d-wav_' : 'double-wavy',
+'bdr-d-wav_': 'double-wavy',
-'bdr-strip_' : 'striped',
+'bdr-strip_': 'striped',
-'bdr-embos_' : 'emboss',
+'bdr-embos_': 'emboss',
-'bdr-engra_' : 'engrave',
+'bdr-engra_': 'engrave',
-'bdr-frame_' : 'frame',
+'bdr-frame_': 'frame',
 }

 def parse_border(self, line):
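Both dictionaries translate fixed-width RTF intermediate tokens (note the `_` padding to ten characters) into readable names, and the comment at the top of the hunk shows the token shape. A guess at the split, purely for illustration:

    border_dict = {'bor-t-r-hi': 'border-table-row-horizontal-inside'}

    token = 'cw<bd<bor-t-r-hi<nu<true'
    _, _, name, _, value = token.split('<')  # fields are '<'-separated
    print(border_dict[name], '=', value)
    # -> border-table-row-horizontal-inside = true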
@ -22,7 +22,7 @@ class CombineBorders:
 '''Combine borders in RTF tokens to make later processing easier'''

 def __init__(self,
-in_file ,
+in_file,
 bug_handler,
 copy=None,
 run_level=1,
@ -41,7 +41,7 @@ class Copy:
 '''Remove files from directory'''
 list_of_files = os.listdir(the_dir)
 for file in list_of_files:
-rem_file = os.path.join(Copy.__dir,file)
+rem_file = os.path.join(Copy.__dir, file)
 if os.path.isdir(rem_file):
 self.__remove_the_files(rem_file)
 else:
@ -56,7 +56,7 @@ class Copy:
 If the platform is linux, use the faster linux command
 of cp. Otherwise, use a safe python method.
 '''
-write_file = os.path.join(Copy.__dir,new_file)
+write_file = os.path.join(Copy.__dir, new_file)
 shutil.copyfile(file, write_file)

 def rename(self, source, dest):
Some files were not shown because too many files have changed in this diff.