Merge from trunk

This commit is contained in:
Charles Haley 2012-05-17 06:26:35 +02:00
commit 99c57ac10d
201 changed files with 62463 additions and 36984 deletions

View File

@ -16,7 +16,6 @@ resources/ebook-convert-complete.pickle
resources/builtin_recipes.xml
resources/builtin_recipes.zip
resources/template-functions.json
resources/display/*.js
setup/installer/windows/calibre/build.log
src/calibre/translations/.errors
src/cssutils/.svn/

View File

@ -19,6 +19,67 @@
# new recipes:
# - title:
- version: 0.8.51
date: 2012-05-11
new features:
- title: "When switching libraries preserve the position and selected books if you switch back to a previously opened library."
tickets: [994514]
- title: "Conversion pipeline: Filter out the useless font-face rules inserted by Microsoft Word for every font on the system"
- title: "Driver for Motorola XT875 and Pandigital SuperNova"
tickets: [996890]
- title: "Add a colour swatch to the dialog for creating column coloring rules, to ease selection of colors"
tickets: [994811]
- title: "EPUB Output: Consolidate internal CSS generated by calibre into external stylesheets for ease of editing the EPUB"
- title: "List EPUB and MOBI at the top of the dropdown list of formats to convert to, as they are the most common choices"
tickets: [994838]
bug fixes:
- title: "E-book viewer: Improve performance when switching between normal and fullscreen views."
tickets: [996102]
- title: "Edit metadata dialog: When running download metadata do not insert duplicate tags into the list of tags"
- title: "KF8 Input: Do not error out if the file has a few invalidly encoded bytes."
tickets: [997034]
- title: "Fix download of news in AZW3 format not working"
tickets: [996439]
- title: "Pocketbook driver: Update for new PB 611 firmware."
tickets: [903079]
- title: "ebook-convert: Error out if the user provides extra command line args instead of silently ignoring them"
tickets: [994939]
- title: "EPUB Output: Do not self close any container tags to prevent artifacts when EPUBs are viewed using buggy browser based viewers."
tickets: [994861]
- title: "Fix regression in 0.8.50 that broke the conversion of HTML files that contained non-ascii font-face declarations, typically produced by Microsoft Word"
improved recipes:
- Mainichi news
- derStandard
- Endgadget Japan
new recipes:
- title: Mainichi English
author: Hiroshi Miura
- title: The Grid TO
author: Yusuf W
- title: National Geographic (Italy)
author: faber1971
- title: Rebelion
author: Marc Busque
- version: 0.8.50
date: 2012-05-04

View File

@ -0,0 +1,26 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1336986047(BasicNewsRecipe):
    """News recipe for the 'Ads of the World' feed."""

    # --- Identity shown to the user ---
    title = u'Ads of the World'
    description = 'The best international advertising campaigns'
    __author__ = 'faber1971'
    language = 'en'
    masthead_url = 'http://bigcatgroup.co.uk/files/2011/01/05-ads-of-the-world.png'

    # --- Which articles to fetch, and in what order ---
    feeds = [(u'Ads of the world', u'http://feeds.feedburner.com/adsoftheworld-latest')]
    oldest_article = 7
    max_articles_per_feed = 100
    reverse_article_order = True

    # --- Page clean-up: explicit tag rules are used, auto_cleanup is off ---
    auto_cleanup = False
    no_stylesheets = True
    keep_only_tags = [dict(name='div', attrs={'id': 'primary'})]
    remove_tags = [
        dict(name='ul', attrs={'class': 'links inline'}),
        dict(name='div', attrs={'class': 'form-item'}),
        dict(name='div', attrs={'id': ['options', 'comments']}),
        dict(name='ul', attrs={'id': 'nodePager'}),
    ]

View File

@ -0,0 +1,43 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AirForceTimes(BasicNewsRecipe):
    """News recipe that collects the Air Force Times RSS feeds."""

    # --- Recipe bookkeeping ---
    __author__ = 'jde'
    __date__ = '16 May 2012'
    __version__ = '1.0'

    # --- Publication metadata ---
    title = 'Air Force Times'
    description = 'News of the U.S. Air Force'
    publisher = 'AirForceTimes.com'
    language = 'en'
    publication_type = 'newspaper'
    category = 'news, U.S. Air Force'
    tags = category  # same text as the category
    cover_url = 'http://www.airforcetimes.com/images/logo_airforcetimes_alert.jpg'
    masthead_url = cover_url  # the one logo image serves as cover and masthead

    # --- Download limits ---
    oldest_article = 7  # days
    max_articles_per_feed = 25
    recursions = 0
    needs_subscription = False
    remove_empty_feeds = True

    # --- Content handling ---
    encoding = None
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    auto_cleanup = True

    # --- Sections: (title, RSS url) ---
    feeds = [
        ('News', 'http://www.airforcetimes.com/rss_news.php'),
        ('Benefits', 'http://www.airforcetimes.com/rss_benefits.php'),
        ('Money', 'http://www.airforcetimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.airforcetimes.com/rss_careers.php'),
        ('Community', 'http://www.airforcetimes.com/rss_community.php'),
        ('Off Duty', 'http://www.airforcetimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.airforcetimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.airforcetimes.com/rss_guard.php'),
    ]

42
recipes/army_times.recipe Normal file
View File

@ -0,0 +1,42 @@
from calibre.web.feeds.news import BasicNewsRecipe
class ArmyTimes(BasicNewsRecipe):
    """News recipe that collects the Army Times RSS feeds."""

    # --- Recipe bookkeeping ---
    __author__ = 'jde'
    __date__ = '16 May 2012'
    __version__ = '1.0'

    # --- Publication metadata ---
    title = 'Army Times'
    description = 'News of the U.S. Army'
    publisher = 'ArmyTimes.com'
    language = 'en'
    publication_type = 'newspaper'
    category = 'news, U.S. Army'
    tags = category  # same text as the category
    cover_url = 'http://www.armytimes.com/images/logo_armytimes_alert.jpg'
    masthead_url = cover_url  # the one logo image serves as cover and masthead

    # --- Download limits ---
    oldest_article = 7  # days
    max_articles_per_feed = 25
    recursions = 0
    needs_subscription = False
    remove_empty_feeds = True

    # --- Content handling ---
    encoding = None
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    auto_cleanup = True

    # --- Sections: (title, RSS url) ---
    feeds = [
        ('News', 'http://www.armytimes.com/rss_news.php'),
        ('Benefits', 'http://www.armytimes.com/rss_benefits.php'),
        ('Money', 'http://www.armytimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.armytimes.com/rss_careers.php'),
        ('Community', 'http://www.armytimes.com/rss_community.php'),
        ('Off Duty', 'http://www.armytimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.armytimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.armytimes.com/rss_guard.php'),
    ]

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
'''
arstechnica.com
'''
@ -12,22 +12,24 @@ class ArsTechnica(BasicNewsRecipe):
title = u'Ars Technica'
language = 'en'
__author__ = 'Darko Miletic, Sujata Raman, Alexis Rohou'
description = 'The art of technology'
publisher = 'Ars Technica'
description = 'Ars Technica: Serving the technologist for 1.2 decades'
publisher = 'Conde Nast Publications'
category = 'news, IT, technology'
oldest_article = 5
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
extra_css = '''
body {font-family: Arial,Helvetica,sans-serif}
.title{text-align: left}
.byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none}
.news-item-figure-caption-text{font-size:small; font-style:italic}
.news-item-figure-caption-byline{font-size:small; font-style:italic; font-weight:bold}
'''
ignoreEtcArticles = True # Etc feed items can be ignored, as they're not real stories
remove_empty_feeds = True
publication_type = 'newsportal'
extra_css = '''
body {font-family: Arial,sans-serif}
.heading{font-family: "Times New Roman",serif}
.byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none}
img{display: block}
.caption-text{font-size:small; font-style:italic}
.caption-byline{font-size:small; font-style:italic; font-weight:bold}
'''
conversion_options = {
'comments' : description
@ -36,93 +38,64 @@ class ArsTechnica(BasicNewsRecipe):
,'publisher' : publisher
}
#preprocess_regexps = [
# (re.compile(r'<div class="news-item-figure', re.DOTALL|re.IGNORECASE),lambda match: '<div class="news-item-figure"')
# ,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
# ]
keep_only_tags = [dict(name='div', attrs={'id':['story','etc-story']})]
keep_only_tags = [
dict(attrs={'class':'standalone'})
,dict(attrs={'id':'article-guts'})
]
remove_tags = [
dict(name=['object','link','embed'])
,dict(name='div', attrs={'class':'read-more-link'})
dict(name=['object','link','embed','iframe','meta'])
,dict(attrs={'class':'corner-info'})
]
#remove_attributes=['width','height']
remove_attributes = ['lang']
feeds = [
(u'Infinite Loop (Apple content)' , u'http://feeds.arstechnica.com/arstechnica/apple/' )
,(u'Opposable Thumbs (Gaming content)' , u'http://feeds.arstechnica.com/arstechnica/gaming/' )
,(u'Gear and Gadgets' , u'http://feeds.arstechnica.com/arstechnica/gadgets/' )
,(u'Chipster (Hardware content)' , u'http://feeds.arstechnica.com/arstechnica/hardware/' )
,(u'Uptime (IT content)' , u'http://feeds.arstechnica.com/arstechnica/business/' )
,(u'Open Ended (Open Source content)' , u'http://feeds.arstechnica.com/arstechnica/open-source/')
,(u'One Microsoft Way' , u'http://feeds.arstechnica.com/arstechnica/microsoft/' )
,(u'Nobel Intent (Science content)' , u'http://feeds.arstechnica.com/arstechnica/science/' )
,(u'Scientific method (Science content)' , u'http://feeds.arstechnica.com/arstechnica/science/' )
,(u'Law & Disorder (Tech policy content)' , u'http://feeds.arstechnica.com/arstechnica/tech-policy/')
]
# This deals with multi-page stories
def append_page(self, soup, appendtag, position):
pager = soup.find('div',attrs={'class':'pager'})
pager = soup.find(attrs={'class':'numbers'})
if pager:
for atag in pager.findAll('a',href=True):
str = self.tag_to_string(atag)
if str.startswith('Next'):
nurl = 'http://arstechnica.com' + atag['href']
rawc = self.index_to_soup(nurl,True)
soup2 = BeautifulSoup(rawc, fromEncoding=self.encoding)
readmoretag = soup2.find('div', attrs={'class':'read-more-link'})
if readmoretag:
readmoretag.extract()
texttag = soup2.find('div', attrs={'class':'body'})
for it in texttag.findAll(style=True):
del it['style']
newpos = len(texttag.contents)
self.append_page(soup2,texttag,newpos)
texttag.extract()
pager.extract()
appendtag.insert(position,texttag)
nexttag = pager.find(attrs={'class':'next'})
if nexttag:
nurl = nexttag.parent['href']
rawc = self.index_to_soup(nurl,True)
soup2 = BeautifulSoup(rawc, fromEncoding=self.encoding)
texttag = soup2.find(attrs={'id':'article-guts'})
newpos = len(texttag.contents)
self.append_page(soup2,texttag,newpos)
texttag.extract()
pager.extract()
appendtag.insert(position,texttag)
def preprocess_html(self, soup):
# Adds line breaks near the byline (not sure why this is needed)
ftag = soup.find('div', attrs={'class':'byline'})
if ftag:
brtag = Tag(soup,'br')
brtag2 = Tag(soup,'br')
ftag.insert(4,brtag)
ftag.insert(5,brtag2)
# Remove style items
for item in soup.findAll(style=True):
del item['style']
# Remove id
for item in soup.findAll(id=True):
del item['id']
# For some reason, links to authors don't have the domainname
a_author = soup.find('a',{'href':re.compile("^/author")})
if a_author:
a_author['href'] = 'http://arstechnica.com'+a_author['href']
# within div class news-item-figure, we need to grab images
# Deal with multi-page stories
self.append_page(soup, soup.body, 3)
for item in soup.findAll('a'):
limg = item.find('img')
if item.string is not None:
str = item.string
item.replaceWith(str)
else:
if limg:
item.name = 'div'
item.attrs = []
else:
str = self.tag_to_string(item)
item.replaceWith(str)
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup
def get_article_url(self, article):
    """Return the URL for a feed entry, or None when it should be skipped.

    The URL is read from the entry's ``guid`` with any ``?query`` suffix
    removed.  When ``self.ignoreEtcArticles`` is set, entries whose title
    starts with ``Etc: `` are skipped.

    :param article: mapping-like feed entry supporting ``.get()``.
    :return: the URL string, or None for skipped entries and entries
             that carry no guid.
    """
    # If the article title starts with Etc:, don't return it
    if self.ignoreEtcArticles:
        article_title = article.get('title', None)
        # An entry without a title cannot be an "Etc: " item; passing None
        # to re.match would raise TypeError.
        if article_title is not None and re.match('Etc: ', article_title) is not None:
            return None

    # The actual article is in a guid tag
    guid = article.get('guid', None)
    if guid is None:
        return None
    base, query_mark, _query = guid.rpartition('?')
    # rpartition puts the whole string in the LAST slot when '?' is absent,
    # so fall back to the guid itself instead of returning ''.
    return base if query_mark else guid
def preprocess_raw_html(self, raw, url):
    """Replace everything before ``</head>`` with a bare ``<html><head>``.

    :param raw: the page markup as a string.
    :param url: the page URL (not used here).
    :return: ``'<html><head>'`` followed by ``raw`` from its first
             ``</head>`` onwards; ``raw`` unchanged when it contains no
             ``</head>``.
    """
    head_end = raw.find('</head>')
    if head_end == -1:
        # Slicing from -1 would keep only the last character of the page.
        return raw
    return '<html><head>' + raw[head_end:]

View File

@ -7,10 +7,11 @@ __copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
''' http://www.derstandard.at - Austrian Newspaper '''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from time import strftime
class DerStandardRecipe(BasicNewsRecipe):
title = u'derStandard'
__author__ = 'Gerhard Aigner and Sujata Raman and Marcel Jira'
__author__ = 'Gerhard Aigner and Sujata Raman and Marcel Jira and Peter Reschenhofer'
description = u'Nachrichten aus Österreich'
publisher ='derStandard.at'
category = 'news, politics, nachrichten, Austria'
@ -88,3 +89,41 @@ class DerStandardRecipe(BasicNewsRecipe):
for t in soup.findAll(['ul', 'li']):
t.name = 'div'
return soup
def get_cover_url(self):
    """Build the URL of today's front-page image from the e-paper site.

    The e-paper start page for the current date is fetched and the
    36-character page code that follows ``.../V.B1/pages/`` is extracted;
    that code names the cover image.

    :return: the image URL, or None when the page code cannot be located
             in the downloaded page.
    """
    highResolution = True

    date = strftime("%Y/%Y%m%d")
    # it is also possible for the past
    #date = '2012/20120503'

    urlP1 = 'http://epaper.derstandarddigital.at/'
    urlP2 = 'data_ep/STAN/' + date
    urlP3 = '/V.B1/'
    urlP4 = 'paper.htm'
    urlHTML = urlP1 + urlP2 + urlP3 + urlP4

    br = self.clone_browser(self.browser)
    htmlF = br.open_novisit(urlHTML)
    try:
        htmlC = htmlF.read()
    finally:
        htmlF.close()

    # URL EXAMPLE: data_ep/STAN/2012/20120504/V.B1/pages/A3B6798F-2751-4D8D-A103-C5EF22F7ACBE.htm
    # consists of part2 + part3 + 'pages/' + code
    # 'pages/' has length 6, code has length 36
    start = htmlC.find(urlP2)
    if start == -1:
        # Without this check the -1 would be used as an offset and a
        # meaningless code would be cut out of the page.
        return None
    index = start + len(urlP2 + urlP3) + 6
    code = htmlC[index:index + 36]
    if len(code) != 36:
        # Page ended before a full code could be read
        return None

    # URL EXAMPLE HIGH RESOLUTION: http://epaper.derstandarddigital.at/data_ep/STAN/2012/20120504/pagejpg/A3B6798F-2751-4D8D-A103-C5EF22F7ACBE_b.png
    # URL EXAMPLE LOW RESOLUTION: http://epaper.derstandarddigital.at/data_ep/STAN/2012/20120504/pagejpg/2AB52F71-11C1-4859-9114-CDCD79BEFDCB.png
    urlPic = urlP1 + urlP2 + '/pagejpg/' + code
    if highResolution:
        urlPic = urlPic + '_b'
    urlPic = urlPic + '.png'

    return urlPic

30
recipes/economico.recipe Normal file
View File

@ -0,0 +1,30 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Economico(BasicNewsRecipe):
    """News recipe for the economico.sapo.pt RSS feeds."""

    # --- Publication metadata ---
    title = u'Economico'
    __author__ = 'Krittika Goyal'
    language = 'pt'

    # --- Download limits ---
    oldest_article = 1  # days
    max_articles_per_feed = 25

    # --- Content handling ---
    encoding = 'utf-8'
    use_embedded_content = False
    no_stylesheets = True
    auto_cleanup = True

    # --- Sections: (title, RSS url) ---
    feeds = [
        ('Ultima Hora', 'http://economico.sapo.pt/rss/ultimas'),
        ('Em Foco', 'http://economico.sapo.pt/rss/emfoco'),
        ('Mercados', 'http://economico.sapo.pt/rss/mercados'),
        ('Empresas', 'http://economico.sapo.pt/rss/empresas'),
        ('Economia', 'http://economico.sapo.pt/rss/economia'),
        ('Politica', 'http://economico.sapo.pt/rss/politica'),
    ]

View File

@ -17,7 +17,25 @@ class EndgadgetJapan(BasicNewsRecipe):
no_stylesheets = True
language = 'ja'
encoding = 'utf-8'
feeds = [(u'engadget', u'http://japanese.engadget.com/rss.xml')]
index = 'http://japanese.engadget.com/'
remove_javascript = True
remove_tags_before = dict(name="h1", attrs={'class':"post_title"})
remove_tags_after = dict(name='div', attrs={'class':'post_body'})
def parse_index(self):
    """Build the article list from the page at ``self.index``.

    Every ``div.post_content`` block contributes the first link found
    inside its ``h4`` heading.  Blocks without such a heading or link are
    skipped instead of aborting the whole download.

    :return: a one-element list ``[('Latest Posts', articles)]`` where each
             article is a dict with title, date, url and description.
    """
    newsarticles = []
    soup = self.index_to_soup(self.index)
    for topstories in soup.findAll('div', attrs={'class': 'post_content'}):
        itt = topstories.find('h4')
        if itt is None:
            continue
        itema = itt.find('a', href=True)
        if itema is None:
            continue
        newsarticles.append({
            'title': itema.string,
            'date': '',
            'url': itema['href'],
            'description': '',
        })
    return [('Latest Posts', newsarticles)]
remove_tags_before = dict(name="div", attrs={'id':"content_wrap"})
remove_tags_after = dict(name='h3', attrs={'id':'addcomments'})

82
recipes/folha.recipe Normal file
View File

@ -0,0 +1,82 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.folha.uol.com.br
'''
import urllib
from calibre.web.feeds.news import BasicNewsRecipe
class Folha_de_s_paulo(BasicNewsRecipe):
    """News recipe for the Folha de São Paulo portal RSS feeds.

    Articles are downloaded through the site's print-view tool, see
    print_version().
    """

    title = u'Folha de São Paulo - portal'
    __author__ = 'Darko Miletic'
    description = 'Um Jornala a servicao do Brasil'
    publisher = 'Folhapress'
    category = 'news, politics, Brasil'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'pt_BR'
    remove_empty_feeds = True
    publication_type = 'newspaper'
    masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
    extra_css = """
body{font-family: Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block}
"""

    # The key is 'comments' (plural), as in the ArsTechnica recipe; the
    # previous 'comment' key did not name a conversion option.
    conversion_options = {
        'comments': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    remove_tags = [dict(name=['meta', 'link', 'base', 'iframe', 'embed', 'object'])]
    keep_only_tags = [dict(attrs={'id': 'articleNew'})]

    feeds = [
        (u'Poder', u'http://feeds.folha.uol.com.br/poder/rss091.xml'),
        (u'Mundo', u'http://feeds.folha.uol.com.br/mundo/rss091.xml'),
        (u'Mercado', u'http://feeds.folha.uol.com.br/mercado/rss091.xml'),
        (u'Cotidiano', u'http://feeds.folha.uol.com.br/cotidiano/rss091.xml'),
        (u'Esporte', u'http://feeds.folha.uol.com.br/esporte/rss091.xml'),
        (u'Ilustrada', u'http://feeds.folha.uol.com.br/ilustrada/rss091.xml'),
        (u'F5', u'http://feeds.folha.uol.com.br/f5/rss091.xml'),
        (u'Ciência', u'http://feeds.folha.uol.com.br/ciencia/rss091.xml'),
        (u'Tec', u'http://feeds.folha.uol.com.br/tec/rss091.xml'),
        (u'Ambiente', u'http://feeds.folha.uol.com.br/ambiente/rss091.xml'),
        (u'Bichos', u'http://feeds.folha.uol.com.br/bichos/rss091.xml'),
        (u'Celebridades', u'http://feeds.folha.uol.com.br/celebridades/rss091.xml'),
        (u'Comida', u'http://feeds.folha.uol.com.br/comida/rss091.xml'),
        (u'Equilibrio', u'http://feeds.folha.uol.com.br/equilibrioesaude/rss091.xml'),
        (u'Folhateen', u'http://feeds.folha.uol.com.br/folhateen/rss091.xml'),
        (u'Folhinha', u'http://feeds.folha.uol.com.br/folhinha/rss091.xml'),
        (u'Ilustrissima', u'http://feeds.folha.uol.com.br/ilustrissima/rss091.xml'),
        (u'Saber', u'http://feeds.folha.uol.com.br/saber/rss091.xml'),
        (u'Turismo', u'http://feeds.folha.uol.com.br/turismo/rss091.xml'),
        (u'Panel do Leitor', u'http://feeds.folha.uol.com.br/folha/paineldoleitor/rss091.xml'),
        (u'Publifolha', u'http://feeds.folha.uol.com.br/folha/publifolha/rss091.xml'),
        (u'Em cima da hora', u'http://feeds.folha.uol.com.br/emcimadahora/rss091.xml'),
    ]

    def get_article_url(self, article):
        """Return the part of the feed link that follows the first '/*'.

        Falls back to the unmodified link when it contains no '/*', and
        passes through a missing link unchanged, instead of returning an
        empty string or raising.
        """
        url = BasicNewsRecipe.get_article_url(self, article)
        if not url:
            return url
        _prefix, marker, target = url.partition('/*')
        return target if marker else url

    def print_version(self, url):
        """Return the print-tool URL for the article at ``url``."""
        return 'http://tools.folha.com.br/print?site=emcimadahora&url=' + urllib.quote_plus(url)

    def get_cover_url(self):
        """Return the front-page thumbnail URL from the portal home page.

        The image is looked up inside the link to the print edition within
        the ``newspaper`` div.  Returns None when any of those elements is
        missing.
        """
        soup = self.index_to_soup('http://www.folha.uol.com.br/')
        cont = soup.find('div', attrs={'id': 'newspaper'})
        if cont is None:
            return None
        ai = cont.find('a', href='http://www1.folha.uol.com.br/fsp/')
        if ai is None or ai.img is None:
            return None
        # .get() so that an <img> without a src yields None, not KeyError
        return ai.img.get('src', None)

View File

@ -8,7 +8,7 @@ from urllib2 import Request, urlopen, URLError
class FolhaOnline(BasicNewsRecipe):
THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here
LANGUAGE = 'pt_br'
language = 'pt'
language = 'pt_BR'
LANGHTM = 'pt-br'
ENCODING = 'cp1252'
ENCHTM = 'iso-8859-1'

View File

@ -14,7 +14,7 @@ class FSP(BasicNewsRecipe):
HOMEPAGE = 'http://www1.folha.uol.com.br/fsp/'
masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
language = 'pt'
language = 'pt_BR'
no_stylesheets = True
max_articles_per_feed = 40
remove_javascript = True

View File

@ -6,21 +6,20 @@ __copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
from datetime import date
import re
class GN(BasicNewsRecipe):
EDITION = 0
__author__ = 'Piotr Kontek'
title = u'Gość niedzielny'
description = 'Weekly magazine'
encoding = 'utf-8'
no_stylesheets = True
language = 'pl'
remove_javascript = True
temp_files = []
simultaneous_downloads = 1
masthead_url = 'http://gosc.pl/files/11/03/12/949089_top.gif'
title = u'Gość niedzielny'
articles_are_obfuscated = True
@ -56,22 +55,28 @@ class GN(BasicNewsRecipe):
self.temp_files[-1].close()
return self.temp_files[-1].name
def find_last_issue(self):
soup = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny')
#szukam zdjęcia i linka do porzedniego pełnego numeru
def find_last_issue(self, year):
soup = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny/rok/' + str(year))
#szukam zdjęcia i linka do poprzedniego pełnego numeru
first = True
for d in soup.findAll('div', attrs={'class':'l release_preview_l'}):
img = d.find('img')
if img != None:
a = img.parent
self.EDITION = a['href']
self.title = img['alt']
self.cover_url = 'http://www.gosc.pl' + img['src']
if not first:
if year != date.today().year or not first:
break
first = False
def parse_index(self):
self.find_last_issue()
year = date.today().year
self.find_last_issue(year)
##jeśli to pierwszy numer w roku trzeba pobrać poprzedni rok
if self.EDITION == 0:
self.find_last_issue(year-1)
soup = self.index_to_soup('http://www.gosc.pl' + self.EDITION)
feeds = []
#wstepniak

79
recipes/grid_to.recipe Normal file
View File

@ -0,0 +1,79 @@
from calibre.web.feeds.news import BasicNewsRecipe
class TheGridTO(BasicNewsRecipe):
    """News recipe that builds an issue of The Grid from thegridto.com.

    The article list is scraped from the latest-issue page (parse_index)
    and the cover from the page at INDEX (get_cover_url).
    """

    #: The title to use for the ebook
    title = u'The Grid TO'

    #: A couple of lines that describe the content this recipe downloads.
    #: This will be used primarily in a GUI that presents a list of recipes.
    description = (u'The Grid is a weekly city magazine and daily website providing a fresh, '
            'accessible voice for Toronto.')

    #: The author of this recipe
    __author__ = u'Yusuf W'

    #: The language that the news is in. Must be an ISO-639 code either
    #: two or three characters long
    language = 'en_CA'

    #: Publication type
    #: Set to newspaper, magazine or blog
    publication_type = 'newspaper'

    #: Convenient flag to disable loading of stylesheets for websites
    #: that have overly complex stylesheets unsuitable for conversion
    #: to ebooks formats
    #: If True stylesheets are not downloaded and processed
    no_stylesheets = True

    #: List of tags to be removed. Specified tags are removed from downloaded HTML.
    remove_tags_before = dict(name='div', id='content')
    remove_tags_after = dict(name='div', id='content')
    remove_tags = [
        dict(name='div', attrs={'class':'right-content pull-right'}),
        dict(name='div', attrs={'class':'right-content'}),
        dict(name='div', attrs={'class':'ftr-line'}),
        dict(name='div', attrs={'class':'pull-right'}),
        dict(name='div', id='comments'),
        dict(name='div', id='tags')
    ]

    #: Keep only the specified tags and their children.
    #keep_only_tags = [dict(name='div', id='content')]

    cover_margins = (0, 0, '#ffffff')

    INDEX = 'http://www.thegridto.com'

    def get_cover_url(self):
        """Return the src of the image in the 'latest-issue' block of INDEX."""
        soup = self.index_to_soup(self.INDEX)
        cover_url = soup.find(attrs={'class':'article-block latest-issue'}).find('img')['src']
        return cover_url

    def parse_index(self):
        """Return ``[(section, articles), ...]`` for the latest issue.

        The issue page is reached through the third link of the
        'full-content stuff-ftr' div on the front page.  For each of the
        sections city, life and culture the second link of every
        'search-block' element becomes an article.  A section whose
        container is missing, or a block with fewer than two links, is
        skipped rather than aborting the whole download.
        """
        # Get the latest issue
        soup = self.index_to_soup(self.INDEX)
        a = soup.find('div', attrs={'class': 'full-content stuff-ftr'}).findAll('a')[2]

        # Parse the index of the latest issue
        # NOTE(review): this rebinds INDEX on the instance, so a later
        # get_cover_url() call reads the issue page - confirm intended.
        self.INDEX = self.INDEX + a['href']
        soup = self.index_to_soup(self.INDEX)

        feeds = []
        for section in ['city', 'life', 'culture']:
            section_class = 'left-content article-listing ' + section + ' pull-left'
            div = soup.find(attrs={'class': section_class})
            if div is None:
                continue

            articles = []
            # NOTE(review): findAllNext walks the rest of the document, not
            # only this div's children - confirm sections do not overlap.
            for tag in div.findAllNext(attrs={'class':'search-block'}):
                links = tag.findAll('a', href=True)
                if len(links) < 2:
                    continue
                a = links[1]
                title = self.tag_to_string(a)
                url = a['href']
                articles.append({'title': title, 'url': url, 'description':'', 'date':''})

            feeds.append((section, articles))
        return feeds

View File

@ -0,0 +1,22 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1336289226(BasicNewsRecipe):
    """News recipe for the heavy-metal.it feed."""

    # --- Recipe bookkeeping ---
    __author__ = 'faber1971'
    __version__ = 'v1.0'
    __date__ = '6, May 2012'

    # --- Publication metadata ---
    title = u'Heavy Metal'
    description = 'An Heavy metal Italian magazine'
    language = 'it'
    masthead_url = 'http://net-static2.tccstatic.com/template/tmw/img/tj.gif'

    # --- Which articles to fetch ---
    feeds = [(u'Heavy Metal', u'http://www.heavy-metal.it/feed/')]
    oldest_article = 15
    max_articles_per_feed = 100

    # --- Page clean-up: explicit tag rules are used, auto_cleanup is off ---
    auto_cleanup = False
    keep_only_tags = [dict(name='div', attrs={'class': 'entry'})]
    remove_tags_after = [dict(name='div', attrs={'class': 'sociable'})]

BIN
recipes/icons/folha.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 648 B

View File

@ -20,6 +20,8 @@ class JijiDotCom(BasicNewsRecipe):
top_url = 'http://www.jiji.com/'
feeds = [(u'\u30cb\u30e5\u30fc\u30b9', u'http://www.jiji.com/rss/ranking.rdf')]
remove_tags_before = dict(id="article-area")
remove_tags_after = dict(id="ad_google")
def get_cover_url(self):

View File

@ -0,0 +1,24 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1336504510(BasicNewsRecipe):
    """News recipe for the 'Qui Juve' feed of La Stampa."""

    # --- Recipe bookkeeping ---
    __author__ = 'faber1971'
    __version__ = 'v1.0'
    __date__ = '8, May 2012'

    # --- Publication metadata ---
    title = u'Juve - La Stampa'
    description = 'News about Juventus from La Stampa'
    language = 'it'
    masthead_url = 'http://www3.lastampa.it/fileadmin/media/sport/quijuve/top_quijuve.jpg'

    # --- Which articles to fetch ---
    feeds = [(u'Qui Juve - La Stampa', u'http://feed43.com/2352784107537677.xml')]
    oldest_article = 1
    max_articles_per_feed = 100

    # --- Page clean-up ---
    auto_cleanup = True
    remove_tags = [
        dict(name='div',
             attrs={'class': ['article-toolbar', 'sezione sezione-news', 'intestazione']}),
    ]

    # --- Styling applied to the downloaded pages ---
    extra_css = '''
div.dettaglio div.immagine_girata p.news-single-imgcaption {color: #000000; font-family: "Georgia", "Times", serif; font-size: 7px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
.sezione {color: #000000; font-family: "Georgia", "Times", serif; font-size: 7px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
body {color: #000000; font-family: "Georgia", "Times", serif; font-size: 7px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
h3 {color: #000000; font-family: "Georgia", "Times", serif; font-size: 22px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
div.dettaglio h2.catenaccio {color: #000000; font-family: "Georgia", "Times", serif; font-size: 18px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
'''

View File

@ -1,7 +1,7 @@
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini'
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version; 14.12.2011 new version'
__author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini; minor fixes by faber1971'
__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>, faber1971'
description = 'Italian daily newspaper - v1.02 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version; 14.12.2011 new version; 11.05.2012 new version'
'''
http://www.repubblica.it/
@ -12,14 +12,14 @@ from calibre.web.feeds.news import BasicNewsRecipe
class LaRepubblica(BasicNewsRecipe):
title = 'La Repubblica'
__author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic'
__author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic, faber1971'
description = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.'
masthead_url = 'http://www.repubblica.it/static/images/homepage/2010/la-repubblica-logo-home-payoff.png'
publisher = 'Gruppo editoriale L\'Espresso'
category = 'News, politics, culture, economy, general interest'
language = 'it'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 5
oldest_article = 1
encoding = 'utf8'
use_embedded_content = False
no_stylesheets = True
@ -59,6 +59,7 @@ class LaRepubblica(BasicNewsRecipe):
dict(attrs={'class':'articolo'}),
dict(attrs={'class':'body-text'}),
dict(name='p', attrs={'class':'disclaimer clearfix'}),
dict(name='div', attrs={'id':'main'}),
dict(attrs={'id':'contA'})
]
@ -67,7 +68,7 @@ class LaRepubblica(BasicNewsRecipe):
dict(name=['object','link','meta','iframe','embed']),
dict(name='span',attrs={'class':'linkindice'}),
dict(name='div', attrs={'class':['bottom-mobile','adv adv-middle-inline']}),
dict(name='div', attrs={'id':['rssdiv','blocco','fb-like-head']}),
dict(name='div', attrs={'id':['rssdiv','blocco','fb-like-head', 'sidebar']}),
dict(name='div', attrs={'class':['utility','fb-like-button','archive-button']}),
dict(name='div', attrs={'class':'generalbox'}),
dict(name='ul', attrs={'id':'hystory'})
@ -88,11 +89,12 @@ class LaRepubblica(BasicNewsRecipe):
(u'Sport', u'http://www.repubblica.it/rss/sport/rss2.0.xml'),
(u'Calcio', u'http://www.repubblica.it/rss/sport/calcio/rss2.0.xml'),
(u'Motori', u'http://www.repubblica.it/rss/motori/rss2.0.xml'),
(u'Edizione Roma', u'http://roma.repubblica.it/rss/rss2.0.xml'),
(u'Edizione Torino', u'http://torino.repubblica.it/rss/rss2.0.xml'),
(u'Edizione Milano', u'feed://milano.repubblica.it/rss/rss2.0.xml'),
(u'Edizione Napoli', u'feed://napoli.repubblica.it/rss/rss2.0.xml'),
(u'Edizione Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml')
(u'Roma', u'http://roma.repubblica.it/rss/rss2.0.xml'),
(u'Torino', u'http://torino.repubblica.it/rss/rss2.0.xml'),
(u'Milano', u'feed://milano.repubblica.it/rss/rss2.0.xml'),
(u'Napoli', u'feed://napoli.repubblica.it/rss/rss2.0.xml'),
(u'Bari', u'http://bari.repubblica.it/rss/rss2.0.xml'),
(u'Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml')
]
def preprocess_html(self, soup):

View File

@ -16,12 +16,12 @@ class MainichiDailyNews(BasicNewsRecipe):
publisher = 'Mainichi Daily News'
category = 'news, japan'
language = 'ja'
feeds = [(u'daily news', u'http://mainichi.jp/rss/etc/flash.rss')]
index = 'http://mainichi.jp/select/'
remove_javascript = True
masthead_title = u'MAINICHI DAILY NEWS'
remove_tags_before = {'class':"NewsTitle"}
remove_tags = [{'class':"RelatedArticle"}]
remove_tags_after = {'class':"Credit"}
remove_tags_after = {'class':"NewsBody clr"}
def parse_feeds(self):
@ -32,9 +32,30 @@ class MainichiDailyNews(BasicNewsRecipe):
for a,curarticle in enumerate(curfeed.articles):
if re.search(r'pheedo.jp', curarticle.url):
delList.append(curarticle)
if re.search(r'rssad.jp', curarticle.url):
delList.append(curarticle)
if len(delList)>0:
for d in delList:
index = curfeed.articles.index(d)
curfeed.articles[index:index+1] = []
return feeds
def parse_index(self):
    """Build the 'latest' article list from the page at ``self.index``.

    Every ``li`` of the ``ul.MaiLink`` list that contains a link with an
    href becomes one article.  Returns an empty list when the page has no
    such ``ul``, otherwise ``[('latest', articles)]``.
    """
    page = self.index_to_soup(self.index)
    link_list = page.find('ul', attrs={'class': 'MaiLink'})
    if not link_list:
        return []

    anchors = (entry.find('a', href=True) for entry in link_list.findAll('li'))
    found = [
        {
            'title': anchor.string,
            'date': '',
            'url': anchor['href'],
            'description': '',
        }
        for anchor in anchors
        if anchor
    ]
    return [('latest', found)]

View File

@ -0,0 +1,67 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.mainichi.jp
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class MainichiEnglishNews(BasicNewsRecipe):
    """News recipe for the English edition of the Mainichi newspaper.

    Articles are collected by scraping the section lists of the page at
    ``index`` (parse_index).
    """

    title = u'The Mainichi'
    __author__ = 'Hiroshi Miura'
    oldest_article = 2
    max_articles_per_feed = 40
    description = 'Japanese traditional newspaper Mainichi news in English'
    publisher = 'Mainichi News'
    category = 'news, japan'
    language = 'en_JP'
    index = 'http://mainichi.jp/english/english/index.html'
    remove_javascript = True
    masthead_url = 'http://mainichi.jp/english/images/themainichi.png'

    remove_tags_before = {'class':"NewsTitle"}
    remove_tags_after = {'class':"NewsBody clr"}

    def parse_feeds(self):
        """Return the parsed feeds with pheedo.jp / rssad.jp entries removed.

        Each article is tested once, so a URL matching both patterns is
        dropped once; previously it was queued twice and the second
        removal raised ValueError.
        """
        feeds = BasicNewsRecipe.parse_feeds(self)

        for curfeed in feeds:
            # Slice assignment keeps the same list object on the feed
            curfeed.articles[:] = [
                curarticle for curarticle in curfeed.articles
                if not (re.search(r'pheedo.jp', curarticle.url)
                        or re.search(r'rssad.jp', curarticle.url))
            ]

        return feeds

    def parse_index(self):
        """Return ``[(section_name, articles), ...]`` scraped from ``index``.

        Each ``section`` element contributes the links of its ``ul.MaiLink``
        list.  The section name comes from the link inside the h1 of its
        'CategoryHead clr' div and defaults to 'news'.  Sections without a
        link list and list items without an href link are skipped.
        """
        feeds = []
        soup = self.index_to_soup(self.index)
        for section in soup.findAll('section'):
            section_name = 'news'
            hds = section.find('div', attrs={'class':'CategoryHead clr'})
            if hds:
                section_item = hds.find('h1')
                if section_item:
                    section_link = section_item.find('a')
                    # Keep the default when the heading has no plain-text link
                    if section_link is not None and section_link.string is not None:
                        section_name = section_link.string

            items = section.find('ul', attrs={'class':'MaiLink'})
            if items is None:
                continue

            newsarticles = []
            for item in items.findAll('li'):
                itema = item.find('a', href=True)
                if itema is None:
                    continue
                newsarticles.append({
                    'title': itema.string,
                    'date': '',
                    'url': itema['href'],
                    'description': '',
                })
            feeds.append((section_name, newsarticles))
        return feeds

View File

@ -1,34 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class MainichiDailyITNews(BasicNewsRecipe):
    # Japanese-language Mainichi feed for IT and consumer electronics.
    title                 = u'\u6bce\u65e5\u65b0\u805e(IT&\u5bb6\u96fb)'
    description           = 'Japanese traditional newspaper Mainichi Daily News - IT and electronics'
    publisher             = 'Mainichi Daily News'
    category              = 'news, Japan, IT, Electronics'
    language              = 'ja'
    __author__            = 'Hiroshi Miura'

    oldest_article        = 2
    max_articles_per_feed = 100

    feeds = [(u'IT News', u'http://mainichi.pheedo.jp/f/mainichijp_electronics')]

    # article clean-up boundaries
    remove_tags_before = {'class':"NewsTitle"}
    remove_tags        = [{'class':"RelatedArticle"}]
    remove_tags_after  = {'class':"Credit"}

    def parse_feeds(self):
        """Parse the feeds, then delete every article whose URL matches ``pheedo.jp``."""
        parsed = BasicNewsRecipe.parse_feeds(self)
        for feed in parsed:
            # collect first, delete afterwards, so the list is not mutated
            # while it is being scanned
            doomed = [entry for entry in feed.articles
                      if re.search(r'pheedo.jp', entry.url)]
            for entry in doomed:
                del feed.articles[feed.articles.index(entry)]
        return parsed

View File

@ -0,0 +1,59 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.mainichi.jp
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class MainichiDailyScienceNews(BasicNewsRecipe):
    # Science section of the Japanese-language Mainichi site. The article
    # list comes from scraping the section index page in parse_index().
    title                 = u'\u6bce\u65e5\u65b0\u805e(Science)'
    description           = 'Japanese traditional newspaper Mainichi Daily News - science'
    publisher             = 'Mainichi Daily News'
    category              = 'news, japan'
    language              = 'ja'
    __author__            = 'Hiroshi Miura'

    oldest_article        = 2
    max_articles_per_feed = 20

    index                 = 'http://mainichi.jp/select/science'
    masthead_title        = u'MAINICHI DAILY NEWS'
    remove_javascript     = True

    # article clean-up boundaries
    remove_tags_before = {'class':"NewsTitle"}
    remove_tags_after  = {'class':"NewsBody clr"}

    def parse_feeds(self):
        """Parse the feeds, then delete every article whose URL matches ``rssad.jp``."""
        parsed = BasicNewsRecipe.parse_feeds(self)
        for feed in parsed:
            # collect first, delete afterwards
            doomed = [entry for entry in feed.articles
                      if re.search(r'rssad.jp', entry.url)]
            for entry in doomed:
                del feed.articles[feed.articles.index(entry)]
        return parsed

    def parse_index(self):
        """Scrape the index page into a single feed named 'Science'.

        Every ``li`` of the first ``ul`` with class ``MaiLink`` that holds a
        link with an ``href`` becomes an article. Returns an empty list when
        the page has no such ``ul``.
        """
        page = self.index_to_soup(self.index)
        link_list = page.find('ul',attrs={'class':'MaiLink'})
        if not link_list:
            return []
        entries = []
        for bullet in link_list.findAll('li'):
            anchor = bullet.find('a',href=True)
            if not anchor:
                continue
            entries.append(dict(title=anchor.string, date='',
                                url=anchor['href'], description=''))
        return [('Science', entries)]

View File

@ -0,0 +1,42 @@
from calibre.web.feeds.news import BasicNewsRecipe
class MarineCorpsTimes(BasicNewsRecipe):
    # RSS-driven recipe for MarineCorpsTimes.com. Article extraction is left
    # to auto_cleanup, so no tag filters are declared.

    # -- identity ---------------------------------------------------------
    title            = 'Marine Corps Times'
    description      = 'News of the U.S. Marine Corps'
    publisher        = 'MarineCorpsTimes.com'
    category         = 'news, U.S. Marine Corps'
    tags             = 'news, U.S. Marine Corps'
    language         = 'en'
    publication_type = 'newspaper'
    __author__       = 'jde'
    __date__         = '16 May 2012'
    __version__      = '1.0'

    # -- artwork ----------------------------------------------------------
    cover_url    = 'http://www.marinecorpstimes.com/images/logo_marinetimes-alert.jpg'
    masthead_url = 'http://www.marinecorpstimes.com/images/logo_marinetimes-alert.jpg'

    # -- download limits --------------------------------------------------
    oldest_article        = 7 #days
    max_articles_per_feed = 25
    recursions            = 0
    needs_subscription    = False

    # -- content handling -------------------------------------------------
    encoding             = None
    use_embedded_content = False
    no_stylesheets       = True
    remove_javascript    = True
    remove_empty_feeds   = True
    auto_cleanup         = True

    feeds = [
        ('News', 'http://www.MarineCorpstimes.com/rss_news.php'),
        ('Benefits', 'http://www.MarineCorpstimes.com/rss_benefits.php'),
        ('Money', 'http://www.MarineCorpstimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.MarineCorpstimes.com/rss_careers.php'),
        ('Community', 'http://www.MarineCorpstimes.com/rss_community.php'),
        ('Off Duty', 'http://www.MarineCorpstimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.MarineCorpstimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.MarineCorpstimes.com/rss_guard.php'),
    ]

View File

@ -0,0 +1,41 @@
from calibre.web.feeds.news import BasicNewsRecipe
class MilitaryTimes(BasicNewsRecipe):
    # RSS-driven recipe for MilitaryTimes.com. Article extraction is left to
    # auto_cleanup, so no tag filters are declared.

    # -- identity ---------------------------------------------------------
    title            = 'Military Times'
    description      = 'News of the U.S. Military'
    publisher        = 'MilitaryTimes.com'
    category         = 'news, U.S. Military'
    tags             = 'news, U.S. Military'
    language         = 'en'
    publication_type = 'newspaper'
    __author__       = 'jde'
    __date__         = '16 May 2012'
    __version__      = '1.0'

    # -- artwork ----------------------------------------------------------
    cover_url    = 'http://www.militarytimes.com/images/logo_militarytimes_landing-s.gif'
    masthead_url = 'http://www.militarytimes.com/images/logo_militarytimes_landing-s.gif'

    # -- download limits --------------------------------------------------
    oldest_article        = 7 #days
    max_articles_per_feed = 25
    recursions            = 0
    needs_subscription    = False

    # -- content handling -------------------------------------------------
    encoding             = None
    use_embedded_content = False
    no_stylesheets       = True
    remove_javascript    = True
    remove_empty_feeds   = True
    auto_cleanup         = True

    feeds = [
        ('News', 'http://www.militarytimes.com/rss_news.php'),
        ('Benefits', 'http://www.militarytimes.com/rss_benefits.php'),
        ('Money', 'http://www.militarytimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.militarytimes.com/rss_careers.php'),
        ('Community', 'http://www.militarytimes.com/rss_community.php'),
        ('Off Duty', 'http://www.militarytimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.militarytimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.militarytimes.com/rss_guard.php'),
    ]

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
@ -7,77 +6,21 @@ __license__ = 'GPL v3'
www.canada.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre.web.feeds.recipes import BasicNewsRecipe
class CanWestPaper(BasicNewsRecipe):
# un-comment the following four lines for the Victoria Times Colonist
## title = u'Victoria Times Colonist'
## url_prefix = 'http://www.timescolonist.com'
## description = u'News from Victoria, BC'
## fp_tag = 'CAN_TC'
# un-comment the following four lines for the Vancouver Province
## title = u'Vancouver Province'
## url_prefix = 'http://www.theprovince.com'
## description = u'News from Vancouver, BC'
## fp_tag = 'CAN_VP'
# un-comment the following four lines for the Vancouver Sun
## title = u'Vancouver Sun'
## url_prefix = 'http://www.vancouversun.com'
## description = u'News from Vancouver, BC'
## fp_tag = 'CAN_VS'
# un-comment the following four lines for the Edmonton Journal
## title = u'Edmonton Journal'
## url_prefix = 'http://www.edmontonjournal.com'
## description = u'News from Edmonton, AB'
## fp_tag = 'CAN_EJ'
# un-comment the following four lines for the Calgary Herald
## title = u'Calgary Herald'
## url_prefix = 'http://www.calgaryherald.com'
## description = u'News from Calgary, AB'
## fp_tag = 'CAN_CH'
# un-comment the following four lines for the Regina Leader-Post
## title = u'Regina Leader-Post'
## url_prefix = 'http://www.leaderpost.com'
## description = u'News from Regina, SK'
## fp_tag = ''
# un-comment the following four lines for the Saskatoon Star-Phoenix
## title = u'Saskatoon Star-Phoenix'
## url_prefix = 'http://www.thestarphoenix.com'
## description = u'News from Saskatoon, SK'
## fp_tag = ''
# un-comment the following four lines for the Windsor Star
## title = u'Windsor Star'
## url_prefix = 'http://www.windsorstar.com'
## description = u'News from Windsor, ON'
## fp_tag = 'CAN_'
# un-comment the following four lines for the Ottawa Citizen
## title = u'Ottawa Citizen'
## url_prefix = 'http://www.ottawacitizen.com'
## description = u'News from Ottawa, ON'
## fp_tag = 'CAN_OC'
# un-comment the following four lines for the Montreal Gazette
# un-comment the following three lines for the Montreal Gazette
title = u'Montreal Gazette'
url_prefix = 'http://www.montrealgazette.com'
description = u'News from Montreal, QC'
fp_tag = 'CAN_MG'
language = 'en_CA'
__author__ = 'Nick Redding'
no_stylesheets = True
auto_cleanup = True
auto_cleanup_keep = '//*[@id="imageBox"]'
timefmt = ' [%b %d]'
extra_css = '''
.timestamp { font-size:xx-small; display: block; }
@ -87,135 +30,19 @@ class CanWestPaper(BasicNewsRecipe):
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def get_cover_url(self):
from datetime import timedelta, date
if self.fp_tag=='':
return None
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
daysback=1
try:
br.open(cover)
except:
while daysback<7:
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
except:
daysback = daysback+1
continue
break
if daysback==7:
self.log("\nCover unavailable")
cover = None
return cover
def fixChars(self,string):
    """Replace stray punctuation codes in ``string`` with real characters.

    Handles the single-character codes \\x91-\\x94, \\x96 and \\x97 plus the
    literal entity ``&#x2019;``. Each is replaced by the typographic
    character named in the table below rather than being dropped.

    Returns the converted string.
    """
    replacements = (
        ("\x91", "‘"),      # lsquo
        ("\x92", "’"),      # rsquo
        ("\x93", "“"),      # ldquo
        ("\x94", "”"),      # rdquo
        ("\x96", "–"),      # ndash
        ("\x97", "—"),      # mdash
        ("&#x2019;", "’"),  # rsquo written as a numeric entity
    )
    fixed = string
    for pattern, character in replacements:
        fixed = re.sub(pattern, character, fixed)
    return fixed
def massageNCXText(self, description):
    """Clean a description for use in the Kindle table of contents.

    Entities are converted by ``BeautifulStoneSoup``, any remaining
    ``&amp;`` is reduced to a bare ``&``, and the result is passed through
    ``fixChars``. An empty or ``None`` description is returned unchanged.
    """
    # Kindle TOC descriptions won't render certain characters
    if not description:
        return description
    massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
    # Replace '&amp;' with '&' (substituting '&' for '&' would do nothing)
    massaged = re.sub("&amp;","&", massaged)
    return self.fixChars(massaged)
def populate_article_metadata(self, article, soup, first):
if first:
picdiv = soup.find('body').find('img')
if picdiv is not None:
self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src']))
xtitle = article.text_summary.strip()
if len(xtitle) == 0:
desc = soup.find('meta',attrs={'property':'og:description'})
if desc is not None:
article.summary = article.text_summary = desc['content']
def strip_anchors(self,soup):
paras = soup.findAll(True)
for para in paras:
aTags = para.findAll('a')
for a in aTags:
if a.img is None:
a.replaceWith(a.renderContents().decode('cp1252','replace'))
return soup
def preprocess_html(self, soup):
return self.strip_anchors(soup)
feeds = [
('News',
'http://rss.canada.com/get/?F297'),
('Sports',
'http://rss.canada.com/get/?F299'),
('Entertainment',
'http://rss.canada.com/get/?F7366'),
('Business',
'http://rss.canada.com/get/?F6939'),
]
def parse_index(self):
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
articles = {}
key = 'News'
ans = ['News']
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
#self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3,False)
ans.append(key)
self.log("Section name %s" % key)
continue
# div contains article data
h1tag = divtag.find('h1')
if not h1tag:
continue
atag = h1tag.find('a',href=True)
if not atag:
continue
url = self.url_prefix+'/news/todays-paper/'+atag['href']
#self.log("Section %s" % key)
#self.log("url %s" % url)
title = self.tag_to_string(atag,False)
#self.log("title %s" % title)
pubdate = ''
description = ''
ptag = divtag.find('p');
if ptag:
description = self.tag_to_string(ptag,False)
#self.log("description %s" % description)
author = ''
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag,False)
#self.log("author %s" % author)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@ -0,0 +1,22 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Nachdenkseiten(BasicNewsRecipe):
    # German-language recipe for nachdenkseiten.de, fed by the site's RSS2
    # feed; only the element with id "content" is kept from each page.

    # -- identity ---------------------------------------------------------
    title       = u'Nachdenkseiten'
    description = 'NachDenkSeiten - Die kritische Website'
    publisher   = 'www.nachdenkseiten.de Albrecht Mueller und Dr. Wolfgang Lieb'
    category    = 'news'
    language    = 'de'
    __author__  = 'jrda'

    # -- download limits --------------------------------------------------
    oldest_article        = 7
    max_articles_per_feed = 6
    timefmt               = ''

    # -- content handling -------------------------------------------------
    encoding             = 'utf-8'
    use_embedded_content = False
    no_stylesheets       = True
    remove_javascript    = True
    keep_only_tags       = [{'id':'content'}]

    feeds = [
        ('News', 'http://www.nachdenkseiten.de/?feed=rss2'),
    ]

View File

@ -0,0 +1,16 @@
__version__ = 'v1.0'
__date__ = '5, May 2012'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1336226255(BasicNewsRecipe):
    # Italian National Geographic, read from the site's combined RSS feed.
    title       = u'National Geographic'
    description = 'Science magazine'
    language    = 'it'
    __author__  = 'faber1971'

    oldest_article        = 15
    max_articles_per_feed = 100

    # auto_cleanup extracts the article; the subscription banner is removed
    # explicitly
    auto_cleanup = True
    remove_tags  = [dict(name='div',attrs={'class':'banner-abbonamenti'})]

    feeds = [
        (u'National Geographic', u'http://www.nationalgeographic.it/rss/all/rss2.0.xml'),
    ]

42
recipes/navy_times.recipe Normal file
View File

@ -0,0 +1,42 @@
from calibre.web.feeds.news import BasicNewsRecipe
class NavyTimes(BasicNewsRecipe):
    # RSS-driven recipe for NavyTimes.com. Article extraction is left to
    # auto_cleanup, so no tag filters are declared.

    # -- identity ---------------------------------------------------------
    title            = 'Navy Times'
    description      = 'News of the U.S. Navy'
    publisher        = 'NavyTimes.com'
    category         = 'news, U.S. Navy'
    tags             = 'news, U.S. Navy'
    language         = 'en'
    publication_type = 'newspaper'
    __author__       = 'jde'
    __date__         = '16 May 2012'
    __version__      = '1.0'

    # -- artwork ----------------------------------------------------------
    cover_url    = 'http://www.navytimes.com/images/logo_navytimes_alert.jpg'
    masthead_url = 'http://www.navytimes.com/images/logo_navytimes_alert.jpg'

    # -- download limits --------------------------------------------------
    oldest_article        = 7 #days
    max_articles_per_feed = 25
    recursions            = 0
    needs_subscription    = False

    # -- content handling -------------------------------------------------
    encoding             = None
    use_embedded_content = False
    no_stylesheets       = True
    remove_javascript    = True
    remove_empty_feeds   = True
    auto_cleanup         = True

    feeds = [
        ('News', 'http://www.navytimes.com/rss_news.php'),
        ('Benefits', 'http://www.navytimes.com/rss_benefits.php'),
        ('Money', 'http://www.navytimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.navytimes.com/rss_careers.php'),
        ('Community', 'http://www.navytimes.com/rss_community.php'),
        ('Off Duty', 'http://www.navytimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.navytimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.navytimes.com/rss_guard.php'),
    ]

View File

@ -0,0 +1,20 @@
from calibre.web.feeds.news import BasicNewsRecipe
class NewsBusters(BasicNewsRecipe):
    # Single-feed recipe for the newsbusters.org blog; articles are cleaned
    # up by auto_cleanup.

    # -- identity ---------------------------------------------------------
    title       = u'News Busters'
    description = 'Exposing and Combating Liberal Media Bias'
    language    = 'en'
    __author__  = 'jde'
    cover_url   = "http://newsbusters.org/sites/all/themes/genesis_nb/images/nb-mrc.png"

    # -- download limits --------------------------------------------------
    oldest_article        = 1#day
    max_articles_per_feed = 100
    recursions            = 0
    needs_subscription    = False

    # -- content handling -------------------------------------------------
    encoding             = 'utf8'
    use_embedded_content = False
    no_stylesheets       = True
    remove_javascript    = True
    auto_cleanup         = True

    feeds = [
        (u'Blog', u'http://www.newsbusters.org/rss.xml'),
    ]

View File

@ -9,10 +9,10 @@ import re
from calibre.web.feeds.news import BasicNewsRecipe
class Pescanik(BasicNewsRecipe):
title = 'Peščanik'
title = u'Peščanik'
__author__ = 'Darko Miletic'
description = 'Peščanik je udruženje građana osnovano 2006. godine. Glavni proizvod Peščanika je radio emisija koja je emitovana na Radiju B92 od 02.02.2000. do 16.06.2011, a od septembra 2011. se emituje na osam radio stanica u Srbiji, Crnoj Gori i BiH'
publisher = 'Peščanik'
description = u'Peščanik je udruženje građana osnovano 2006. godine. Glavni proizvod Peščanika je radio emisija koja je emitovana na Radiju B92 od 02.02.2000. do 16.06.2011, a od septembra 2011. se emituje na osam radio stanica u Srbiji, Crnoj Gori i BiH'
publisher = u'Peščanik'
category = 'news, politics, Serbia'
oldest_article = 10
max_articles_per_feed = 100
@ -45,4 +45,4 @@ class Pescanik(BasicNewsRecipe):
]
def print_version(self, url):
return url + 'print/'
return url + 'print/'

View File

@ -1,5 +1,5 @@
"""
Pocket Calibre Recipe v1.0
Pocket Calibre Recipe v1.2
"""
__license__ = 'GPL v3'
__copyright__ = '''
@ -73,6 +73,9 @@ class Pocket(BasicNewsRecipe):
articles = []
soup = self.index_to_soup(feedurl)
ritem = soup.find('ul', attrs={'id':'list'})
if ritem is None:
self.log.exception("Page %s skipped: invalid HTML" % (feedtitle if feedtitle else feedurl))
continue
for item in reversed(ritem.findAll('li')):
if articlesToGrab < 1:
break
@ -94,7 +97,12 @@ class Pocket(BasicNewsRecipe):
self.readList.append(readLink)
totalfeeds.append((feedtitle, articles))
if len(self.readList) < self.minimum_articles:
raise Exception("Not enough articles in RIL! Change minimum_articles or add more.")
self.mark_as_read_after_dl = False
if hasattr(self, 'abort_recipe_processing'):
self.abort_recipe_processing("Only %d articles retrieved, minimum_articles not reached" % len(self.readList))
else:
self.log.exception("Only %d articles retrieved, minimum_articles not reached" % len(self.readList))
return []
return totalfeeds
def mark_as_read(self, markList):

View File

@ -0,0 +1,22 @@
__license__ = 'GPL v3'
__author__ = 'Vakya'
__version__ = 'v1.0'
__date__ = '14, May 2012'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1336226255(BasicNewsRecipe):
    # Spanish-language recipe for Revista Summa, read from the magazine's
    # RSS feed.
    title       = u'Revista Summa'
    publisher   = u'Summa'
    description = 'Informacion regional sobre economia y negocios'
    language    = 'es'
    __author__  = 'Vakya'

    oldest_article        = 15
    max_articles_per_feed = 100

    # auto_cleanup is on; the article is additionally bounded by the first
    # <h1> and the first <label>
    auto_cleanup       = True
    remove_tags_before = dict(name='h1')
    remove_tags_after  = dict(name='label')

    feeds = [
        (u'Revista Summa', u'http://www.revistasumma.com/rss/rss-v2.0.rss'),
    ]

View File

@ -1,3 +1,4 @@
__license__ = 'GPL v3'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
@ -15,6 +16,8 @@ class Spiegel_int(BasicNewsRecipe):
language = 'en_DE'
no_stylesheets = True
use_embedded_content = False
auto_cleanup = True
auto_cleanup_keep = '//*[@id="spArticleTopAsset"]'
encoding = 'cp1252'
publisher = 'SPIEGEL ONLINE GmbH'
category = 'news, politics, Germany'
@ -43,25 +46,25 @@ class Spiegel_int(BasicNewsRecipe):
.spPhotoGallery{font-size:x-small; color:#990000 ;}
'''
keep_only_tags = [dict(attrs={'id':'spArticleContent'})]
remove_tags_after = dict(attrs={'id':'spArticleBody'})
remove_tags = [dict(name=['meta','base','iframe','embed','object'])]
remove_attributes = ['clear']
#keep_only_tags = [dict(attrs={'id':'spArticleContent'})]
#remove_tags_after = dict(attrs={'id':'spArticleBody'})
#remove_tags = [dict(name=['meta','base','iframe','embed','object'])]
#remove_attributes = ['clear']
feeds = [(u'Spiegel Online', u'http://www.spiegel.de/international/index.rss')]
def print_version(self, url):
main, sep, rest = url.rpartition(',')
rmain, rsep, rrest = main.rpartition(',')
return rmain + ',druck-' + rrest + ',' + rest
#def print_version(self, url):
#main, sep, rest = url.rpartition(',')
#rmain, rsep, rrest = main.rpartition(',')
#return rmain + ',druck-' + rrest + ',' + rest
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('a'):
if item.string is not None:
str = item.string
item.replaceWith(str)
else:
str = self.tag_to_string(item)
item.replaceWith(str)
return soup
#def preprocess_html(self, soup):
#for item in soup.findAll(style=True):
#del item['style']
#for item in soup.findAll('a'):
#if item.string is not None:
#str = item.string
#item.replaceWith(str)
#else:
#str = self.tag_to_string(item)
#item.replaceWith(str)
#return soup

View File

@ -0,0 +1,39 @@
''' Stars and Stripes
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1308791026(BasicNewsRecipe):
    # Recipe for Stars and Stripes (stripes.com), built from five section
    # feeds.
    title                 = u'Stars and Stripes'
    oldest_article        = 3
    max_articles_per_feed = 100
    __author__            = 'adoucette'
    # "military's": the possessive apostrophe was missing from this text
    description           = "The U.S. military's independent news source, featuring exclusive reports from Iraq, Afghanistan, Europe and the Far East."
    no_stylesheets        = True
    #delay                = 1
    use_embedded_content  = False
    encoding              = 'utf8'
    publisher             = 'stripes.com'
    category              = 'news, US, world'
    language              = 'en_US'
    publication_type      = 'newsportal'
    # strip HTML comments (including multi-line ones) before parsing
    preprocess_regexps    = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
    conversion_options = {
                          'comments'        : description
                         ,'tags'            : category
                         ,'language'        : language
                         ,'publisher'       : publisher
                         ,'linearize_tables': True
                         }

    keep_only_tags     = [dict(name='div', attrs={'class':['element article']})]
    remove_tags_after  = [dict(name='ul', attrs={'class':'inline-bookmarks'})]

    feeds = [
             (u'News', u'http://feeds.stripes.com/starsandstripes/news'),
             (u'Sports', u'http://feeds.stripes.com/starsandstripes/sports'),
             (u'Military Life', u'http://feeds.stripes.com/starsandstripes/militarylife'),
             (u'Opinion', u'http://feeds.stripes.com/starsandstripes/opinion'),
             (u'Travel', u'http://feeds.stripes.com/starsandstripes/travel')
            ]

View File

@ -0,0 +1,92 @@
__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.strategic-culture.org
'''
import time
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
class StrategicCulture(BasicNewsRecipe):
    # Recipe for www.strategic-culture.org. The entries in ``feeds`` are
    # HTML listing pages rather than RSS; parse_index() scrapes each one.
    title                 = 'Strategic Culture Foundation'
    __author__            = 'Darko Miletic'
    description           = 'Online Journal'
    publisher             = 'Strategic Culture Foundation'
    category              = 'news, politics'
    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
    language              = 'en'
    publication_type      = 'newsportal'
    masthead_url          = 'http://www.strategic-culture.org/img/logo.jpg'
    extra_css             = '''
                                body{font-family: Arial, sans-serif}
                                h1{font-family: "Times New Roman",Times,serif}
                                img{margin-bottom: 0.8em}
                            '''

    # 'comments' (plural) matches the key used by the other recipe in this
    # change set that fills conversion_options; 'comment' was a misspelling
    conversion_options = {
                          'comments'  : description
                        , 'tags'      : category
                        , 'publisher' : publisher
                        , 'language'  : language
                        }

    keep_only_tags = [
                         dict(name=['h1','p'])
                        ,dict(name='div', attrs={'id':'cke_pastebin'})
                     ]

    remove_tags = [dict(name=['object','link','base','meta','iframe'])]

    feeds = [
              (u'News'             , u'http://www.strategic-culture.org/blocks/news.html'                )
             ,(u'Politics'         , u'http://www.strategic-culture.org/rubrics/politics.html'           )
             ,(u'Economics'        , u'http://www.strategic-culture.org/rubrics/economics.html'          )
             ,(u'History & Culture', u'http://www.strategic-culture.org/rubrics/history-and-culture.html')
             ,(u'Columnists'       , u'http://www.strategic-culture.org/rubrics/columnists.html'         )
            ]

    def print_version(self, url):
        """Map an article URL to its print view (``/news/`` becomes ``/pview/``)."""
        return url.replace('-culture.org/news/','-culture.org/pview/')

    def _date_from_url(self, url):
        """Format the date embedded in an article URL, or return ''.

        The three path components before the final one are read as year,
        month and day. A URL that does not have that layout is logged and
        given an empty date, so one odd link cannot abort the download.
        """
        parts = url.rpartition('/')[0].split('/')
        try:
            year, month, day = parts[-3:]
            parsed = time.strptime(day + "/" + month + "/" + year, "%d/%m/%Y")
        except ValueError:
            # too few path components, or components that are not a date
            self.log('No date found in article URL %s' % url)
            return ''
        return strftime("%a, %d %b %Y %H:%M:%S +0000", parsed)

    def parse_index(self):
        """Scrape every listing page in ``feeds`` into a list of articles.

        Article links carry class ``sini14`` on the news page and ``h22`` on
        the other pages. Within one page a URL is listed once, at its first
        occurrence.

        Returns a list of ``(feedtitle, articles)`` tuples.
        """
        totalfeeds = []
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            soup = self.index_to_soup(feedurl)
            clname = 'sini14' if feedurl.endswith('news.html') else 'h22'
            articles = []
            seen = set()
            for atag in soup.findAll('a', attrs={'class':clname}):
                url = atag['href']
                if url in seen:
                    # repeated link on the same page: skip before doing any
                    # further work on it
                    continue
                seen.add(url)
                articles.append({
                                  'title'      :self.tag_to_string(atag)
                                 ,'date'       :self._date_from_url(url)
                                 ,'url'        :url
                                 ,'description':''
                                })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds

Binary file not shown.

View File

@ -506,3 +506,17 @@ change_book_details_font_size_by = 0
# No compile: compile_gpm_templates = False
compile_gpm_templates = True
#: What format to default to when using the Tweak feature
# The Tweak feature of calibre allows direct editing of a book format.
# If multiple formats are available, calibre will offer you a choice
# of formats, defaulting to your preferred output format if it is available.
# Set this tweak to a specific value of 'EPUB' or 'AZW3' to always default
# to that format rather than your output format preference.
# Set to a value of 'remember' to use whichever format you chose last time you
# used the Tweak feature.
# Examples:
# default_tweak_format = None (Use output format)
# default_tweak_format = 'EPUB'
# default_tweak_format = 'remember'
default_tweak_format = None

View File

@ -20,7 +20,11 @@ vipy.session.initialize(project_name='calibre', src_dir=src_dir,
project_dir=project_dir, base_dir=project_dir)
def recipe_title_callback(raw):
return eval(raw.decode('utf-8')).replace(' ', '_')
try:
return eval(raw.decode('utf-8')).replace(u' ', u'_')
except:
print ('Failed to decode recipe title: %r'%raw)
raise
vipy.session.add_content_browser('<leader>r', 'Recipe',
vipy.session.glob_based_iterator(os.path.join(project_dir, 'recipes', '*.recipe')),

View File

@ -22,7 +22,8 @@ Do not modify it unless you know what you are doing.
import sys, os
path = os.environ.get('CALIBRE_PYTHON_PATH', {path!r})
sys.path.insert(0, path)
if path not in sys.path:
sys.path.insert(0, path)
sys.resources_location = os.environ.get('CALIBRE_RESOURCES_PATH', {resources!r})
sys.extensions_location = os.environ.get('CALIBRE_EXTENSIONS_PATH', {extensions!r})

View File

@ -41,8 +41,8 @@ binary_includes = [
'/usr/lib/libgthread-2.0.so.0',
'/usr/lib/libpng14.so.14',
'/usr/lib/libexslt.so.0',
MAGICK_PREFIX+'/lib/libMagickWand.so.4',
MAGICK_PREFIX+'/lib/libMagickCore.so.4',
MAGICK_PREFIX+'/lib/libMagickWand.so.5',
MAGICK_PREFIX+'/lib/libMagickCore.so.5',
'/usr/lib/libgcrypt.so.11',
'/usr/lib/libgpg-error.so.0',
'/usr/lib/libphonon.so.4',

View File

@ -429,7 +429,7 @@ class Py2App(object):
def add_imagemagick(self):
info('\nAdding ImageMagick')
for x in ('Wand', 'Core'):
self.install_dylib(os.path.join(SW, 'lib', 'libMagick%s.4.dylib'%x))
self.install_dylib(os.path.join(SW, 'lib', 'libMagick%s.5.dylib'%x))
idir = glob.glob(os.path.join(SW, 'lib', 'ImageMagick-*'))[-1]
dest = os.path.join(self.frameworks_dir, 'ImageMagick')
if os.path.exists(dest):

View File

@ -18,7 +18,7 @@ QT_DIR = 'Q:\\Qt\\4.8.1'
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
SW = r'C:\cygwin\home\kovid\sw'
IMAGEMAGICK = os.path.join(SW, 'build', 'ImageMagick-6.6.6',
IMAGEMAGICK = os.path.join(SW, 'build', 'ImageMagick-6.7.6',
'VisualMagick', 'bin')
CRT = r'C:\Microsoft.VC90.CRT'

View File

@ -336,6 +336,8 @@ Index: src/PdfFiltersPrivate.cpp
ImageMagick
--------------
Get the source from: http://www.imagemagick.org/download/windows/ImageMagick-windows.zip
Edit VisualMagick/configure/configure.cpp to set
int projectType = MULTITHREADEDDLL;
@ -349,7 +351,10 @@ Edit magick/magick-config.h
Undefine ProvideDllMain and MAGICKCORE_X11_DELEGATE
Now open VisualMagick/VisualDynamicMT.sln set to Release
Remove the CORE_xlib and UTIL_Imdisplay project CORE_Magick++
Remove the CORE_xlib, UTIL_Imdisplay and CORE_Magick++ projects.
Press F7 to build the project. You will get one error due to the removal of
xlib; ignore it.
calibre
---------

View File

@ -12,14 +12,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2012-04-28 10:42+0000\n"
"Last-Translator: Ferran Rius <frius64@hotmail.com>\n"
"PO-Revision-Date: 2012-05-03 16:09+0000\n"
"Last-Translator: Dídac Rios <didac@niorcs.com>\n"
"Language-Team: Catalan <linux@softcatala.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2012-04-29 04:45+0000\n"
"X-Generator: Launchpad (build 15149)\n"
"X-Launchpad-Export-Date: 2012-05-04 04:47+0000\n"
"X-Generator: Launchpad (build 15195)\n"
"Language: ca\n"
#. name for aaa
@ -9936,11 +9936,11 @@ msgstr "Ibani"
#. name for ica
msgid "Ede Ica"
msgstr ""
msgstr "Ede Ica"
#. name for ich
msgid "Etkywan"
msgstr ""
msgstr "Etkywan"
#. name for icl
msgid "Icelandic Sign Language"
@ -9952,7 +9952,7 @@ msgstr "Anglès crioll; Islander"
#. name for ida
msgid "Idakho-Isukha-Tiriki"
msgstr ""
msgstr "Idakho-Isukha-Tiriki"
#. name for idb
msgid "Indo-Portuguese"
@ -9960,15 +9960,15 @@ msgstr "Indo-portuguès"
#. name for idc
msgid "Idon"
msgstr ""
msgstr "Idon"
#. name for idd
msgid "Ede Idaca"
msgstr ""
msgstr "Ede Idaca"
#. name for ide
msgid "Idere"
msgstr ""
msgstr "Idere"
#. name for idi
msgid "Idi"
@ -9976,43 +9976,43 @@ msgstr ""
#. name for ido
msgid "Ido"
msgstr ""
msgstr "ido"
#. name for idr
msgid "Indri"
msgstr ""
msgstr "Indri"
#. name for ids
msgid "Idesa"
msgstr ""
msgstr "Idesa"
#. name for idt
msgid "Idaté"
msgstr ""
msgstr "Idaté"
#. name for idu
msgid "Idoma"
msgstr ""
msgstr "Idoma"
#. name for ifa
msgid "Ifugao; Amganad"
msgstr ""
msgstr "Ifugao; Amganad"
#. name for ifb
msgid "Ifugao; Batad"
msgstr ""
msgstr "Ifugao; Batad"
#. name for ife
msgid "Ifè"
msgstr ""
msgstr "Ifè"
#. name for iff
msgid "Ifo"
msgstr ""
msgstr "Ifo"
#. name for ifk
msgid "Ifugao; Tuwali"
msgstr ""
msgstr "Ifugao; Tuwali"
#. name for ifm
msgid "Teke-Fuumu"
@ -10020,15 +10020,15 @@ msgstr "Teke; Fuumu"
#. name for ifu
msgid "Ifugao; Mayoyao"
msgstr ""
msgstr "Ifugao; Mayoyao"
#. name for ify
msgid "Kallahan; Keley-I"
msgstr ""
msgstr "Kallahan; Keley-I"
#. name for igb
msgid "Ebira"
msgstr ""
msgstr "Ebira"
#. name for ige
msgid "Igede"

View File

@ -8,14 +8,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2012-03-25 12:19+0000\n"
"Last-Translator: Radan Putnik <srastral@gmail.com>\n"
"PO-Revision-Date: 2012-05-03 14:49+0000\n"
"Last-Translator: Иван Старчевић <ivanstar61@gmail.com>\n"
"Language-Team: Serbian <gnu@prevod.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2012-03-26 04:37+0000\n"
"X-Generator: Launchpad (build 15008)\n"
"X-Launchpad-Export-Date: 2012-05-04 04:47+0000\n"
"X-Generator: Launchpad (build 15195)\n"
"Language: sr\n"
#. name for aaa
@ -6152,7 +6152,7 @@ msgstr ""
#. name for deu
msgid "German"
msgstr "немачки"
msgstr "Немачки"
#. name for dev
msgid "Domung"
@ -8416,7 +8416,7 @@ msgstr "ирски"
#. name for glg
msgid "Galician"
msgstr ""
msgstr "Галицијски"
#. name for glh
msgid "Pashayi; Northwest"
@ -8472,11 +8472,11 @@ msgstr ""
#. name for gmh
msgid "German; Middle High (ca. 1050-1500)"
msgstr ""
msgstr "Немачки; средње високи (ca. 1050-1500)"
#. name for gml
msgid "German; Middle Low"
msgstr ""
msgstr "Немачки; средње низак"
#. name for gmm
msgid "Gbaya-Mbodomo"
@ -8792,7 +8792,7 @@ msgstr ""
#. name for gsg
msgid "German Sign Language"
msgstr ""
msgstr "Немачки знаковни језик"
#. name for gsl
msgid "Gusilay"
@ -8820,7 +8820,7 @@ msgstr ""
#. name for gsw
msgid "German; Swiss"
msgstr ""
msgstr "Немачки; Швајцарска"
#. name for gta
msgid "Guató"
@ -17954,7 +17954,7 @@ msgstr ""
#. name for nds
msgid "German; Low"
msgstr ""
msgstr "Немачки; низак"
#. name for ndt
msgid "Ndunga"
@ -18778,7 +18778,7 @@ msgstr ""
#. name for nno
msgid "Norwegian Nynorsk"
msgstr "норвешки модерни"
msgstr "Норвешки модерни"
#. name for nnp
msgid "Naga; Wancho"
@ -18830,7 +18830,7 @@ msgstr ""
#. name for nob
msgid "Norwegian Bokmål"
msgstr ""
msgstr "Норвешки (књижевни)"
#. name for noc
msgid "Nuk"
@ -18886,7 +18886,7 @@ msgstr ""
#. name for nor
msgid "Norwegian"
msgstr "норвешки"
msgstr "Норвешки"
#. name for nos
msgid "Nisu; Eastern"
@ -19066,7 +19066,7 @@ msgstr ""
#. name for nsl
msgid "Norwegian Sign Language"
msgstr ""
msgstr "Норвешки знаковни језик"
#. name for nsm
msgid "Naga; Sumi"
@ -20406,7 +20406,7 @@ msgstr ""
#. name for pdc
msgid "German; Pennsylvania"
msgstr ""
msgstr "Немачки; Пенсилванија"
#. name for pdi
msgid "Pa Di"
@ -22086,7 +22086,7 @@ msgstr ""
#. name for rmg
msgid "Norwegian; Traveller"
msgstr ""
msgstr "Норвешки; путнички"
#. name for rmh
msgid "Murkim"
@ -22871,7 +22871,7 @@ msgstr ""
#. name for sgg
msgid "Swiss-German Sign Language"
msgstr ""
msgstr "Швајцарско-Немачки знаковни језик"
#. name for sgh
msgid "Shughni"

View File

@ -26,7 +26,7 @@ def get_opts_from_parser(parser):
class Coffee(Command): # {{{
description = 'Compile coffeescript files into javascript'
COFFEE_DIRS = {'ebooks/oeb/display': 'display'}
COFFEE_DIRS = ('ebooks/oeb/display',)
def add_options(self, parser):
parser.add_option('--watch', '-w', action='store_true', default=False,
@ -47,49 +47,69 @@ class Coffee(Command): # {{{
except KeyboardInterrupt:
pass
def show_js(self, jsfile):
def show_js(self, raw):
from pygments.lexers import JavascriptLexer
from pygments.formatters import TerminalFormatter
from pygments import highlight
with open(jsfile, 'rb') as f:
raw = f.read()
print highlight(raw, JavascriptLexer(), TerminalFormatter())
def do_coffee_compile(self, opts, timestamp=False, ignore_errors=False):
for toplevel, dest in self.COFFEE_DIRS.iteritems():
dest = self.j(self.RESOURCES, dest)
for x in glob.glob(self.j(self.SRC, __appname__, toplevel, '*.coffee')):
js = self.j(dest, os.path.basename(x.rpartition('.')[0]+'.js'))
if self.newer(js, x):
print ('\t%sCompiling %s'%(time.strftime('[%H:%M:%S] ') if
timestamp else '', os.path.basename(x)))
try:
cs = subprocess.check_output(self.compiler +
[x]).decode('utf-8')
except Exception as e:
print ('\n\tCompilation of %s failed'%os.path.basename(x))
print (e)
if ignore_errors:
with open(js, 'wb') as f:
f.write('# Compilation from coffeescript failed')
else:
raise SystemExit(1)
else:
with open(js, 'wb') as f:
f.write(cs.encode('utf-8'))
if opts.show_js:
self.show_js(js)
print ('#'*80)
print ('#'*80)
src_files = {}
for src in self.COFFEE_DIRS:
for f in glob.glob(self.j(self.SRC, __appname__, src,
'*.coffee')):
bn = os.path.basename(f).rpartition('.')[0]
arcname = src.replace('/', '.') + '.' + bn + '.js'
src_files[arcname] = (f, os.stat(f).st_mtime)
existing = {}
dest = self.j(self.RESOURCES, 'compiled_coffeescript.zip')
if os.path.exists(dest):
with zipfile.ZipFile(dest, 'r') as zf:
for info in zf.infolist():
mtime = time.mktime(info.date_time + (0, 0, -1))
arcname = info.filename
if (arcname in src_files and src_files[arcname][1] <
mtime):
existing[arcname] = (zf.read(info), info)
todo = set(src_files) - set(existing)
updated = {}
for arcname in todo:
name = arcname.rpartition('.')[0]
print ('\t%sCompiling %s'%(time.strftime('[%H:%M:%S] ') if
timestamp else '', name))
src = src_files[arcname][0]
try:
js = subprocess.check_output(self.compiler +
[src]).decode('utf-8')
except Exception as e:
print ('\n\tCompilation of %s failed'%name)
print (e)
if ignore_errors:
js = u'# Compilation from coffeescript failed'
else:
raise SystemExit(1)
else:
if opts.show_js:
self.show_js(js)
print ('#'*80)
print ('#'*80)
zi = zipfile.ZipInfo()
zi.filename = arcname
zi.date_time = time.localtime()[:6]
updated[arcname] = (js.encode('utf-8'), zi)
if updated:
with zipfile.ZipFile(dest, 'w', zipfile.ZIP_STORED) as zf:
for raw, zi in updated.itervalues():
zf.writestr(zi, raw)
for raw, zi in existing.itervalues():
zf.writestr(zi, raw)
def clean(self):
for toplevel, dest in self.COFFEE_DIRS.iteritems():
dest = self.j(self.RESOURCES, dest)
for x in glob.glob(self.j(self.SRC, __appname__, toplevel, '*.coffee')):
x = x.rpartition('.')[0] + '.js'
x = self.j(dest, os.path.basename(x))
if os.path.exists(x):
os.remove(x)
x = self.j(self.RESOURCES, 'compiled_coffeescript.zip')
if os.path.exists(x):
os.remove(x)
# }}}
class Kakasi(Command): # {{{

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 8, 50)
numeric_version = (0, 8, 51)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -302,7 +302,9 @@ class OutputFormatPlugin(Plugin):
:param item: The item (HTML file) being processed
:param stylizer: A Stylizer object containing the flattened styles for
item. You can get the style for any element by stylizer.style(element).
item. You can get the style for any element by
stylizer.style(element).
'''
pass

View File

@ -57,6 +57,7 @@ class ANDROID(USBMS):
0x4316 : [0x216],
0x42d6 : [0x216],
0x42d7 : [0x216],
0x42f7 : [0x216],
},
# Freescale
0x15a2 : {
@ -193,7 +194,7 @@ class ANDROID(USBMS):
'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855',
'XT910', 'BOOK_A10', 'USB_2.0_DRIVER', 'I9100T', 'P999DW',
'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD', 'USB_2.0_DRIVER',
'GT-S5830L_CARD', 'UNIVERSE']
'GT-S5830L_CARD', 'UNIVERSE', 'XT875']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
@ -201,7 +202,8 @@ class ANDROID(USBMS):
'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853',
'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD',
'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC',
'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER']
'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER', 'XT875',
'UMS_COMPOSITE']
OSX_MAIN_MEM = 'Android Device Main Memory'

View File

@ -92,6 +92,10 @@ class POCKETBOOK360(EB600):
name = 'PocketBook 360 Device Interface'
gui_name = 'PocketBook 360'
VENDOR_ID = [0x1f85, 0x525]
PRODUCT_ID = [0x1688, 0xa4a5]
BCD = [0x110]
FORMATS = ['epub', 'fb2', 'prc', 'mobi', 'pdf', 'djvu', 'rtf', 'chm', 'txt']

View File

@ -1,4 +1,25 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
class ConversionUserFeedBack(Exception):
    '''
    Exception that carries a simple message to be shown to the user. The
    four fields are available as attributes and, JSON encoded, as the
    exception message.
    '''

    def __init__(self, title, msg, level='info', det_msg=''):
        ''' Show a simple message to the user

        :param title: The title (very short description)
        :param msg: The message to show the user
        :param level: Must be one of 'info', 'warn' or 'error'
        :param det_msg: Optional detailed message to show the user
        '''
        import json
        payload = {'msg':msg, 'level':level, 'det_msg':det_msg,
                'title':title}
        super(ConversionUserFeedBack, self).__init__(json.dumps(payload))
        self.title = title
        self.msg = msg
        self.det_msg = det_msg
        self.level = level

View File

@ -15,6 +15,7 @@ from calibre.utils.logging import Log
from calibre.constants import preferred_encoding
from calibre.customize.conversion import OptionRecommendation
from calibre import patheq
from calibre.ebooks.conversion import ConversionUserFeedBack
USAGE = '%prog ' + _('''\
input_file output_file [options]
@ -304,7 +305,10 @@ def read_sr_patterns(path, log=None):
def main(args=sys.argv):
log = Log()
parser, plumber = create_option_parser(args, log)
opts = parser.parse_args(args)[0]
opts, leftover_args = parser.parse_args(args)
if len(leftover_args) > 3:
log.error('Extra arguments not understood:', u', '.join(leftover_args[3:]))
return 1
for x in ('read_metadata_from_opf', 'cover'):
if getattr(opts, x, None) is not None:
setattr(opts, x, abspath(getattr(opts, x)))
@ -317,7 +321,16 @@ def main(args=sys.argv):
if n.dest]
plumber.merge_ui_recommendations(recommendations)
plumber.run()
try:
    plumber.run()
except ConversionUserFeedBack as e:
    # The conversion wants to tell the user something. Use the log method
    # that matches the requested level, falling back to info for an
    # unrecognised level.
    ll = {'info': log.info, 'warn': log.warn,
            'error':log.error}.get(e.level, log.info)
    ll(e.title)
    if e.det_msg:
        # The attribute set by ConversionUserFeedBack is det_msg; reading
        # e.detmsg here raised AttributeError instead of logging it
        log.debug(e.det_msg)
    ll(e.msg)
    raise SystemExit(1)
log(_('Output saved to'), ' ', plumber.output)

View File

@ -207,7 +207,7 @@ class EPUBInput(InputFormatPlugin):
if rc:
cover_toc_item = None
for item in oeb.toc.iterdescendants():
if item.href == rc:
if item.href and item.href.partition('#')[0] == rc:
cover_toc_item = item
break
spine = {x.href for x in oeb.spine}

View File

@ -393,8 +393,14 @@ class EPUBOutput(OutputFormatPlugin):
for tag in XPath('//h:body/descendant::h:script')(root):
tag.getparent().remove(tag)
formchildren = XPath('./h:input|./h:button|./h:textarea|'
'./h:label|./h:fieldset|./h:legend')
for tag in XPath('//h:form')(root):
tag.getparent().remove(tag)
if formchildren(tag):
tag.getparent().remove(tag)
else:
# Not a real form
tag.tag = XHTML('div')
for tag in XPath('//h:center')(root):
tag.tag = XHTML('div')

View File

@ -12,7 +12,7 @@ class MOBIInput(InputFormatPlugin):
name = 'MOBI Input'
author = 'Kovid Goyal'
description = 'Convert MOBI files (.mobi, .prc, .azw) to HTML'
file_types = set(['mobi', 'prc', 'azw', 'azw3'])
file_types = set(['mobi', 'prc', 'azw', 'azw3', 'pobi'])
def convert(self, stream, options, file_ext, log,
accelerators):

View File

@ -343,21 +343,25 @@ OptionRecommendation(name='remove_fake_margins',
OptionRecommendation(name='margin_top',
recommended_value=5.0, level=OptionRecommendation.LOW,
help=_('Set the top margin in pts. Default is %default. '
'Setting this to less than zero will cause no margin to be set. '
'Note: 72 pts equals 1 inch')),
OptionRecommendation(name='margin_bottom',
recommended_value=5.0, level=OptionRecommendation.LOW,
help=_('Set the bottom margin in pts. Default is %default. '
'Setting this to less than zero will cause no margin to be set. '
'Note: 72 pts equals 1 inch')),
OptionRecommendation(name='margin_left',
recommended_value=5.0, level=OptionRecommendation.LOW,
help=_('Set the left margin in pts. Default is %default. '
'Setting this to less than zero will cause no margin to be set. '
'Note: 72 pts equals 1 inch')),
OptionRecommendation(name='margin_right',
recommended_value=5.0, level=OptionRecommendation.LOW,
help=_('Set the right margin in pts. Default is %default. '
'Setting this to less than zero will cause no margin to be set. '
'Note: 72 pts equals 1 inch')),
OptionRecommendation(name='change_justification',
@ -885,7 +889,10 @@ OptionRecommendation(name='search_replace',
self.log.debug('Resolved conversion options')
try:
self.log.debug('calibre version:', __version__)
self.log.debug(pprint.pformat(self.opts.__dict__))
odict = dict(self.opts.__dict__)
for x in ('username', 'password'):
odict.pop(x, None)
self.log.debug(pprint.pformat(odict))
except:
self.log.exception('Failed to get resolved conversion options')

View File

@ -5,7 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re, error as re_error
import re
from math import ceil
from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
from calibre.utils.logging import default_log
@ -184,7 +184,7 @@ class HeuristicProcessor(object):
except OverflowError:
# match.group(0) was too large to be compiled into a regex
continue
except re_error:
except re.error:
# the match was not a valid regular expression
continue

View File

@ -113,6 +113,11 @@ class HTMLFile(object):
raise IOError(msg)
raise IgnoreFile(msg, err.errno)
if not src:
if level == 0:
raise ValueError('The file %s is empty'%self.path)
self.is_binary = True
if not self.is_binary:
if not encoding:
encoding = detect_xml_encoding(src[:4096], verbose=verbose)[1]

View File

@ -18,7 +18,7 @@ from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import (Source, Option, fixcase,
fixauthors)
from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import parse_date
from calibre.utils.date import parse_only_date
from calibre.utils.localization import canonicalize_lang
class Worker(Thread): # Get details {{{
@ -471,7 +471,7 @@ class Worker(Thread): # Get details {{{
ans = x.tail
date = ans.rpartition('(')[-1].replace(')', '').strip()
date = self.delocalize_datestr(date)
return parse_date(date, assume_utc=True)
return parse_only_date(date, assume_utc=True)
def parse_language(self, pd):
for x in reversed(pd.xpath(self.language_xpath)):

View File

@ -306,10 +306,15 @@ class MOBIHeader(object): # {{{
self.extra_data_flags = 0
if self.has_extra_data_flags:
self.unknown4 = self.raw[184:192]
self.fdst_idx, self.fdst_count = struct.unpack_from(b'>LL',
self.raw, 192)
if self.fdst_count <= 1:
self.fdst_idx = NULL_INDEX
if self.file_version < 8:
    # Before version 8 the four bytes at offset 192 are read as two
    # 16 bit record numbers instead of an FDST index
    self.first_text_record, self.last_text_record = \
            struct.unpack_from(b'>HH', self.raw, 192)
    # unpack_from() always returns a tuple; unpack its single item so
    # that fdst_count is an int here, just as in the else branch
    self.fdst_count, = struct.unpack_from(b'>L', self.raw, 196)
else:
    self.fdst_idx, self.fdst_count = struct.unpack_from(b'>LL',
            self.raw, 192)
    if self.fdst_count <= 1:
        self.fdst_idx = NULL_INDEX
(self.fcis_number, self.fcis_count, self.flis_number,
self.flis_count) = struct.unpack(b'>IIII',
self.raw[200:216])
@ -409,7 +414,11 @@ class MOBIHeader(object): # {{{
a('DRM Flags: %r'%self.drm_flags)
if self.has_extra_data_flags:
a('Unknown4: %r'%self.unknown4)
r('FDST Index', 'fdst_idx')
if hasattr(self, 'first_text_record'):
a('First content record: %d'%self.first_text_record)
a('Last content record: %d'%self.last_text_record)
else:
r('FDST Index', 'fdst_idx')
a('FDST Count: %d'% self.fdst_count)
r('FCIS number', 'fcis_number')
a('FCIS count: %d'% self.fcis_count)

View File

@ -111,7 +111,11 @@ def update_flow_links(mobi8_reader, resource_map, log):
continue
if not isinstance(flow, unicode):
flow = flow.decode(mr.header.codec)
try:
flow = flow.decode(mr.header.codec)
except UnicodeDecodeError:
log.error('Flow part has invalid %s encoded bytes'%mr.header.codec)
flow = flow.decode(mr.header.codec, 'replace')
# links to raster image files from image tags
# image_pattern

View File

@ -207,9 +207,9 @@ class Mobi8Reader(object):
fname = 'svgimg' + nstr + '.svg'
else:
# search for CDATA and if exists inline it
if flowpart.find('[CDATA[') >= 0:
if flowpart.find(b'[CDATA[') >= 0:
typ = 'css'
flowpart = '<style type="text/css">\n' + flowpart + '\n</style>\n'
flowpart = b'<style type="text/css">\n' + flowpart + b'\n</style>\n'
format = 'inline'
dir = None
fname = None

View File

@ -382,6 +382,7 @@ class MobiWriter(object):
first_image_record = len(self.records)
self.resources.serialize(self.records, used_images)
resource_record_count = len(self.records) - old
last_content_record = len(self.records) - 1
# FCIS/FLIS (Seems to serve no purpose)
flis_number = len(self.records)
@ -406,7 +407,7 @@ class MobiWriter(object):
# header
header_fields['first_resource_record'] = first_image_record
header_fields['exth_flags'] = 0b100001010000 # Kinglegen uses this
header_fields['fdst_record'] = NULL_INDEX
header_fields['fdst_record'] = pack(b'>HH', 1, last_content_record)
header_fields['fdst_count'] = 1 # Why not 0? Kindlegen uses 1
header_fields['flis_record'] = flis_number
header_fields['fcis_record'] = fcis_number

View File

@ -314,9 +314,9 @@ class KF8Writer(object):
return
# Flatten the ToC into a depth first list
fl = toc.iter() if is_periodical else toc.iterdescendants()
fl = toc.iterdescendants()
for i, item in enumerate(fl):
entry = {'id': id(item), 'index': i, 'href':item.href,
entry = {'id': id(item), 'index': i, 'href':item.href or '',
'label':(item.title or _('Unknown')),
'children':[]}
entry['depth'] = getattr(item, 'ncx_hlvl', 0)

View File

@ -138,6 +138,8 @@ class MOBIHeader(Header): # {{{
unknown2 = zeroes(8)
# 192: FDST
# In MOBI 6 the fdst record is instead two two byte fields storing the
# index of the first and last content records
fdst_record = DYN
fdst_count = DYN

View File

@ -966,7 +966,7 @@ class Manifest(object):
data = data.cssText
if isinstance(data, unicode):
data = data.encode('utf-8')
return data
return data + b'\n'
return str(data)
def __unicode__(self):

View File

@ -389,8 +389,17 @@ class CanonicalFragmentIdentifier
# Drill down into iframes, etc.
while true
target = cdoc.elementFromPoint x, y
if not target or target.localName == 'html'
log("No element at (#{ x }, #{ y })")
if not target or target.localName in ['html', 'body']
# We ignore both html and body even though body could
# have text nodes under it as performance is very poor if body
# has large margins/padding (for e.g. in fullscreen mode)
# A possible solution for this is to wrap all text node
# children of body in <span> but that is seriously ugly and
# might have side effects. Lets do this only if there are lots of
# books in the wild that actually have text children of body,
# and even in this case it might be better to change the input
# plugin to prevent this from happening.
# log("No element at (#{ x }, #{ y })")
return null
name = target.localName

View File

@ -0,0 +1,76 @@
#!/usr/bin/env coffee
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
###
Copyright 2012, Kovid Goyal <kovid@kovidgoyal.net>
Released under the GPLv3 License
###
body_height = () ->
    # The largest of the scroll, offset and client heights reported by the
    # body and the document element, or 0 if either of them is missing.
    body = document.body
    root = document.documentElement
    return 0 unless body? and root?
    Math.max(body.scrollHeight, root.scrollHeight, body.offsetHeight,
        root.offsetHeight, body.clientHeight, root.clientHeight)
abstop = (elem) ->
    # Add up offsetTop for elem and every element on its offsetParent chain
    total = elem.offsetTop
    node = elem.offsetParent
    while node
        total += node.offsetTop
        node = node.offsetParent
    total
class BookIndexing
    ###
    This class is a namespace to expose indexing functions via the
    window.book_indexing object. The most important functions are:

    anchor_positions(): Get the absolute (document co-ordinate system) position
    for elements with the specified id/name attributes.
    ###

    constructor: () ->
        @cache = {}
        @body_height_at_last_check = null

    cache_valid: (anchors) ->
        # The cache can be reused only if its keys are exactly the requested
        # anchors: none missing and none left over.
        has_own = Object.prototype.hasOwnProperty
        for a in anchors
            return false unless has_own.call(@cache, a)
        for own p of @cache
            return false if p not in anchors
        true

    anchor_positions: (anchors, use_cache=false) ->
        # Map every anchor to the top of the element with that id (or, failing
        # that, the <a> element with that name). Anchors that cannot be found
        # are placed 10000 beyond the current body height.
        if use_cache and body_height() is @body_height_at_last_check and @cache_valid(anchors)
            return @cache

        positions = {}
        for anchor in anchors
            elem = document.getElementById(anchor)
            if elem is null
                # Look for an <a name="anchor"> element
                try
                    result = document.evaluate(
                        ".//*[local-name() = 'a' and @name='#{ anchor }']",
                        document.body, null,
                        XPathResult.FIRST_ORDERED_NODE_TYPE, null)
                    elem = result.singleNodeValue
                catch error
                    # The anchor had a ' or other invalid char
                    elem = null
            if elem is null
                positions[anchor] = body_height() + 10000
            else
                positions[anchor] = abstop(elem)
        @cache = positions
        @body_height_at_last_check = body_height()
        positions
if window?
window.book_indexing = new BookIndexing()

View File

@ -1,383 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008 Kovid Goyal <kovid at kovidgoyal.net>'
'''
Iterate over the HTML files in an ebook. Useful for writing viewers.
'''
import re, os, math
from cStringIO import StringIO
from PyQt4.Qt import QFontDatabase
from calibre.customize.ui import available_input_formats
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.zipfile import safe_replace
from calibre.utils.config import DynamicConfig
from calibre.utils.logging import Log
from calibre import (guess_type, prints, prepare_string_for_xml,
xml_replace_entities)
from calibre.ebooks.oeb.transforms.cover import CoverManager
from calibre.constants import filesystem_encoding
TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(\
'__ar__', 'none').replace('__viewbox__', '0 0 600 800'
).replace('__width__', '600').replace('__height__', '800')
BM_FIELD_SEP = u'*|!|?|*'
BM_LEGACY_ESC = u'esc-text-%&*#%(){}ads19-end-esc'
def character_count(html):
    '''
    Return the number of "significant" text characters in a HTML string.
    '''
    whitespace = re.compile(r'\s+')
    # Every match is a run of text together with the '>' and '<' around it,
    # hence the -2. A run of whitespace counts as a single character.
    return sum(len(whitespace.sub(' ', m.group())) - 2
            for m in re.finditer(r'>[^<]+<', html))
class UnsupportedFormatError(Exception):
    '''Error whose message names the (upper cased) format that is not
    supported.'''

    def __init__(self, fmt):
        message = _('%s format books are not supported')%fmt.upper()
        super(UnsupportedFormatError, self).__init__(message)
class SpineItem(unicode):
    '''
    The path to a file in the spine, as a unicode string, annotated with the
    file's detected encoding, its character count, page bookkeeping fields
    (initialised to -1) and its mime type.
    '''

    def __new__(cls, path, mime_type=None):
        '''
        :param path: Path to the file. If it does not exist but the part
            before the first # does, that part is used instead.
        :param mime_type: The mime type of the file, guessed from the path
            if None.
        '''
        ppath = path.partition('#')[0]
        if not os.path.exists(path) and os.path.exists(ppath):
            path = ppath
        obj = super(SpineItem, cls).__new__(cls, path)
        # Close the file as soon as it has been read instead of leaving the
        # handle for the garbage collector
        with open(path, 'rb') as f:
            raw = f.read()
        raw, obj.encoding = xml_to_unicode(raw)
        obj.character_count = character_count(raw)
        obj.start_page = -1
        obj.pages = -1
        obj.max_page = -1
        if mime_type is None:
            mime_type = guess_type(obj)[0]
        obj.mime_type = mime_type
        return obj
class FakeOpts(object):
    '''Plain holder of fixed option values.'''

    input_encoding = None
    max_levels = 5
    breadth_first = False
    verbose = 0
def is_supported(path):
    '''Return True if the extension of path is an available input format.
    The htm, html, xhtm and xhtml spellings are all looked up as html.'''
    suffix = os.path.splitext(path)[1]
    ext = suffix.replace('.', '').lower()
    normalized = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
    return normalized in available_input_formats()
def write_oebbook(oeb, path):
    '''Write oeb to path with OEBWriter and return the first file found
    under path whose name ends with .opf (None if there is no such file).'''
    from calibre.ebooks.oeb.writer import OEBWriter
    from calibre import walk
    OEBWriter()(oeb, path)
    return next((f for f in walk(path) if f.endswith('.opf')), None)
class EbookIterator(object):
CHARACTERS_PER_PAGE = 1000
def __init__(self, pathtoebook, log=None):
self.log = log
if log is None:
self.log = Log()
pathtoebook = pathtoebook.strip()
self.pathtoebook = os.path.abspath(pathtoebook)
self.config = DynamicConfig(name='iterator')
ext = os.path.splitext(pathtoebook)[1].replace('.', '').lower()
ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
self.ebook_ext = ext.replace('original_', '')
def search(self, text, index, backwards=False):
    '''
    Return the index of the nearest spine item strictly after index (or
    strictly before it, if backwards is True) that contains text, ignoring
    case. Returns None if there is no such item.
    '''
    needle = prepare_string_for_xml(text.lower())
    candidates = list(enumerate(self.spine))
    if backwards:
        candidates.reverse()
    for i, path in candidates:
        in_range = (i < index) if backwards else (i > index)
        if not in_range:
            continue
        with open(path, 'rb') as f:
            raw = f.read().decode(path.encoding)
        try:
            raw = xml_replace_entities(raw)
        except:
            # Search the text as read if the entities cannot be replaced
            pass
        if needle in raw.lower():
            return i
def find_missing_css_files(self):
    '''Yield the path of every file ending in .css that is found anywhere
    under the directory containing the OPF.'''
    root = os.path.dirname(self.pathtoopf)
    for dirpath, dirnames, filenames in os.walk(root):
        for name in filenames:
            if name.endswith('.css'):
                yield os.path.join(dirpath, name)
def find_declared_css_files(self):
    '''Yield the path of every manifest item whose mime type contains
    css (compared case insensitively).'''
    for entry in self.opf.manifest:
        mime = entry.mime_type
        if not mime:
            continue
        if 'css' in mime.lower():
            yield entry.path
def find_embedded_fonts(self):
'''
This will become unnecessary once Qt WebKit supports the @font-face rule.

Registers the font files referenced from @font-face rules with
QFontDatabase and then rewrites font-family declarations in the CSS files
so that they use the family names of the fonts that were registered.
'''
# Prefer the stylesheets declared in the manifest, fall back to whatever
# .css files exist on disk
css_files = set(self.find_declared_css_files())
if not css_files:
css_files = set(self.find_missing_css_files())
# Maps a family name used in the CSS to the first family name of the font
# that was registered for it
bad_map = {}
font_family_pat = re.compile(r'font-family\s*:\s*([^;]+)')
for csspath in css_files:
try:
css = open(csspath, 'rb').read().decode('utf-8', 'replace')
except:
# Unreadable stylesheets are skipped
continue
for match in re.compile(r'@font-face\s*{([^}]+)}').finditer(css):
block = match.group(1)
family = font_family_pat.search(block)
url = re.compile(r'url\s*\([\'"]*(.+?)[\'"]*\)', re.DOTALL).search(block)
if url:
# The url is resolved relative to the directory of the stylesheet
path = url.group(1).split('/')
path = os.path.join(os.path.dirname(csspath), *path)
if not os.access(path, os.R_OK):
continue
id = QFontDatabase.addApplicationFont(path)
# An id of -1 is treated as "font was not registered"
if id != -1:
families = [unicode(f) for f in QFontDatabase.applicationFontFamilies(id)]
if family:
family = family.group(1)
specified_families = [x.strip().replace('"',
'').replace("'", '') for x in family.split(',')]
aliasing_ok = False
for f in specified_families:
bad_map[f] = families[0]
if not aliasing_ok and f in families:
aliasing_ok = True
if not aliasing_ok:
prints('WARNING: Family aliasing not fully supported.')
prints('\tDeclared family: %r not in actual families: %r'
% (family, families))
else:
prints('Loaded embedded font:', repr(family))
if bad_map:
def prepend_embedded_font(match):
# Replace the first known declared family found in this declaration
# with the quoted name of the registered family.
# NOTE(review): nothing is returned when no known family is found in
# the declaration -- confirm how re.sub() treats that.
for bad, good in bad_map.items():
if bad in match.group(1):
prints('Substituting font family: %s -> %s'%(bad, good))
return match.group().replace(bad, '"%s"'%good)
from calibre.ebooks.chardet import force_encoding
for csspath in css_files:
with open(csspath, 'r+b') as f:
css = f.read()
enc = force_encoding(css, False)
css = css.decode(enc, 'replace')
ncss = font_family_pat.sub(prepend_embedded_font, css)
# The file is rewritten in place, in its own encoding, only if something
# changed
if ncss != css:
f.seek(0)
f.truncate()
f.write(ncss.encode(enc))
def __enter__(self, processed=False, only_input_plugin=False):
'''
Run the input plugin for the book inside a new temporary directory, then
build self.spine (a list of SpineItem), the page numbering, self.toc and
the bookmarks from the resulting OPF. Returns self.

:param processed: If True create_oebbook() is run on the output of the
input plugin
:param only_input_plugin: If True create_oebbook() is never run
'''
# Files listed in delete_on_exit are removed again by __exit__
self.delete_on_exit = []
self._tdir = TemporaryDirectory('_ebook_iter')
self.base = self._tdir.__enter__()
if not isinstance(self.base, unicode):
self.base = self.base.decode(filesystem_encoding)
from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
plumber = Plumber(self.pathtoebook, self.base, self.log)
plumber.setup_options()
if self.pathtoebook.lower().endswith('.opf'):
plumber.opts.dont_package = True
if hasattr(plumber.opts, 'no_process'):
plumber.opts.no_process = True
plumber.input_plugin.for_viewer = True
# NOTE(review): the stream opened for the input plugin is never closed
# explicitly here
with plumber.input_plugin:
self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
plumber.opts, plumber.input_fmt, self.log,
{}, self.base)
# The input plugin returns either something with a manifest attribute,
# which has to be written out with write_oebbook(), or (presumably) the
# path to an OPF file -- TODO confirm
if not only_input_plugin:
if processed or plumber.input_fmt.lower() in ('pdb', 'pdf', 'rb') and \
not hasattr(self.pathtoopf, 'manifest'):
if hasattr(self.pathtoopf, 'manifest'):
self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
self.pathtoopf = create_oebbook(self.log, self.pathtoopf,
plumber.opts)
if hasattr(self.pathtoopf, 'manifest'):
self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper()
if getattr(plumber.input_plugin, 'is_kf8', False):
self.book_format = 'KF8'
# Use the OPF object provided by the input plugin if there is one, otherwise
# parse the OPF file
self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
if self.opf is None:
self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
self.language = self.opf.language
if self.language:
self.language = self.language.lower()
# Linear spine items come first, followed by the non linear ones
ordered = [i for i in self.opf.spine if i.is_linear] + \
[i for i in self.opf.spine if not i.is_linear]
self.spine = []
for i in ordered:
spath = i.path
mt = None
if i.idref is not None:
mt = self.opf.manifest.type_for_id(i.idref)
if mt is None:
mt = guess_type(spath)[0]
try:
self.spine.append(SpineItem(spath, mime_type=mt))
except:
self.log.warn('Missing spine item:', repr(spath))
cover = self.opf.cover
if self.ebook_ext in ('lit', 'mobi', 'prc', 'opf', 'fb2') and cover:
# Generate a page from TITLEPAGE that refers to the cover and put it at
# the front of the spine
cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
chtml = (TITLEPAGE%prepare_string_for_xml(rcpath, True)).encode('utf-8')
open(cfile, 'wb').write(chtml)
self.spine[0:0] = [SpineItem(cfile,
mime_type='application/xhtml+xml')]
self.delete_on_exit.append(cfile)
if self.opf.path_to_html_toc is not None and \
self.opf.path_to_html_toc not in self.spine:
try:
self.spine.append(SpineItem(self.opf.path_to_html_toc))
except:
import traceback
traceback.print_exc()
# Page numbers are estimated from the character counts, with
# CHARACTERS_PER_PAGE characters to a page
sizes = [i.character_count for i in self.spine]
self.pages = [math.ceil(i/float(self.CHARACTERS_PER_PAGE)) for i in sizes]
for p, s in zip(self.pages, self.spine):
s.pages = p
# Pages are numbered from 1, every spine item starts on the page after the
# last page of the previous item
start = 1
for s in self.spine:
s.start_page = start
start += s.pages
s.max_page = s.start_page + s.pages - 1
self.toc = self.opf.toc
self.read_bookmarks()
return self
def parse_bookmarks(self, raw):
    '''
    Append the bookmarks serialized in raw, one per line, to
    self.bookmarks. Lines containing ^ are legacy bookmarks of the form
    title^spine#pos, lines containing BM_FIELD_SEP hold the title, spine
    and position separated by BM_FIELD_SEP. Lines that cannot be parsed
    are ignored.
    '''
    for line in raw.splitlines():
        if '^' in line:
            title, caret, ref = line.rpartition('^')
            try:
                spine, hash_, pos = ref.partition('#')
                spine = int(spine.strip())
            except:
                continue
            bm = {'type':'legacy', 'title':title, 'spine':spine, 'pos':pos}
        elif BM_FIELD_SEP in line:
            try:
                title, spine, pos = line.strip().split(BM_FIELD_SEP)
                spine = int(spine)
            except:
                continue
            # Undo the escaping done when serializing
            pos = pos.replace(BM_LEGACY_ESC, u'^')
            # The position is a scroll fraction if it is a number
            try:
                pos = float(pos)
            except:
                pass
            bm = {'type':'cfi', 'title':title, 'pos':pos, 'spine':spine}
        else:
            continue
        self.bookmarks.append(bm)
def serialize_bookmarks(self, bookmarks):
    '''
    Return bookmarks serialized as unicode text, one bookmark per line and
    with a trailing newline.
    '''
    def serialize_one(bm):
        if bm['type'] == 'legacy':
            return u'%s^%d#%s'%(bm['title'], bm['spine'], bm['pos'])
        pos = bm['pos']
        if isinstance(pos, (int, float)):
            pos = unicode(pos)
        else:
            # ^ marks legacy bookmarks, so it has to be escaped
            pos = pos.replace(u'^', BM_LEGACY_ESC)
        return BM_FIELD_SEP.join([bm['title'], unicode(bm['spine']), pos])

    lines = [serialize_one(bm) for bm in bookmarks]
    return u'\n'.join(lines) + u'\n'
def read_bookmarks(self):
    '''
    Reset self.bookmarks and fill it from META-INF/calibre_bookmarks.txt
    in the base directory if that file exists, otherwise from the
    bookmarks stored for this book in self.config.
    '''
    self.bookmarks = []
    bmfile = os.path.join(self.base, 'META-INF', 'calibre_bookmarks.txt')
    if os.path.exists(bmfile):
        with open(bmfile, 'rb') as f:
            raw = f.read()
    else:
        raw = self.config['bookmarks_'+self.pathtoebook] or ''
    if not isinstance(raw, unicode):
        raw = raw.decode('utf-8')
    self.parse_bookmarks(raw)
def save_bookmarks(self, bookmarks=None):
    '''
    Save bookmarks (self.bookmarks if None). For a readable EPUB file they
    are stored inside the book as META-INF/calibre_bookmarks.txt, for
    everything else in self.config. Nothing is saved if the EPUB file
    cannot be opened for writing.
    '''
    if bookmarks is None:
        bookmarks = self.bookmarks
    dat = self.serialize_bookmarks(bookmarks)
    if os.path.splitext(self.pathtoebook)[1].lower() == '.epub' and \
            os.access(self.pathtoebook, os.R_OK):
        try:
            zf = open(self.pathtoebook, 'r+b')
        except IOError:
            return
        # Make sure the book is closed again, even if updating it fails
        with zf:
            safe_replace(zf, 'META-INF/calibre_bookmarks.txt',
                    StringIO(dat.encode('utf-8')),
                    add_missing=True)
    else:
        self.config['bookmarks_'+self.pathtoebook] = dat
def add_bookmark(self, bm):
    '''Add bm, replacing any existing bookmarks with the same title, and
    save the bookmarks.'''
    kept = [old for old in self.bookmarks if old['title'] != bm['title']]
    kept.append(bm)
    self.bookmarks = kept
    self.save_bookmarks()
def set_bookmarks(self, bookmarks):
    '''Replace the current bookmarks with bookmarks, without saving.'''
    setattr(self, 'bookmarks', bookmarks)
def __exit__(self, *args):
self._tdir.__exit__(*args)
for x in self.delete_on_exit:
if os.path.exists(x):
os.remove(x)
def get_preprocess_html(path_to_ebook, output):
    '''
    Run the HTML pre-processor over every spine item of the book at
    path_to_ebook and write the results, separated by a line of dashes, to
    the file output.
    '''
    from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
    iterator = EbookIterator(path_to_ebook)
    iterator.__enter__(only_input_plugin=True)
    try:
        preprocessor = HTMLPreProcessor(None, False)
        with open(output, 'wb') as out:
            for path in iterator.spine:
                with open(path, 'rb') as f:
                    html = f.read().decode('utf-8', 'replace')
                html = preprocessor(html, get_preprocess_html=True)
                out.write(html.encode('utf-8'))
                out.write(b'\n\n' + b'-'*80 + b'\n\n')
    finally:
        # __enter__() was called by hand (it needs an argument), so it has
        # to be undone by hand as well, otherwise the temporary directory
        # it created is left behind
        iterator.__exit__(None, None, None)

View File

@ -0,0 +1,42 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, re
from calibre.customize.ui import available_input_formats
def is_supported(path):
    '''Return True if the extension of path is an available input format.
    The htm, html, xhtm and xhtml spellings are all looked up as html.'''
    extension = os.path.splitext(path)[1]
    extension = extension.replace('.', '').lower()
    fmt = re.sub(r'(x{0,1})htm(l{0,1})', 'html', extension)
    return fmt in available_input_formats()
class UnsupportedFormatError(Exception):
    '''Error whose message names the (upper cased) format that is not
    supported.'''

    def __init__(self, fmt):
        text = _('%s format books are not supported')%fmt.upper()
        super(UnsupportedFormatError, self).__init__(text)
def EbookIterator(*args, **kwargs):
    'For backwards compatibility'
    # Imported when called rather than when this module is imported
    from calibre.ebooks.oeb.iterator import book
    return book.EbookIterator(*args, **kwargs)
def get_preprocess_html(path_to_ebook, output):
    '''
    Run the HTML pre-processor over every spine item of the book at
    path_to_ebook and write the results, separated by a line of dashes, to
    the file output.
    '''
    from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
    iterator = EbookIterator(path_to_ebook)
    iterator.__enter__(only_input_plugin=True, run_char_count=False,
            read_anchor_map=False)
    try:
        preprocessor = HTMLPreProcessor(None, False)
        with open(output, 'wb') as out:
            for path in iterator.spine:
                with open(path, 'rb') as f:
                    html = f.read().decode('utf-8', 'replace')
                html = preprocessor(html, get_preprocess_html=True)
                out.write(html.encode('utf-8'))
                out.write(b'\n\n' + b'-'*80 + b'\n\n')
    finally:
        # __enter__() was called by hand (it needs arguments), so it has
        # to be undone by hand as well, otherwise the temporary directory
        # it created is left behind
        iterator.__exit__(None, None, None)

View File

@ -0,0 +1,187 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
'''
Iterate over the HTML files in an ebook. Useful for writing viewers.
'''
import re, os, math
from functools import partial
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.config import DynamicConfig
from calibre.utils.logging import default_log
from calibre import (guess_type, prepare_string_for_xml,
xml_replace_entities)
from calibre.ebooks.oeb.transforms.cover import CoverManager
from calibre.ebooks.oeb.iterator.spine import (SpineItem, create_indexing_data)
from calibre.ebooks.oeb.iterator.bookmarks import BookmarksMixin
# Cover page template: CoverManager.SVG_TEMPLATE decoded from UTF-8 with its
# __ar__, __viewbox__, __width__ and __height__ placeholders filled in.
# EbookIterator.__enter__ %-formats it with the (XML-escaped) relative path of
# the cover to produce calibre_iterator_cover.html.
TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(\
        '__ar__', 'none').replace('__viewbox__', '0 0 600 800'
        ).replace('__width__', '600').replace('__height__', '800')
class FakeOpts(object):
    # Plain holder for a handful of option values. Nothing in the visible
    # part of this module reads it -- presumably it stands in for a
    # conversion options object; TODO confirm against callers.
    verbose = 0
    breadth_first = False
    max_levels = 5
    input_encoding = None
def write_oebbook(oeb, path):
    '''
    Serialize ``oeb`` into the directory ``path`` with OEBWriter and return
    the first file below ``path`` whose name ends in ``.opf`` (None when no
    such file is found).
    '''
    from calibre.ebooks.oeb.writer import OEBWriter
    from calibre import walk

    writer = OEBWriter()
    writer(oeb, path)
    opf_files = (name for name in walk(path) if name.endswith('.opf'))
    return next(opf_files, None)
class EbookIterator(BookmarksMixin):
    '''
    Explode an ebook into a temporary directory and expose its HTML files as
    ``self.spine``, a list of SpineItem objects with the linear spine items
    first.

    Use it as a context manager, or call ``__enter__`` by hand when its
    optional arguments are needed; ``__exit__`` releases the temporary
    directory again.
    '''

    # Divisor used to turn a spine item's character count into a page count
    CHARACTERS_PER_PAGE = 1000

    def __init__(self, pathtoebook, log=None):
        '''
        :param pathtoebook: path of the book; surrounding whitespace is
                            stripped and the absolute path is stored
        :param log: log object to use, ``default_log`` when not given
        '''
        self.log = log or default_log
        pathtoebook = pathtoebook.strip()
        self.pathtoebook = os.path.abspath(pathtoebook)
        self.config = DynamicConfig(name='iterator')
        ext = os.path.splitext(pathtoebook)[1].replace('.', '').lower()
        # Treat htm/html/xhtm/xhtml alike
        ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
        self.ebook_ext = ext.replace('original_', '')

    def search(self, text, index, backwards=False):
        '''
        Look for ``text`` (case-insensitively) in the spine items strictly
        after ``index``, or strictly before it when ``backwards`` is true,
        nearest item first.

        :return: the index of the first matching spine item, None if no item
                 matches
        '''
        text = prepare_string_for_xml(text.lower())
        pmap = list(enumerate(self.spine))
        if backwards:
            pmap.reverse()
        for i, path in pmap:
            if (backwards and i < index) or (not backwards and i > index):
                with open(path, 'rb') as f:
                    raw = f.read().decode(path.encoding)
                try:
                    raw = xml_replace_entities(raw)
                except Exception:
                    # Best effort: search the text with entities unreplaced
                    pass
                if text in raw.lower():
                    return i

    def __enter__(self, processed=False, only_input_plugin=False,
            run_char_count=True, read_anchor_map=True):
        ''' Convert an ebook file into an exploded OEB book suitable for
        display in viewers/preprocessing etc.

        :param processed: also run ``create_oebbook`` from the conversion
                          pipeline on the output of the input plugin
        :param only_input_plugin: run nothing but the input plugin
        :param run_char_count: passed on to every SpineItem
        :param read_anchor_map: passed on to every SpineItem; also controls
                                whether ``create_indexing_data`` is run
        :return: self
        '''
        self.delete_on_exit = []
        self._tdir = TemporaryDirectory('_ebook_iter')
        self.base = self._tdir.__enter__()
        try:
            self._open_book(processed, only_input_plugin, run_char_count,
                    read_anchor_map)
        except BaseException:
            # A ``with`` statement does not call __exit__ when __enter__
            # raises, so release the temporary directory here.
            self._tdir.__exit__(None, None, None)
            raise
        return self

    def _open_book(self, processed, only_input_plugin, run_char_count,
            read_anchor_map):
        ''' Run the input plugin into ``self.base`` and build the spine,
        page numbers, table of contents and bookmarks from its OPF. '''
        from calibre.ebooks.conversion.plumber import Plumber, create_oebbook

        plumber = Plumber(self.pathtoebook, self.base, self.log)
        plumber.setup_options()
        if self.pathtoebook.lower().endswith('.opf'):
            plumber.opts.dont_package = True
        if hasattr(plumber.opts, 'no_process'):
            plumber.opts.no_process = True

        plumber.input_plugin.for_viewer = True
        with plumber.input_plugin, open(plumber.input, 'rb') as inf:
            self.pathtoopf = plumber.input_plugin(inf,
                plumber.opts, plumber.input_fmt, self.log,
                {}, self.base)

            if not only_input_plugin:
                # Run the HTML preprocess/parsing from the conversion pipeline
                # as well. ``and`` binds tighter than ``or``; the parentheses
                # only spell out the grouping that was already in effect.
                needs_parsing = processed or (
                    plumber.input_fmt.lower() in {'pdb', 'pdf', 'rb'} and
                    not hasattr(self.pathtoopf, 'manifest'))
                if needs_parsing:
                    if hasattr(self.pathtoopf, 'manifest'):
                        self.pathtoopf = write_oebbook(self.pathtoopf,
                                self.base)
                    self.pathtoopf = create_oebbook(self.log, self.pathtoopf,
                            plumber.opts)

            # An object with a manifest is a book, not a path: write it out
            # and keep the path of the resulting OPF instead
            if hasattr(self.pathtoopf, 'manifest'):
                self.pathtoopf = write_oebbook(self.pathtoopf, self.base)

        self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper()
        if getattr(plumber.input_plugin, 'is_kf8', False):
            self.book_format = 'KF8'

        # Use the OPF object offered by the input plugin, if there is one
        self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
        if self.opf is None:
            self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
        self.language = self.opf.language
        if self.language:
            self.language = self.language.lower()

        # Linear spine items first, non-linear ones after them
        ordered = [i for i in self.opf.spine if i.is_linear] + \
                  [i for i in self.opf.spine if not i.is_linear]
        self.spine = []
        Spiny = partial(SpineItem, read_anchor_map=read_anchor_map,
                run_char_count=run_char_count)
        for i in ordered:
            spath = i.path
            mt = None
            if i.idref is not None:
                mt = self.opf.manifest.type_for_id(i.idref)
            if mt is None:
                mt = guess_type(spath)[0]
            try:
                self.spine.append(Spiny(spath, mime_type=mt))
            except Exception:
                self.log.warn('Missing spine item:', repr(spath))

        cover = self.opf.cover
        if cover and self.ebook_ext in {'lit', 'mobi', 'prc', 'opf', 'fb2',
                'azw', 'azw3'}:
            # Generate a page showing the cover and put it first in the spine
            cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
            rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
            chtml = (TITLEPAGE%prepare_string_for_xml(rcpath, True)).encode('utf-8')
            with open(cfile, 'wb') as f:
                f.write(chtml)
            self.spine[0:0] = [Spiny(cfile,
                mime_type='application/xhtml+xml')]
            self.delete_on_exit.append(cfile)

        if self.opf.path_to_html_toc is not None and \
           self.opf.path_to_html_toc not in self.spine:
            try:
                self.spine.append(Spiny(self.opf.path_to_html_toc))
            except Exception:
                import traceback
                traceback.print_exc()

        # Page numbers: each item gets ceil(characters / CHARACTERS_PER_PAGE)
        # pages, numbered consecutively starting at 1
        sizes = [i.character_count for i in self.spine]
        self.pages = [math.ceil(i/float(self.CHARACTERS_PER_PAGE)) for i in sizes]
        for p, s in zip(self.pages, self.spine):
            s.pages = p
        start = 1
        for s in self.spine:
            s.start_page = start
            start += s.pages
            s.max_page = s.start_page + s.pages - 1

        self.toc = self.opf.toc
        if read_anchor_map:
            create_indexing_data(self.spine, self.toc)

        self.read_bookmarks()

    def __exit__(self, *args):
        ''' Leave the temporary directory's context, then try to remove every
        file recorded in ``delete_on_exit``. '''
        self._tdir.__exit__(*args)
        for x in self.delete_on_exit:
            try:
                os.remove(x)
            except Exception:
                # Best effort only: a failed removal is deliberately ignored
                pass

View File

@ -0,0 +1,105 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from io import BytesIO
from calibre.utils.zipfile import safe_replace
# Separator between the title, spine index and position fields of a
# serialized non-legacy ('cfi') bookmark record
BM_FIELD_SEP = u'*|!|?|*'
# Stand-in written in place of every '^' in the position of a non-legacy
# bookmark; lines containing '^' are parsed as legacy bookmarks
BM_LEGACY_ESC = u'esc-text-%&*#%(){}ads19-end-esc'
class BookmarksMixin(object):
    '''
    Bookmark parsing, serialization and storage.

    The methods read ``self.base``, ``self.config`` and ``self.pathtoebook``
    and keep the bookmarks in ``self.bookmarks``; the class mixing this in
    has to provide the first three.

    A bookmark is a dict with the keys ``type`` ('legacy' or 'cfi'),
    ``title``, ``spine`` (int) and ``pos``.
    '''

    def parse_bookmarks(self, raw):
        ''' Append the bookmarks found in the text ``raw`` (one record per
        line) to ``self.bookmarks``. Lines that cannot be parsed are
        skipped. '''
        for line in raw.splitlines():
            bm = None
            if '^' in line:
                # Legacy record: title^spine#pos
                title, sep, ref = line.rpartition('^')
                spine, sep, pos = ref.partition('#')
                try:
                    spine = int(spine.strip())
                except ValueError:
                    continue
                bm = {'type':'legacy', 'title':title, 'spine':spine, 'pos':pos}
            elif BM_FIELD_SEP in line:
                try:
                    title, spine, pos = line.strip().split(BM_FIELD_SEP)
                    spine = int(spine)
                except ValueError:
                    # Wrong number of fields or a non-numeric spine index
                    continue
                # Unescape from serialization
                pos = pos.replace(BM_LEGACY_ESC, u'^')
                # Check for pos being a scroll fraction
                try:
                    pos = float(pos)
                except ValueError:
                    pass
                bm = {'type':'cfi', 'title':title, 'pos':pos, 'spine':spine}

            if bm:
                self.bookmarks.append(bm)

    def serialize_bookmarks(self, bookmarks):
        ''' Return ``bookmarks`` as text, one record per line, in the format
        read by :meth:`parse_bookmarks`. '''
        dat = []
        for bm in bookmarks:
            if bm['type'] == 'legacy':
                rec = u'%s^%d#%s'%(bm['title'], bm['spine'], bm['pos'])
            else:
                pos = bm['pos']
                if isinstance(pos, (int, float)):
                    pos = unicode(pos)
                else:
                    # A literal '^' would make the line parse as legacy
                    pos = pos.replace(u'^', BM_LEGACY_ESC)
                rec = BM_FIELD_SEP.join([bm['title'], unicode(bm['spine']), pos])
            dat.append(rec)
        return (u'\n'.join(dat) +u'\n')

    def read_bookmarks(self):
        ''' Reset ``self.bookmarks`` and fill it from
        META-INF/calibre_bookmarks.txt below ``self.base`` when that file
        exists, otherwise from the config entry for this book. '''
        self.bookmarks = []
        bmfile = os.path.join(self.base, 'META-INF', 'calibre_bookmarks.txt')
        raw = ''
        if os.path.exists(bmfile):
            with open(bmfile, 'rb') as f:
                raw = f.read()
        else:
            saved = self.config['bookmarks_'+self.pathtoebook]
            if saved:
                raw = saved
        if not isinstance(raw, unicode):
            raw = raw.decode('utf-8')
        self.parse_bookmarks(raw)

    def save_bookmarks(self, bookmarks=None):
        ''' Store ``bookmarks`` (default: ``self.bookmarks``): inside the
        book file itself when it is a readable .epub, in the config
        otherwise. '''
        if bookmarks is None:
            bookmarks = self.bookmarks
        dat = self.serialize_bookmarks(bookmarks)
        if os.path.splitext(self.pathtoebook)[1].lower() == '.epub' and \
            os.access(self.pathtoebook, os.R_OK):
            try:
                zf = open(self.pathtoebook, 'r+b')
            except IOError:
                # Cannot open the book for writing: nothing is saved
                return
            # Close the handle once the archive has been updated instead of
            # leaving that to the garbage collector
            with zf:
                safe_replace(zf, 'META-INF/calibre_bookmarks.txt',
                        BytesIO(dat.encode('utf-8')),
                        add_missing=True)
        else:
            self.config['bookmarks_'+self.pathtoebook] = dat

    def add_bookmark(self, bm):
        ''' Add ``bm``, replacing any bookmark with the same title, and save
        the bookmarks. '''
        self.bookmarks = [x for x in self.bookmarks if x['title'] !=
                bm['title']]
        self.bookmarks.append(bm)
        self.save_bookmarks()

    def set_bookmarks(self, bookmarks):
        ''' Replace the in-memory bookmarks; nothing is saved. '''
        self.bookmarks = bookmarks

View File

@ -0,0 +1,120 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
from future_builtins import map
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re, os
from functools import partial
from operator import attrgetter
from collections import namedtuple
from calibre import guess_type
from calibre.ebooks.chardet import xml_to_unicode
def character_count(html):
    '''
    Count the "significant" text characters of a HTML string: for every run
    of text enclosed between a ``>`` and the next ``<``, each stretch of
    whitespace is counted as a single character.
    '''
    whitespace_run = re.compile(r'\s+')
    return sum(
        # -2 discounts the enclosing '>' and '<' that are part of the match
        len(whitespace_run.sub(' ', text_run.group())) - 2
        for text_run in re.finditer(r'>[^<]+<', html))
def anchor_map(html):
    '''
    Return map of all anchor names to their offsets in the html.

    An anchor name is the quoted value of an ``id`` or ``name`` attribute;
    its offset is the position in ``html`` where that attribute starts. When
    a name occurs more than once, the offset of its first occurrence is kept.
    '''
    ans = {}
    for match in re.finditer(
        r'''(?:id|name)\s*=\s*['"]([^'"]+)['"]''', html):
        # group(1) is the bare name. Keying on the whole match (id="name")
        # would make every lookup by fragment in IndexEntry miss.
        anchor = match.group(1)
        ans.setdefault(anchor, match.start())
    return ans
class SpineItem(unicode):
    '''
    The path of one file in the spine, as a unicode string that also carries
    the data gathered from that file: ``encoding``, ``character_count``,
    ``anchor_map`` and ``mime_type``. The paging attributes (``start_page``,
    ``pages``, ``max_page``) start out as -1 and ``index_entries`` as an
    empty list.
    '''

    def __new__(cls, path, mime_type=None, read_anchor_map=True,
            run_char_count=True):
        # Fall back to the path without its '#...' part when only that exists
        if not os.path.exists(path):
            without_fragment = path.partition('#')[0]
            if os.path.exists(without_fragment):
                path = without_fragment

        obj = super(SpineItem, cls).__new__(cls, path)
        with open(path, 'rb') as stream:
            data = stream.read()
        text, obj.encoding = xml_to_unicode(data)

        if run_char_count:
            obj.character_count = character_count(text)
        else:
            # Fixed value used when counting is switched off
            obj.character_count = 10000

        if read_anchor_map:
            obj.anchor_map = anchor_map(text)
        else:
            obj.anchor_map = {}

        obj.start_page = obj.pages = obj.max_page = -1
        obj.index_entries = []
        obj.mime_type = guess_type(obj)[0] if mime_type is None else mime_type
        return obj
class IndexEntry(object):
    '''
    Where one table of contents entry sits in the spine: the position of its
    file (``spine_pos``, -1 if the file is not in the spine), the offset of
    its anchor inside that file (``anchor_pos``) and its nesting ``depth``.
    :meth:`find_end` adds where the entry ends.
    '''

    def __init__(self, spine, toc_entry, num):
        self.num = num
        self.text = toc_entry.text or _('Unknown')
        self.key = toc_entry.abspath
        # An empty fragment is stored as None
        fragment = toc_entry.fragment or None
        self.anchor = fragment
        self.start_anchor = fragment
        self.spine_count = len(spine)

        try:
            self.spine_pos = spine.index(self.key)
        except ValueError:
            self.spine_pos = -1

        if self.spine_pos > -1:
            anchors = spine[self.spine_pos].anchor_map
            self.anchor_pos = anchors.get(self.anchor, 0)
        else:
            self.anchor_pos = 0

        # Depth is the number of ancestors of the ToC entry
        levels = 0
        ancestor = toc_entry.parent
        while ancestor is not None:
            levels += 1
            ancestor = ancestor.parent
        self.depth = levels

        self.sort_key = (self.spine_pos, self.anchor_pos)

    def find_end(self, all_entries):
        '''
        Set ``end_spine_pos`` and ``end_anchor`` from the first entry in
        ``all_entries`` that is not nested deeper than this one and starts
        after it. Without such an entry this one runs to the last spine item.

        ``all_entries`` must be sorted by (spine_pos, anchor_pos).
        '''
        for candidate in all_entries:
            if candidate.depth > self.depth:
                continue
            later_in_same_file = (candidate.spine_pos == self.spine_pos and
                    candidate.anchor_pos > self.anchor_pos)
            if later_in_same_file or candidate.spine_pos > self.spine_pos:
                self.end_spine_pos = candidate.spine_pos
                self.end_anchor = candidate.anchor
                return
        self.end_spine_pos = self.spine_count - 1
        self.end_anchor = None
def create_indexing_data(spine, toc):
    '''
    Record on every spine item which table of contents entries touch it.

    One IndexEntry is built for every node of ``toc.flat()`` other than
    ``toc`` itself. For each spine item lying between an entry's start and
    end files (inclusive), an ``(entry, start_anchor, end_anchor)`` named
    tuple is appended to that item's ``index_entries``. Does nothing when
    ``toc`` is false.
    '''
    if not toc:
        return

    # Each entry is numbered with its position in toc.flat() minus one
    entries = [IndexEntry(spine, node, pos - 1)
            for pos, node in enumerate(toc.flat()) if node is not toc]
    entries.sort(key=attrgetter('sort_key'))
    for entry in entries:
        entry.find_end(entries)

    ie = namedtuple('IndexEntry', 'entry start_anchor end_anchor')
    for spine_pos, spine_item in enumerate(spine):
        for entry in entries:
            if not (entry.spine_pos <= spine_pos <= entry.end_spine_pos):
                continue # Does not touch this file
            if entry.spine_pos == spine_pos:
                # NOTE(review): the end anchor is also keyed on the entry's
                # *starting* file rather than end_spine_pos -- confirm that
                # this is intended.
                record = ie(entry, entry.anchor, entry.end_anchor)
            else:
                record = ie(entry, None, None)
            spine_item.index_entries.append(record)

View File

@ -361,9 +361,11 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
# Remove any encoding-specifying <meta/> elements
for meta in META_XP(data):
meta.getparent().remove(meta)
etree.SubElement(head, XHTML('meta'),
attrib={'http-equiv': 'Content-Type',
'content': '%s; charset=utf-8' % XHTML_NS})
meta = etree.SubElement(head, XHTML('meta'),
attrib={'http-equiv': 'Content-Type'})
meta.set('content', 'text/html; charset=utf-8') # Ensure content is second
# attribute
# Ensure has a <body/>
if not xpath(data, '/h:html/h:body'):
body = xpath(data, '//h:body')

View File

@ -347,7 +347,11 @@ class Stylizer(object):
style = self.flatten_style(rule.style)
self.page_rule.update(style)
elif isinstance(rule, CSSFontFaceRule):
self.font_face_rules.append(rule)
if rule.style.length > 1:
# Ignore the meaningless font face rules generated by the
# benighted MS Word that contain only a font-family declaration
# and nothing else
self.font_face_rules.append(rule)
return results
def flatten_style(self, cssstyle):

View File

@ -157,10 +157,12 @@ class CSSFlattener(object):
bs = body.get('style', '').split(';')
bs.append('margin-top: 0pt')
bs.append('margin-bottom: 0pt')
bs.append('margin-left : %fpt'%\
float(self.context.margin_left))
bs.append('margin-right : %fpt'%\
float(self.context.margin_right))
if float(self.context.margin_left) >= 0:
bs.append('margin-left : %gpt'%\
float(self.context.margin_left))
if float(self.context.margin_right) >= 0:
bs.append('margin-right : %gpt'%\
float(self.context.margin_right))
bs.extend(['padding-left: 0pt', 'padding-right: 0pt'])
if self.page_break_on_body:
bs.extend(['page-break-before: always'])
@ -393,10 +395,11 @@ class CSSFlattener(object):
l = etree.SubElement(head, XHTML('link'),
rel='stylesheet', type=CSS_MIME, href=href)
l.tail='\n'
href = item.relhref(global_href)
l = etree.SubElement(head, XHTML('link'),
rel='stylesheet', type=CSS_MIME, href=href)
l.tail = '\n'
if global_href:
href = item.relhref(global_href)
l = etree.SubElement(head, XHTML('link'),
rel='stylesheet', type=CSS_MIME, href=href)
l.tail = '\n'
def replace_css(self, css):
manifest = self.oeb.manifest
@ -413,14 +416,16 @@ class CSSFlattener(object):
global_css = defaultdict(list)
for item in self.oeb.spine:
stylizer = self.stylizers[item]
stylizer.page_rule['margin-top'] = '%gpt'%\
float(self.context.margin_top)
stylizer.page_rule['margin-bottom'] = '%gpt'%\
float(self.context.margin_bottom)
if float(self.context.margin_top) >= 0:
stylizer.page_rule['margin-top'] = '%gpt'%\
float(self.context.margin_top)
if float(self.context.margin_bottom) >= 0:
stylizer.page_rule['margin-bottom'] = '%gpt'%\
float(self.context.margin_bottom)
items = stylizer.page_rule.items()
items.sort()
css = ';\n'.join("%s: %s" % (key, val) for key, val in items)
css = '@page {\n%s\n}\n'%css
css = ('@page {\n%s\n}\n'%css) if items else ''
rules = [r.cssText for r in stylizer.font_face_rules]
raw = '\n\n'.join(rules)
css += '\n\n' + raw
@ -429,9 +434,11 @@ class CSSFlattener(object):
gc_map = {}
manifest = self.oeb.manifest
for css in global_css:
id_, href = manifest.generate('page_css', 'page_styles.css')
manifest.add(id_, href, CSS_MIME, data=cssutils.parseString(css,
validate=False))
href = None
if css.strip():
id_, href = manifest.generate('page_css', 'page_styles.css')
manifest.add(id_, href, CSS_MIME, data=cssutils.parseString(css,
validate=False))
gc_map[css] = href
ans = {}

View File

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
import os, re
from calibre.utils.date import isoformat, now
from calibre import guess_type
@ -141,7 +141,7 @@ class MergeMetadata(object):
item = self.oeb.manifest.hrefs[old_cover.href]
if not cdata:
return item.id
self.oeb.manifest.remove(item)
self.remove_old_cover(item)
elif not cdata:
id = self.oeb.manifest.generate(id='cover')
self.oeb.manifest.add(id, old_cover.href, 'image/jpeg')
@ -152,3 +152,41 @@ class MergeMetadata(object):
self.oeb.guide.add('cover', 'Cover', href)
return id
def remove_old_cover(self, cover_item):
from calibre.ebooks.oeb.base import XPath
from lxml import etree
self.oeb.manifest.remove(cover_item)
# Remove any references to the cover in the HTML
affected_items = set()
for item in self.oeb.spine:
try:
images = XPath('//h:img[@src]')(item.data)
except:
images = []
removed = False
for img in images:
href = item.abshref(img.get('src'))
if href == cover_item.href:
img.getparent().remove(img)
removed = True
if removed:
affected_items.add(item)
# Check if the resulting HTML has no content, if so remove it
for item in affected_items:
body = XPath('//h:body')(item.data)
if body:
text = etree.tostring(body[0], method='text', encoding=unicode)
else:
text = ''
text = re.sub(r'\s+', '', text)
if not text and not XPath('//h:img|//svg:svg')(item.data):
self.log('Removing %s as it is a wrapper around'
' the cover image'%item.href)
self.oeb.spine.remove(item)
self.oeb.manifest.remove(item)

View File

@ -372,8 +372,8 @@ class ParseRtf:
old_rtf = old_rtf_obj.check_if_old_rtf()
if old_rtf:
if self.__run_level > 5:
msg = 'Older RTF\n'
msg += 'self.__run_level is "%s"\n' % self.__run_level
msg = 'Older RTF\n' \
'self.__run_level is "%s"\n' % self.__run_level
raise RtfInvalidCodeException, msg
if self.__run_level > 1:
sys.stderr.write('File could be older RTF...\n')
@ -381,7 +381,7 @@ class ParseRtf:
if self.__run_level > 1:
sys.stderr.write(
'File also has newer RTF.\n'
'Will do the best to convert.\n'
'Will do the best to convert...\n'
)
add_brackets_obj = add_brackets.AddBrackets(
in_file = self.__temp_file,

View File

@ -20,6 +20,9 @@ class AddBrackets:
"""
Add brackets for old RTF.
Logic:
When control words without their own brackets are encountered
and in the list of allowed words, this will add brackets
to facilitate the treatment of the file
"""
def __init__(self, in_file,
bug_handler,
@ -41,53 +44,56 @@ class AddBrackets:
self.__copy = copy
self.__write_to = better_mktemp()
self.__run_level = run_level
def __initiate_values(self):
"""
"""
self.__state_dict = {
'before_body' : self.__before_body_func,
'in_body' : self.__in_body_func,
'after_control_word' : self.__after_control_word_func,
'in_ignore' : self.__ignore_func,
}
self.__accept = [
'cw<ci<bold______' ,
'cw<ci<annotation' ,
'cw<ci<blue______' ,
# 'cw<ci<bold______' ,
'cw<ci<caps______' ,
'cw<ci<char-style' ,
'cw<ci<dbl-strike' ,
'cw<ci<emboss____' ,
'cw<ci<engrave___' ,
'cw<ci<font-color' ,
'cw<ci<font-down_' ,
'cw<ci<font-size_' ,
'cw<ci<font-style' ,
'cw<ci<font-up___' ,
'cw<ci<footnot-mk' ,
'cw<ci<green_____' ,
'cw<ci<hidden____' ,
'cw<ci<italics___' ,
'cw<ci<outline___' ,
'cw<ci<red_______' ,
'cw<ci<shadow____' ,
'cw<ci<small-caps' ,
'cw<ci<strike-thr' ,
'cw<ci<subscript_' ,
'cw<ci<superscrip' ,
'cw<ci<underlined' ,
# 'cw<ul<underlined' ,
]
def __initiate_values(self):
"""
Init temp values
"""
self.__state = 'before_body'
self.__inline = {}
self.__temp_group = []
self.__open_bracket = 0
self.__found_brackets = 0
self.__accept = [
'cw<ci<bold______',
'cw<ci<annotation' ,
'cw<ci<blue______' ,
'cw<ci<bold______' ,
'cw<ci<caps______' ,
'cw<ci<char-style' ,
'cw<ci<dbl-strike' ,
'cw<ci<emboss____' ,
'cw<ci<engrave___' ,
'cw<ci<font-color' ,
'cw<ci<font-down_' ,
'cw<ci<font-size_' ,
'cw<ci<font-style' ,
'cw<ci<font-up___',
'cw<ci<footnot-mk',
'cw<ci<green_____' ,
'cw<ci<hidden____',
'cw<ci<italics___' ,
'cw<ci<outline___',
'cw<ci<red_______' ,
'cw<ci<shadow____',
'cw<ci<small-caps' ,
'cw<ci<strike-thr',
'cw<ci<subscript_' ,
'cw<ci<superscrip',
'cw<ci<underlined' ,
# 'cw<ul<underlined' ,
]
self.__open_bracket = False
self.__found_brackets = False
def __before_body_func(self, line):
"""
If we are before the body, we are not interested in changing anything
"""
if self.__token_info == 'mi<mk<body-open_':
self.__state = 'in_body'
@ -95,6 +101,14 @@ class AddBrackets:
def __in_body_func(self, line):
"""
Select what action to take in body:
1-At the end of the file close the bracket if a bracket was opened
This happens if there is a change
2-If an open bracket is found the code inside is ignored
(written without modifications)
3-If an accepted control word is found put the line
in a buffer then change state to after cw
4-Else simply write the line
"""
if line == 'cb<nu<clos-brack<0001\n' and self.__open_bracket:
self.__write_obj.write(
@ -102,7 +116,7 @@ class AddBrackets:
)
self.__write_obj.write(line)
elif self.__token_info == 'ob<nu<open-brack':
self.__found_brackets = 1
self.__found_brackets = True
self.__state = 'in_ignore'
self.__ignore_count = self.__ob_count
self.__write_obj.write(line)
@ -114,6 +128,10 @@ class AddBrackets:
def __after_control_word_func(self, line):
"""
After a cw either add the next allowed cw to the temporary list or
change group and write it.
If the token leading to an exit is an open bracket go to
ignore, otherwise go to in body
"""
if self.__token_info in self.__accept:
self.__temp_group.append(line)
@ -129,82 +147,84 @@ class AddBrackets:
def __write_group(self):
"""
Write a temporary group after accepted control words end
But this is mostly useless in my opinion as there is no list of rejected cw
This may be a way to implement future old rtf processing for cw
Utility: open a group to just put brackets but why be so complicated?
Scheme: open brackets, write cw then go to body and back with cw after
"""
if self.__open_bracket:
self.__write_obj.write(
'cb<nu<clos-brack<0003\n'
)
self.__open_bracket = 0
inline_string = ''
the_keys = self.__inline.keys()
for the_key in the_keys:
value = self.__inline[the_key]
if value != 'false':
inline_string += '%s<nu<%s\n' % (the_key, value)
self.__open_bracket = False
inline_string = ''.join(['%s<nu<%s\n' % (k, v) \
for k, v in self.__inline.iteritems() \
if v != 'false'])
if inline_string:
self.__write_obj.write('ob<nu<open-brack<0003\n')
self.__write_obj.write(inline_string)
self.__open_bracket = 1
self.__write_obj.write('ob<nu<open-brack<0003\n'
'%s' % inline_string)
self.__open_bracket = True
self.__temp_group = []
def __change_permanent_group(self):
"""
use temp group to change permanent group
Use temp group to change permanent group
If the control word is not accepted remove it
What is the interest as it is build to accept only accepted cw
in __after_control_word_func?
"""
for line in self.__temp_group:
token_info = line[:16]
if token_info in self.__accept:
att = line[20:-1]
self.__inline[token_info] = att
self.__inline = {line[:16] : line[20:-1]\
for line in self.__temp_group\
# Is this really necessary?
if line[:16] in self.__accept}
def __ignore_func(self, line):
"""
Don't add any brackets while inside of brackets RTF has already
added.
Just copy data inside of RTF brackets already here.
"""
self.__write_obj.write(line)
if self.__token_info == 'cb<nu<clos-brack'and\
self.__cb_count == self.__ignore_count:
if self.__token_info == 'cb<nu<clos-brack'\
and self.__cb_count == self.__ignore_count:
self.__state = 'in_body'
def __check_brackets(self, in_file):
self.__check_brack_obj = check_brackets.CheckBrackets\
"""
Return True if brackets match
"""
check_brack_obj = check_brackets.CheckBrackets\
(file = in_file)
good_br = self.__check_brack_obj.check_brackets()[0]
if not good_br:
return 1
return check_brack_obj.check_brackets()[0]
def add_brackets(self):
"""
"""
self.__initiate_values()
read_obj = open(self.__file, 'r')
self.__write_obj = open(self.__write_to, 'w')
line_to_read = 1
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
self.__token_info = line[:16]
if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1]
action = self.__state_dict.get(self.__state)
if action == None:
sys.stderr.write('No matching state in module add_brackets.py\n')
sys.stderr.write(self.__state + '\n')
action(line)
read_obj.close()
self.__write_obj.close()
bad_brackets = self.__check_brackets(self.__write_to)
if not bad_brackets:
with open(self.__file, 'r') as read_obj:
with open(self.__write_to, 'w') as self.__write_obj:
for line in read_obj:
self.__token_info = line[:16]
if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1]
action = self.__state_dict.get(self.__state)
if action is None:
sys.stderr.write(
'No matching state in module add_brackets.py\n'
'%s\n' % self.__state)
action(line)
#Check bad brackets
if self.__check_brackets(self.__write_to):
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "add_brackets.data")
copy_obj.rename(self.__write_to, self.__file)
copy_obj.rename(self.__write_to, self.__file)
else:
if self.__run_level > 0:
sys.stderr.write(
'Sorry, but this files has a mix of old and new RTF.\n'
'Some characteristics cannot be converted.\n')
os.remove(self.__write_to)
os.remove(self.__write_to)

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,5 @@
import os, sys
from codecs import EncodedFile
from calibre.ebooks.rtf2xml import copy, check_encoding
from calibre.ptempfile import better_mktemp
@ -41,6 +42,7 @@ class ConvertToTags:
self.__run_level = run_level
self.__write_to = better_mktemp()
self.__convert_utf = False
self.__bad_encoding = False
def __initiate_values(self):
"""
@ -213,13 +215,14 @@ class ConvertToTags:
if not check_encoding_obj.check_encoding(self.__file, verbose=False):
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
elif not check_encoding_obj.check_encoding(self.__file, self.__encoding, verbose=False):
self.__write_obj.write('<?xml version="1.0" encoding="UTF-8" ?>')
self.__convert_utf = True
else:
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
' hope for the best')
self.__bad_encoding = True
self.__new_line = 0
self.__write_new_line()
if self.__no_dtd:
@ -247,7 +250,7 @@ class ConvertToTags:
the appropriate function.
The functions that are called:
a text function for text
an open funciton for open tags
an open function for open tags
an open with attribute function for tags with attributes
an empty with attribute function for tags that are empty but have
attribtes.
@ -263,20 +266,19 @@ class ConvertToTags:
action = self.__state_dict.get(self.__token_info)
if action is not None:
action(line)
self.__write_obj.close()
#convert all encodings to UTF8 to avoid unsupported encodings in lxml
if self.__convert_utf:
#convert all encodings to UTF8 or ASCII to avoid unsupported encodings in lxml
if self.__convert_utf or self.__bad_encoding:
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
copy_obj.rename(self.__write_to, self.__file)
file_encoding = "utf-8"
if self.__bad_encoding:
file_encoding = "us-ascii"
with open(self.__file, 'r') as read_obj:
with open(self.__write_to, 'w') as write_obj:
file = read_obj.read()
try:
file = file.decode(self.__encoding)
write_obj.write(file.encode('utf-8'))
except:
sys.stderr.write('Conversion to UTF-8 is not possible,'
' encoding should be very carefully checked')
write_objenc = EncodedFile(write_obj, self.__encoding,
file_encoding, 'replace')
for line in read_obj:
write_objenc.write(line)
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "convert_to_tags.data")

View File

@ -11,6 +11,7 @@
# #
#########################################################################
import sys, os
from calibre.ebooks.rtf2xml import copy
from calibre.ptempfile import better_mktemp
@ -31,29 +32,29 @@ class Header:
self.__bug_handler = bug_handler
self.__copy = copy
self.__write_to = better_mktemp()
self.__found_a_header = 0
self.__found_a_header = False
def __in_header_func(self, line):
"""
Handle all tokens that are part of header
"""
if self.__cb_count == self.__header_bracket_count:
self.__in_header = 0
self.__in_header = False
self.__write_obj.write(line)
self.__write_to_head_obj.write(
'mi<mk<head___clo\n')
self.__write_to_head_obj.write(
'mi<tg<close_____<header-or-footer\n')
self.__write_to_head_obj.write(
'mi<mk<head___clo\n' \
'mi<tg<close_____<header-or-footer\n' \
'mi<mk<header-clo\n')
else:
self.__write_to_head_obj.write(line)
def __found_header(self, line):
"""
Found a header
"""
# but this could be header or footer
self.__found_a_header = 1
self.__in_header = 1
self.__found_a_header = True
self.__in_header = True
self.__header_count += 1
# temporarily set this to zero so I can enter loop
self.__cb_count = 0
@ -69,18 +70,23 @@ class Header:
'mi<tg<open-att__<header-or-footer<type>%s\n' % (type)
)
else:
sys.stderr.write('module is header\n')
sys.stderr.write('method is __found_header\n')
sys.stderr.write('no dict entry\n')
sys.stderr.write('line is %s' % line)
sys.stderr.write(
'module is header\n' \
'method is __found_header\n' \
'no dict entry\n' \
'line is %s' % line)
self.__write_to_head_obj.write(
'mi<tg<open-att__<header-or-footer<type>none\n'
)
def __default_sep(self, line):
"""Handle all tokens that are not header tokens"""
"""
Handle all tokens that are not header tokens
"""
if self.__token_info[3:5] == 'hf':
self.__found_header(line)
self.__write_obj.write(line)
def __initiate_sep_values(self):
"""
initiate counters for separate_footnotes method.
@ -89,7 +95,7 @@ class Header:
self.__ob_count = 0
self.__cb_count = 0
self.__header_bracket_count = 0
self.__in_header = 0
self.__in_header = False
self.__header_count = 0
self.__head_dict = {
'head-left_' : ('header-left'),
@ -101,6 +107,7 @@ class Header:
'header____' : ('header' ),
'footer____' : ('footer' ),
}
def separate_headers(self):
"""
Separate all the footnotes in an RTF file and put them at the bottom,
@ -110,53 +117,47 @@ class Header:
bottom of the main file.
"""
self.__initiate_sep_values()
read_obj = open(self.__file)
self.__write_obj = open(self.__write_to, 'w')
self.__header_holder = better_mktemp()
self.__write_to_head_obj = open(self.__header_holder, 'w')
line_to_read = 1
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
self.__token_info = line[:16]
# keep track of opening and closing brackets
if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1]
# In the middle of footnote text
if self.__in_header:
self.__in_header_func(line)
# not in the middle of footnote text
else:
self.__default_sep(line)
self.__write_obj.close()
read_obj.close()
self.__write_to_head_obj.close()
read_obj = open(self.__header_holder, 'r')
write_obj = open(self.__write_to, 'a')
write_obj.write(
'mi<mk<header-beg\n')
line = 1
while line:
line = read_obj.readline()
write_obj.write(line)
write_obj.write(
'mi<mk<header-end\n')
read_obj.close()
write_obj.close()
with open(self.__file) as read_obj:
with open(self.__write_to, 'w') as self.__write_obj:
with open(self.__header_holder, 'w') as self.__write_to_head_obj:
for line in read_obj:
self.__token_info = line[:16]
# keep track of opening and closing brackets
if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1]
# In the middle of footnote text
if self.__in_header:
self.__in_header_func(line)
# not in the middle of footnote text
else:
self.__default_sep(line)
with open(self.__header_holder, 'r') as read_obj:
with open(self.__write_to, 'a') as write_obj:
write_obj.write(
'mi<mk<header-beg\n')
for line in read_obj:
write_obj.write(line)
write_obj.write(
'mi<mk<header-end\n')
os.remove(self.__header_holder)
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "header_separate.info")
copy_obj.copy_file(self.__write_to, "header_separate.data")
copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to)
def update_info(self, file, copy):
"""
Unused method
"""
self.__file = file
self.__copy = copy
def __get_head_body_func(self, line):
"""
Process lines in main body and look for beginning of headers.
@ -166,6 +167,7 @@ class Header:
self.__state = 'head'
else:
self.__write_obj.write(line)
def __get_head_head_func(self, line):
"""
Copy headers and footers from bottom of file to a separate, temporary file.
@ -174,6 +176,7 @@ class Header:
self.__state = 'body'
else:
self.__write_to_head_obj.write(line)
def __get_headers(self):
"""
Private method to remove footnotes from main file. Read one line from
@ -182,21 +185,16 @@ class Header:
These two functions do the work of separating the footnotes form the
body.
"""
read_obj = open(self.__file)
self.__write_obj = open(self.__write_to, 'w')
# self.__write_to = "footnote_info.data"
self.__write_to_head_obj = open(self.__header_holder, 'w')
line = 1
while line:
line = read_obj.readline()
self.__token_info = line[:16]
if self.__state == 'body':
self.__get_head_body_func(line)
elif self.__state == 'head':
self.__get_head_head_func(line)
read_obj.close()
self.__write_obj.close()
self.__write_to_head_obj.close()
with open(self.__file) as read_obj:
with open(self.__write_to, 'w') as self.__write_obj:
with open(self.__header_holder, 'w') as self.__write_to_head_obj:
for line in read_obj:
self.__token_info = line[:16]
if self.__state == 'body':
self.__get_head_body_func(line)
elif self.__state == 'head':
self.__get_head_head_func(line)
def __get_head_from_temp(self, num):
    """
    Private method for joining headers and footers to body.

    Required:
        num -- the marker number (a string) of the header to fetch
    Returns:
        the lines of that header as one string
    Logic:
        Read forward through the already opened header file, starting from
        its current position, until the opening marker for this number is
        found. Collect every following line until the closing marker and
        return them as a string. If the file ends before the closing marker,
        return whatever was collected (an empty string when the opening
        marker was never seen) so that the caller always gets a string it
        can write.
    """
    look_for = 'mi<mk<header-ope<' + num + '\n'
    found_head = False
    collected = []
    for line in self.__read_from_head_obj:
        if found_head:
            if line == 'mi<mk<header-clo\n':
                return ''.join(collected)
            collected.append(line)
        elif line == look_for:
            found_head = True
    # Ran out of input: never return None, the caller writes the result.
    return ''.join(collected)
def __join_from_temp(self):
    """
    Private method for rejoining headers and footers to the body.

    Requires:
        nothing
    Returns:
        nothing
    Logic:
        Read the body file one line at a time. When a line starts with the
        header index marker, replace it with the text returned for the
        number that follows the marker. If no marker is found, simply write
        out the token (line) unchanged.
    """
    # These two handles are stored on the instance and are deliberately not
    # closed here. NOTE(review): presumably the caller closes them after
    # this method returns -- confirm.
    self.__read_from_head_obj = open(self.__header_holder, 'r')
    self.__write_obj = open(self.__write_to2, 'w')
    with open(self.__write_to, 'r') as read_obj:
        for line in read_obj:
            if line[:16] == 'mi<mk<header-ind':
                # Character 16 is the separator; the number runs up to the
                # trailing newline.
                line = self.__get_head_from_temp(line[17:-1])
            self.__write_obj.write(line)
def join_headers(self):
"""
Join the footnotes from the bottom of the file and put them in their

View File

@ -181,7 +181,7 @@ class Hex2Utf8:
self.__dingbats_dict.update(dingbats_base_dict)
self.__dingbats_dict.update(ms_dingbats_dict)
# load dictionary for caps, and make a string for the replacement
self.__caps_uni_dict = char_map_obj.get_char_map(map='caps_uni')
self.__caps_uni_dict = char_map_obj.get_char_map(map = 'caps_uni')
# # print self.__caps_uni_dict
# don't think I'll need this
##keys = self.__caps_uni_dict.keys()

View File

@ -11,14 +11,18 @@
# #
#########################################################################
import sys
"""
"""
class OldRtf:
"""
Check to see if the RTF is an older version
Logic:
If allowable control word/properties happen in text without being enclosed
in brackets the file will be considered old rtf
"""
def __init__(self, in_file, bug_handler, run_level ):
def __init__(self, in_file,
bug_handler,
run_level,
):
"""
Required:
'file'--file to parse
@ -32,46 +36,46 @@ class OldRtf:
"""
self.__file = in_file
self.__bug_handler = bug_handler
self.__initiate_values()
self.__ob_group = 0
def __initiate_values(self):
self.__previous_token = ''
self.__new_found = 0
self.__run_level = run_level
self.__allowable = [
'annotation' ,
'blue______' ,
'bold______',
'caps______',
'char-style' ,
'dbl-strike' ,
'emboss____',
'engrave___' ,
'font-color',
'font-down_' ,
'font-size_',
'font-style',
'font-up___',
'footnot-mk' ,
'green_____' ,
'hidden____',
'italics___',
'outline___',
'red_______',
'shadow____' ,
'small-caps',
'strike-thr',
'subscript_',
'superscrip' ,
'underlined' ,
'annotation' ,
'blue______' ,
'bold______',
'caps______',
'char-style' ,
'dbl-strike' ,
'emboss____',
'engrave___' ,
'font-color',
'font-down_' ,
'font-size_',
'font-style',
'font-up___',
'footnot-mk' ,
'green_____' ,
'hidden____',
'italics___',
'outline___',
'red_______',
'shadow____' ,
'small-caps',
'strike-thr',
'subscript_',
'superscrip' ,
'underlined' ,
]
self.__state = 'before_body'
self.__action_dict = {
'before_body' : self.__before_body_func,
'in_body' : self.__check_tokens_func,
'after_pard' : self.__after_pard_func,
}
self.__is_old = 0
def __initiate_values(self):
self.__previous_token = ''
self.__state = 'before_body'
self.__found_new = 0
self.__ob_group = 0
def __check_tokens_func(self, line):
if self.__inline_info in self.__allowable:
if self.__ob_group == self.__base_ob_count:
@ -80,48 +84,56 @@ class OldRtf:
self.__found_new += 1
elif self.__token_info == 'cw<pf<par-def___':
self.__state = 'after_pard'
def __before_body_func(self, line):
    """
    Wait for the start of the document body.

    Required:
        line -- the current line (not inspected; the current token is used)
    Returns:
        nothing
    Logic:
        Ignore everything until the body-open marker. At that point record
        the current open-bracket depth as the base depth and switch to the
        'in_body' state.
    """
    if self.__token_info != 'mi<mk<body-open_':
        return
    self.__state = 'in_body'
    self.__base_ob_count = self.__ob_group
def __after_pard_func(self, line):
    """
    Skip the tokens that directly follow a paragraph definition.

    Required:
        line -- the current line
    Returns:
        nothing
    Logic:
        Stay in this state while lines start with 'cw'. The first line that
        does not start with 'cw' switches back to the 'in_body' state.
    """
    starts_with_cw = line[0:2] == 'cw'
    if not starts_with_cw:
        self.__state = 'in_body'
def check_if_old_rtf(self):
    """
    Decide whether the tokenized file uses old-style RTF constructions.

    Requires:
        nothing
    Returns:
        True if file is older RTF
        False if file is newer RTF
    Logic:
        Reset the parsing state, then read the file one line at a time.
        Keep track of the bracket depth, and hand each line to the function
        registered for the current state. Stop as soon as a function reports
        'old_rtf' or 'new_rtf', or when the end of the body is reached.
    """
    self.__initiate_values()
    with open(self.__file, 'r') as read_obj:
        for line_num, line in enumerate(read_obj, 1):
            self.__token_info = line[:16]
            if self.__token_info == 'mi<mk<body-close':
                return False
            if self.__token_info == 'ob<nu<open-brack':
                self.__ob_group += 1
                self.__ob_count = line[-5:-1]
            if self.__token_info == 'cb<nu<clos-brack':
                self.__ob_group -= 1
                self.__cb_count = line[-5:-1]
            self.__inline_info = line[6:16]
            if self.__state == 'after_body':
                return False
            action = self.__action_dict.get(self.__state)
            if action is None:
                # Reporting is best effort: stderr may be unusable.
                try:
                    sys.stderr.write('No action for this state!\n')
                except Exception:
                    pass
                # There is nothing to call for this state; calling None
                # would raise a TypeError, so move on to the next line.
                continue
            result = action(line)
            if result == 'new_rtf':
                return False
            elif result == 'old_rtf':
                if self.__run_level > 3:
                    sys.stderr.write(
                        'Old rtf construction %s (bracket %s, line %s)\n'
                        % (self.__inline_info, str(self.__ob_group), line_num)
                    )
                return True
            self.__previous_token = line[6:16]
    return False

View File

@ -10,7 +10,9 @@
# #
# #
#########################################################################
import sys, os, codecs
import sys, os
# , codecs
class Output:
"""
Output file
@ -19,7 +21,8 @@ class Output:
file,
orig_file,
output_dir = None,
out_file = None
out_file = None,
no_ask = True
):
"""
Required:
@ -33,8 +36,9 @@ class Output:
self.__file = file
self.__orig_file = orig_file
self.__output_dir = output_dir
self.__no_ask = 1
self.__no_ask = no_ask
self.__out_file = out_file
def output(self):
    """
    Send the converted file to its destination.

    Required:
        nothing
    Returns:
        nothing
    Logic:
        If an output directory was given, write into that directory.
        Otherwise, if an output file was given, write to that file.
        Otherwise output the lines to the screen.
    """
    if self.__output_dir:
        self.__output_to_dir_func()
    elif self.__out_file:
        # A plain line-by-line copy; the former utf-8 writer is no longer
        # called here.
        self.__output_to_file_func()
    else:
        self.__output_to_standard_func()
def __output_to_dir_func(self):
    """
    Write the converted file into the output directory.

    Requires:
        nothing
    Returns:
        nothing
    Logic:
        The target is the output directory plus the base name of the
        original file with an '.xml' extension, unless an explicit output
        file name was given. If the target already exists and asking is
        enabled, the user is prompted; any answer other than "o" sends the
        output to standard output instead.
    """
    base_name = os.path.splitext(os.path.basename(self.__orig_file))[0]
    output_file = os.path.join(self.__output_dir, '%s.xml' % base_name)
    # change if user wants to output to a specific file
    if self.__out_file:
        output_file = os.path.join(self.__output_dir, self.__out_file)
    user_response = 'o'
    if os.path.isfile(output_file) and not self.__no_ask:
        msg = 'Do you want to overwrite %s?\n' % output_file
        msg += ('Type "o" to overwrite.\n'
                'Type any other key to print to standard output.\n')
        sys.stderr.write(msg)
        user_response = raw_input()
    if user_response == 'o':
        with open(self.__file, 'r') as read_obj:
            # output_file is a local name; it is not stored on the instance.
            with open(output_file, 'w') as write_obj:
                for line in read_obj:
                    write_obj.write(line)
    else:
        self.__output_to_standard_func()
def __output_to_file_func(self):
    """
    Copy the converted file to the requested output file.

    Required:
        nothing
    Returns:
        nothing
    Logic:
        read one line at a time. Output to the output file.
    """
    with open(self.__file, 'r') as read_obj:
        with open(self.__out_file, 'w') as write_obj:
            for line in read_obj:
                write_obj.write(line)
def __output_to_standard_func(self):
    """
    Print the converted file to standard output.

    Required:
        nothing
    Returns:
        nothing
    Logic:
        read one line at a time. Output to standard
    """
    with open(self.__file, 'r') as read_obj:
        for line in read_obj:
            sys.stdout.write(line)
# def __output_xml(self, in_file, out_file):
# """
# output the ill-formed xml file
# """
# (utf8_encode, utf8_decode, utf8_reader, utf8_writer) = codecs.lookup("utf-8")
# write_obj = utf8_writer(open(out_file, 'w'))
# write_obj = open(out_file, 'w')
# read_obj = utf8_writer(open(in_file, 'r'))
# read_obj = open(in_file, 'r')
# line = 1
# while line:
# line = read_obj.readline()
# if isinstance(line, type(u"")):
# line = line.encode("utf-8")
# write_obj.write(line)
# read_obj.close()
# write_obj.close()

View File

@ -11,31 +11,32 @@
# #
#########################################################################
import sys, os
from calibre.ebooks.rtf2xml import copy
from calibre.ptempfile import better_mktemp
class Paragraphs:
"""
=================
Purpose
=================
Write paragraph tags for a tokenized file. (This module won't be any use to use
to you unless you use it as part of the other modules.)
-------------
Method
-------------
RTF does not tell you when a paragraph begins. It only tells you when the
paragraph ends.
In order to make paragraphs out of this limited info, the parser starts in the
body of the documents and assumes it is not in a paragraph. It looks for clues
to begin a paragraph. Text starts a paragraph; so does an inline field or
list-text. If an end of paragraph marker (\par) is found, then this indicates
a blank paragraph.
Once a paragraph is found, the state changes to 'paragraph.' In this state,
clues are looked to for the end of a paragraph. The end of a paragraph marker
(\par) marks the end of a paragraph. So does the end of a footnote or heading;
a paragraph definintion; the end of a field-block; and the beginning of a
section. (How about the end of a section or the end of a field-block?)
=================
Purpose
=================
Write paragraph tags for a tokenized file. (This module won't be of any use
to you unless you use it as part of the other modules.)
-------------
Method
-------------
RTF does not tell you when a paragraph begins. It only tells you when the
paragraph ends.
In order to make paragraphs out of this limited info, the parser starts in the
body of the documents and assumes it is not in a paragraph. It looks for clues
to begin a paragraph. Text starts a paragraph; so does an inline field or
list-text. If an end of paragraph marker (\par) is found, then this indicates
a blank paragraph.
Once a paragraph is found, the state changes to 'paragraph.' In this state,
clues are looked to for the end of a paragraph. The end of a paragraph marker
(\par) marks the end of a paragraph. So does the end of a footnote or heading;
a paragraph definition; the end of a field-block; and the beginning of a
section. (How about the end of a section or the end of a field-block?)
"""
def __init__(self,
in_file,
@ -60,6 +61,7 @@ section. (How about the end of a section or the end of a field-block?)
self.__write_empty_para = write_empty_para
self.__run_level = run_level
self.__write_to = better_mktemp()
def __initiate_values(self):
"""
Initiate all values.
@ -77,7 +79,7 @@ section. (How about the end of a section or the end of a field-block?)
self.__paragraph_dict = {
'cw<pf<par-end___' : self.__close_para_func, # end of paragraph
'mi<mk<headi_-end' : self.__close_para_func, # end of header or footer
##'cw<pf<par-def___' : self.__close_para_func, # paragraph definition
## 'cw<pf<par-def___' : self.__close_para_func, # paragraph definition
# 'mi<mk<fld-bk-end' : self.__close_para_func, # end of field-block
'mi<mk<fldbk-end_' : self.__close_para_func, # end of field-block
'mi<mk<body-close' : self.__close_para_func, # end of body
@ -99,6 +101,7 @@ section. (How about the end of a section or the end of a field-block?)
'mi<mk<pict-start' : self.__start_para_func,
'cw<pf<page-break' : self.__empty_pgbk_func, # page break
}
def __before_body_func(self, line):
"""
Required:
@ -112,6 +115,7 @@ section. (How about the end of a section or the end of a field-block?)
if self.__token_info == 'mi<mk<body-open_':
self.__state = 'not_paragraph'
self.__write_obj.write(line)
def __not_paragraph_func(self, line):
"""
Required:
@ -127,6 +131,7 @@ section. (How about the end of a section or the end of a field-block?)
if action:
action(line)
self.__write_obj.write(line)
def __paragraph_func(self, line):
"""
Required:
@ -144,6 +149,7 @@ section. (How about the end of a section or the end of a field-block?)
action(line)
else:
self.__write_obj.write(line)
def __start_para_func(self, line):
"""
Requires:
@ -160,6 +166,7 @@ section. (How about the end of a section or the end of a field-block?)
)
self.__write_obj.write(self.__start2_marker)
self.__state = 'paragraph'
def __empty_para_func(self, line):
"""
Requires:
@ -176,6 +183,7 @@ section. (How about the end of a section or the end of a field-block?)
'mi<tg<empty_____<para\n'
)
self.__write_obj.write(self.__end_marker) # marker for later parsing
def __empty_pgbk_func(self, line):
"""
Requires:
@ -188,6 +196,7 @@ section. (How about the end of a section or the end of a field-block?)
self.__write_obj.write(
'mi<tg<empty_____<page-break\n'
)
def __close_para_func(self, line):
"""
Requires:
@ -205,6 +214,7 @@ section. (How about the end of a section or the end of a field-block?)
self.__write_obj.write(self.__end_marker) # marker for later parser
self.__write_obj.write(line)
self.__state = 'not_paragraph'
def __bogus_para__def_func(self, line):
"""
Requires:
@ -215,6 +225,7 @@ section. (How about the end of a section or the end of a field-block?)
if a \pard occurs in a paragraph, I want to ignore it. (I believe)
"""
self.__write_obj.write('mi<mk<bogus-pard\n')
def make_paragraphs(self):
"""
Requires:
@ -229,20 +240,18 @@ section. (How about the end of a section or the end of a field-block?)
only other state is 'paragraph'.
"""
self.__initiate_values()
read_obj = open(self.__file, 'r')
self.__write_obj = open(self.__write_to, 'w')
line_to_read = 1
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
self.__token_info = line[:16]
action = self.__state_dict.get(self.__state)
if action == None:
sys.stderr.write('no no matching state in module sections.py\n')
sys.stderr.write(self.__state + '\n')
action(line)
read_obj.close()
self.__write_obj.close()
with open(self.__file, 'r') as read_obj:
with open(self.__write_to, 'w') as self.__write_obj:
for line in read_obj:
self.__token_info = line[:16]
action = self.__state_dict.get(self.__state)
if action is None:
try:
sys.stderr.write('no matching state in module paragraphs.py\n')
sys.stderr.write(self.__state + '\n')
except:
pass
action(line)
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "paragraphs.data")

View File

@ -11,16 +11,24 @@
# #
#########################################################################
import sys,os
from calibre.ebooks.rtf2xml import copy
class Preamble:
"""
Fix the remaining parts of the preamble. This module does very little. It
makes sure that no text gets put in the revision of list table. In the
future, when I understand how to interprett he revision table and list
future, when I understand how to interpret the revision table and list
table, I will make these methods more functional.
"""
def __init__(self, file, bug_handler, platform, default_font, code_page,
copy=None, temp_dir=None):
def __init__(self, file,
bug_handler,
platform,
default_font,
code_page,
copy=None,
temp_dir=None,
):
"""
Required:
file--file to parse
@ -44,6 +52,7 @@ class Preamble:
self.__write_to = os.path.join(temp_dir,"info_table_info.data")
else:
self.__write_to = "info_table_info.data"
def __initiate_values(self):
"""
Initiate all values.
@ -62,12 +71,14 @@ class Preamble:
'mi<mk<revtbl-beg' : self.__found_revision_table_func,
'mi<mk<body-open_' : self.__found_body_func,
}
def __default_func(self, line):
    """
    Handle a line while in the default state.

    Required:
        line -- the current line
    Returns:
        nothing
    Logic:
        Look up the current token in the default dictionary. If a function
        is registered for it, call that function with the line; otherwise
        write the line out unchanged.
    """
    handler = self.__default_dict.get(self.__token_info)
    if not handler:
        self.__write_obj.write(line)
        return
    handler(line)
def __found_rtf_head_func(self, line):
"""
Requires:
@ -84,8 +95,10 @@ class Preamble:
'<platform>%s\n' % (self.__default_font, self.__code_page,
self.__platform)
)
def __found_list_table_func(self, line):
    """
    Switch to the 'list_table' state.

    Required:
        line -- the current line (not used and not written out)
    Returns:
        nothing
    """
    self.__state = 'list_table'
def __list_table_func(self, line):
if self.__token_info == 'mi<mk<listabend_':
self.__state = 'default'
@ -93,8 +106,10 @@ class Preamble:
pass
else:
self.__write_obj.write(line)
def __found_revision_table_func(self, line):
    """
    Switch to the 'revision' state.

    Required:
        line -- the current line (not used and not written out)
    Returns:
        nothing
    """
    self.__state = 'revision'
def __revision_table_func(self, line):
if self.__token_info == 'mi<mk<revtbl-end':
self.__state = 'default'
@ -102,11 +117,14 @@ class Preamble:
pass
else:
self.__write_obj.write(line)
def __found_body_func(self, line):
    """
    Switch to the 'body' state and write the line out.

    Required:
        line -- the current line
    Returns:
        nothing
    """
    self.__state = 'body'
    self.__write_obj.write(line)
def __body_func(self, line):
    """
    Write the line out unchanged.

    Required:
        line -- the current line
    Returns:
        nothing
    """
    self.__write_obj.write(line)
def fix_preamble(self):
"""
Requires:
@ -119,20 +137,15 @@ class Preamble:
the list table.
"""
self.__initiate_values()
read_obj = open(self.__file, 'r')
self.__write_obj = open(self.__write_to, 'w')
line_to_read = 1
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
self.__token_info = line[:16]
action = self.__state_dict.get(self.__state)
if action == None:
sys.stderr.write('no no matching state in module preamble_rest.py\n')
sys.stderr.write(self.__state + '\n')
action(line)
read_obj.close()
self.__write_obj.close()
with open(self.__file, 'r') as read_obj:
with open(self.__write_to, 'w') as self.__write_obj:
for line in read_obj:
self.__token_info = line[:16]
action = self.__state_dict.get(self.__state)
if action is None:
sys.stderr.write(
'no matching state in module preamble_rest.py\n' + self.__state + '\n')
action(line)
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "preamble_div.data")

View File

@ -11,43 +11,44 @@
# #
#########################################################################
import sys, os
from calibre.ebooks.rtf2xml import copy
from calibre.ptempfile import better_mktemp
class Sections:
"""
=================
Purpose
=================
Write section tags for a tokenized file. (This module won't be any use to use
to you unless you use it as part of the other modules.)
---------------
logic
---------------
The tags for the first section breaks have already been written.
RTF stores section breaks with the \sect tag. Each time this tag is
encountered, add one to the counter.
When I encounter the \sectd tag, I want to collect all the appropriate tokens
that describe the section. When I reach a \pard, I know I an stop collecting
tokens and write the section tags.
The exception to this method occurs when sections occur in field blocks, such
as the index. Normally, two section break occur within the index and other
field-blocks. (If less or more section breaks occurr, this code may not work.)
I want the sections to occurr outside of the index. That is, the index
should be nested inside one section tag. After the index is complete, a new
section should begin.
In order to write the sections outside of the field blocks, I have to store
all of the field block as a string. When I ecounter the \sect tag, add one to
the section counter, but store this number in a list. Likewise, store the
information describing the section in another list.
When I reach the end of the field block, choose the first item from the
numbered list as the section number. Choose the first item in the description
list as the values and attributes of the section. Enclose the field string
between the section tags.
Start a new section outside the field-block strings. Use the second number in
the list; use the second item in the description list.
CHANGE (2004-04-26) No longer write sections that occurr in field-blocks.
Instead, ingore all section information in a field-block.
=================
Purpose
=================
Write section tags for a tokenized file. (This module won't be of any use
to you unless you use it as part of the other modules.)
---------------
logic
---------------
The tags for the first section breaks have already been written.
RTF stores section breaks with the \sect tag. Each time this tag is
encountered, add one to the counter.
When I encounter the \sectd tag, I want to collect all the appropriate tokens
that describe the section. When I reach a \pard, I know I can stop collecting
tokens and write the section tags.
The exception to this method occurs when sections occur in field blocks, such
as the index. Normally, two section breaks occur within the index and other
field-blocks. (If fewer or more section breaks occur, this code may not work.)
I want the sections to occur outside of the index. That is, the index
should be nested inside one section tag. After the index is complete, a new
section should begin.
In order to write the sections outside of the field blocks, I have to store
all of the field block as a string. When I encounter the \sect tag, add one to
the section counter, but store this number in a list. Likewise, store the
information describing the section in another list.
When I reach the end of the field block, choose the first item from the
numbered list as the section number. Choose the first item in the description
list as the values and attributes of the section. Enclose the field string
between the section tags.
Start a new section outside the field-block strings. Use the second number in
the list; use the second item in the description list.
CHANGE (2004-04-26) No longer write sections that occur in field-blocks.
Instead, ignore all section information in a field-block.
"""
def __init__(self,
in_file,

View File

@ -137,8 +137,9 @@ def _config(): # {{{
c.add_opt('LRF_ebook_viewer_options', default=None,
help=_('Options for the LRF ebook viewer'))
c.add_opt('internally_viewed_formats', default=['LRF', 'EPUB', 'LIT',
'MOBI', 'PRC', 'AZW', 'HTML', 'FB2', 'PDB', 'RB', 'SNB', 'HTMLZ'],
help=_('Formats that are viewed using the internal viewer'))
'MOBI', 'PRC', 'POBI', 'AZW', 'AZW3', 'HTML', 'FB2', 'PDB', 'RB',
'SNB', 'HTMLZ'], help=_(
'Formats that are viewed using the internal viewer'))
c.add_opt('column_map', default=ALL_COLUMNS,
help=_('Columns to be displayed in the book list'))
c.add_opt('autolaunch_server', default=False, help=_('Automatically launch content server on application startup'))

View File

@ -10,7 +10,7 @@ from functools import partial
from PyQt4.Qt import (QMenu, Qt, QInputDialog, QToolButton, QDialog,
QDialogButtonBox, QGridLayout, QLabel, QLineEdit, QIcon, QSize,
QCoreApplication)
QCoreApplication, pyqtSignal)
from calibre import isbytestring, sanitize_file_name_unicode
from calibre.constants import filesystem_encoding, iswindows
@ -142,6 +142,7 @@ class ChooseLibraryAction(InterfaceAction):
dont_add_to = frozenset(['context-menu-device'])
action_add_menu = True
action_menu_clone_qaction = _('Switch/create library...')
restore_view_state = pyqtSignal(object)
def genesis(self):
self.base_text = _('%d books')
@ -206,6 +207,17 @@ class ChooseLibraryAction(InterfaceAction):
self.maintenance_menu.addAction(ac)
self.choose_menu.addMenu(self.maintenance_menu)
self.view_state_map = {}
self.restore_view_state.connect(self._restore_view_state,
type=Qt.QueuedConnection)
@property
def preserve_state_on_switch(self):
    """
    The state-preserving helper obtained from the library view.

    It is created on first access by calling the library view's
    preserve_state() with require_selected_ids=False, then cached on the
    instance so later accesses return the same object.
    """
    cached = getattr(self, '_preserve_state_on_switch', None)
    if cached is not None:
        return cached
    cached = self.gui.library_view.preserve_state(require_selected_ids=False)
    self._preserve_state_on_switch = cached
    return cached
def pick_random(self, *args):
self.gui.iactions['Pick Random Book'].pick_random()
@ -221,6 +233,13 @@ class ChooseLibraryAction(InterfaceAction):
def library_changed(self, db):
    """
    React to *db* becoming the current library.

    Records the library as used, rebuilds the menus, and, if a view state
    was stored for this library's canonical path, emits restore_view_state
    with that stored state.
    """
    self.stats.library_used(db)
    self.build_menus()
    key = self.stats.canonicalize_path(db.library_path)
    saved = self.view_state_map.get(key, None)
    if saved is None:
        return
    self.restore_view_state.emit(saved)
def _restore_view_state(self, state):
    """
    Hand a previously stored view state back to the state helper.

    Assigns *state* to the `state` attribute of preserve_state_on_switch.
    NOTE(review): presumably that assignment is what re-applies the
    position and selection in the view -- confirm against the helper.
    """
    self.preserve_state_on_switch.state = state
def initialization_complete(self):
self.library_changed(self.gui.library_view.model().db)
@ -401,8 +420,11 @@ class ChooseLibraryAction(InterfaceAction):
def switch_requested(self, location):
if not self.change_library_allowed():
return
db = self.gui.library_view.model().db
current_lib = self.stats.canonicalize_path(db.library_path)
self.view_state_map[current_lib] = self.preserve_state_on_switch.state
loc = location.replace('/', os.sep)
exists = self.gui.library_view.model().db.exists_at(loc)
exists = db.exists_at(loc)
if not exists:
d = MovedDialog(self.stats, location, self.gui)
ret = d.exec_()

View File

@ -6,6 +6,7 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from functools import partial
from collections import Counter
from PyQt4.Qt import QObject, QTimer
@ -117,13 +118,14 @@ class DeleteAction(InterfaceAction):
def _get_selected_formats(self, msg, ids):
    """
    Ask the user which formats to act on for the given books.

    Counts, for every lower-cased format name, how many of the books in
    *ids* have it (each book contributes at most once per format), and
    passes that Counter to the SelectFormats dialog together with *msg*.

    Returns the dialog's selected_formats, or None if the dialog was not
    accepted.
    """
    from calibre.gui2.dialogs.select_formats import SelectFormats
    counts = Counter()
    db = self.gui.library_view.model().db
    for book_id in ids:
        fmts_ = db.formats(book_id, index_is_id=True, verify_formats=False)
        if fmts_:
            # frozenset removes duplicates within one book before counting.
            counts.update(frozenset(f.lower() for f in fmts_.split(',')))
    d = SelectFormats(counts, msg, parent=self.gui)
    if d.exec_() != d.Accepted:
        return None
    return d.selected_formats

View File

@ -12,11 +12,11 @@ from PyQt4.Qt import (QDialog, QVBoxLayout, QHBoxLayout, QRadioButton, QFrame,
from calibre import as_unicode
from calibre.constants import isosx
from calibre.gui2 import error_dialog, question_dialog, open_local_file
from calibre.gui2 import error_dialog, question_dialog, open_local_file, gprefs
from calibre.gui2.actions import InterfaceAction
from calibre.ptempfile import (PersistentTemporaryDirectory,
PersistentTemporaryFile)
from calibre.utils.config import prefs
from calibre.utils.config import prefs, tweaks
class TweakBook(QDialog):
@ -32,11 +32,16 @@ class TweakBook(QDialog):
index_is_id=True))
button = self.fmt_choice_buttons[0]
button_map = {unicode(x.text()):x for x in self.fmt_choice_buttons}
of = prefs['output_format'].upper()
for x in self.fmt_choice_buttons:
if unicode(x.text()) == of:
button = x
break
df = tweaks.get('default_tweak_format', None)
lf = gprefs.get('last_tweak_format', None)
if df and df.lower() == 'remember' and lf in button_map:
button = button_map[lf]
elif df and df.upper() in button_map:
button = button_map[df.upper()]
elif of in button_map:
button = button_map[of]
button.setChecked(True)
self.init_state()
@ -148,6 +153,8 @@ class TweakBook(QDialog):
def explode(self):
    """
    Start exploding the book.

    Shows a progress message, remembers the chosen format (upper-cased)
    under 'last_tweak_format' when more than one format button exists, and
    schedules do_explode to run 5 ms later.
    """
    self.show_msg(_('Exploding, please wait...'))
    several_formats = len(self.fmt_choice_buttons) > 1
    if several_formats:
        chosen = self.current_format.upper()
        gprefs.set('last_tweak_format', chosen)
    QTimer.singleShot(5, self.do_explode)
def ask_question(self, msg):

View File

@ -161,8 +161,14 @@ class EditorWidget(QWebView): # {{{
self.page().setContentEditable(True)
def clear_text(self, *args):
    """
    Remove all text and formatting from the editor.

    The select-all, remove-format and delete steps are grouped into one
    macro on the page's undo stack. Afterwards the font style is re-applied
    and the editor is given keyboard focus.
    """
    us = self.page().undoStack()
    us.beginMacro('clear all text')
    try:
        self.action_select_all.trigger()
        self.action_remove_format.trigger()
        # Delete rather than cut, so the clipboard is left untouched.
        self.exec_command('delete')
    finally:
        # Always close the macro, even if one of the steps fails.
        us.endMacro()
    self.set_font_style()
    self.setFocus(Qt.OtherFocusReason)
def link_clicked(self, url):
open_url(url)
@ -262,20 +268,22 @@ class EditorWidget(QWebView): # {{{
def fset(self, val):
self.setHtml(val)
fi = QFontInfo(QApplication.font(self))
f = fi.pixelSize() + 1 + int(tweaks['change_book_details_font_size_by'])
fam = unicode(fi.family()).strip().replace('"', '')
if not fam:
fam = 'sans-serif'
style = 'font-size: %fpx; font-family:"%s",sans-serif;' % (f, fam)
# toList() is needed because PyQt on Debian is old/broken
for body in self.page().mainFrame().documentElement().findAll('body').toList():
body.setAttribute('style', style)
self.page().setContentEditable(True)
self.set_font_style()
return property(fget=fget, fset=fset)
def set_font_style(self):
    """
    Apply the application font to every <body> element of the page.

    The size is the widget font's pixel size plus one, plus the
    'change_book_details_font_size_by' tweak. The family is the widget
    font's family with double quotes removed, falling back to 'sans-serif'
    when empty. The page is made content-editable afterwards.
    """
    font_info = QFontInfo(QApplication.font(self))
    size_px = font_info.pixelSize() + 1 + int(tweaks['change_book_details_font_size_by'])
    family = unicode(font_info.family()).strip().replace('"', '') or 'sans-serif'
    css = 'font-size: %fpx; font-family:"%s",sans-serif;' % (size_px, family)
    root = self.page().mainFrame().documentElement()
    # toList() is needed because PyQt on Debian is old/broken
    for body in root.findAll('body').toList():
        body.setAttribute('style', css)
    self.page().setContentEditable(True)
def keyPressEvent(self, ev):
if ev.key() in (Qt.Key_Tab, Qt.Key_Escape, Qt.Key_Backtab):
ev.ignore()
@ -627,4 +635,6 @@ if __name__ == '__main__':
w = Editor()
w.resize(800, 600)
w.show()
w.html = '<b>testing</b>'
app.exec_()
#print w.html

View File

@ -126,7 +126,8 @@ class BulkConfig(Config):
def setup_output_formats(self, db, preferred_output_format):
if preferred_output_format:
preferred_output_format = preferred_output_format.lower()
output_formats = sorted(available_output_formats())
output_formats = sorted(available_output_formats(),
key=lambda x:{'EPUB':'!A', 'MOBI':'!B'}.get(x.upper(), x))
output_formats.remove('oeb')
preferred_output_format = preferred_output_format if \
preferred_output_format and preferred_output_format \

View File

@ -109,12 +109,18 @@
</item>
<item row="0" column="1">
<widget class="QDoubleSpinBox" name="opt_margin_left">
<property name="specialValueText">
<string>No margin</string>
</property>
<property name="suffix">
<string> pt</string>
</property>
<property name="decimals">
<number>1</number>
</property>
<property name="minimum">
<double>-1.000000000000000</double>
</property>
<property name="maximum">
<double>200.000000000000000</double>
</property>
@ -132,12 +138,18 @@
</item>
<item row="1" column="1">
<widget class="QDoubleSpinBox" name="opt_margin_top">
<property name="specialValueText">
<string>No margin</string>
</property>
<property name="suffix">
<string> pt</string>
</property>
<property name="decimals">
<number>1</number>
</property>
<property name="minimum">
<double>-1.000000000000000</double>
</property>
<property name="maximum">
<double>200.000000000000000</double>
</property>
@ -155,12 +167,18 @@
</item>
<item row="2" column="1">
<widget class="QDoubleSpinBox" name="opt_margin_right">
<property name="specialValueText">
<string>No margin</string>
</property>
<property name="suffix">
<string> pt</string>
</property>
<property name="decimals">
<number>1</number>
</property>
<property name="minimum">
<double>-1.000000000000000</double>
</property>
<property name="maximum">
<double>200.000000000000000</double>
</property>
@ -178,12 +196,18 @@
</item>
<item row="3" column="1">
<widget class="QDoubleSpinBox" name="opt_margin_bottom">
<property name="specialValueText">
<string>No margin</string>
</property>
<property name="suffix">
<string> pt</string>
</property>
<property name="decimals">
<number>1</number>
</property>
<property name="minimum">
<double>-1.000000000000000</double>
</property>
<property name="maximum">
<double>200.000000000000000</double>
</property>

View File

@ -242,7 +242,8 @@ class Config(ResizableDialog, Ui_Dialog):
preferred_output_format):
if preferred_output_format:
preferred_output_format = preferred_output_format.lower()
output_formats = sorted(available_output_formats())
output_formats = sorted(available_output_formats(),
key=lambda x:{'EPUB':'!A', 'MOBI':'!B'}.get(x.upper(), x))
output_formats.remove('oeb')
input_format, input_formats = get_input_format_for_book(db, book_id,
preferred_input_format)

View File

@ -349,7 +349,8 @@ class Text(Base):
return d.exec_()
def edit(self):
if self.getter() != self.initial_val:
if (self.getter() != self.initial_val and (self.getter() or
self.initial_val)):
d = self._save_dialog(self.parent, _('Values changed'),
_('You have changed the values. In order to use this '
'editor, you must either discard or apply these '

View File

@ -182,7 +182,8 @@ class SearchDialog(QDialog, Ui_Dialog):
global box_values
box_values = copy.deepcopy(self.box_last_values)
if general:
ans.append(unicode(self.general_combo.currentText()) + ':"' + general + '"')
ans.append(unicode(self.general_combo.currentText()) + ':"' +
self.mc + general + '"')
if ans:
return ' and '.join(ans)
return ''

Some files were not shown because too many files have changed in this diff Show More