Merge from trunk

This commit is contained in:
Charles Haley 2012-05-17 06:26:35 +02:00
commit 99c57ac10d
201 changed files with 62463 additions and 36984 deletions

View File

@ -16,7 +16,6 @@ resources/ebook-convert-complete.pickle
resources/builtin_recipes.xml resources/builtin_recipes.xml
resources/builtin_recipes.zip resources/builtin_recipes.zip
resources/template-functions.json resources/template-functions.json
resources/display/*.js
setup/installer/windows/calibre/build.log setup/installer/windows/calibre/build.log
src/calibre/translations/.errors src/calibre/translations/.errors
src/cssutils/.svn/ src/cssutils/.svn/

View File

@ -19,6 +19,67 @@
# new recipes: # new recipes:
# - title: # - title:
- version: 0.8.51
date: 2012-05-11
new features:
- title: "When switching libraries preserve the position and selected books if you switch back to a previously opened library."
tickets: [994514]
- title: "Conversion pipeline: Filter out the useless font-face rules inserted by Microsoft Word for every font on the system"
- title: "Driver for Motorola XT875 and Pandigital SuperNova"
tickets: [996890]
- title: "Add a colour swatch to the dialog for creating column coloring rules, to ease selection of colors"
tickets: [994811]
- title: "EPUB Output: Consolidate internal CSS generated by calibre into external stylesheets for ease of editing the EPUB"
- title: "List EPUB and MOBI at the top of the dropdown list of formats to convert to, as they are the most common choices"
tickets: [994838]
bug fixes:
- title: "E-book viewer: Improve performance when switching between normal and fullscreen views."
tickets: [996102]
- title: "Edit metadata dialog: When running download metadata do not insert duplicate tags into the list of tags"
- title: "KF8 Input: Do not error out if the file has a few invalidly encoded bytes."
tickets: [997034]
- title: "Fix download of news in AZW3 format not working"
tickets: [996439]
- title: "Pocketbook driver: Update for new PB 611 firmware."
tickets: [903079]
- title: "ebook-convert: Error out if the user provides extra command line args instead of silently ignoring them"
tickets: [994939]
- title: "EPUB Output: Do not self close any container tags to prevent artifacts when EPUBs are viewed using buggy browser based viewers."
tickets: [994861]
- title: "Fix regression in 0.8.50 that broke the conversion of HTML files that contained non-ascii font-face declarations, typically produced by Microsoft Word"
improved recipes:
- Mainichi news
- derStandard
- Endgadget Japan
new recipes:
- title: Mainichi English
author: Hiroshi Miura
- title: The Grid TO
author: Yusuf W
- title: National Geographic (Italy)
author: faber1971
- title: Rebelion
author: Marc Busque
- version: 0.8.50 - version: 0.8.50
date: 2012-05-04 date: 2012-05-04

View File

@ -0,0 +1,26 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1336986047(BasicNewsRecipe):
    """Fetch the latest campaigns from the Ads of the World FeedBurner feed."""
    title = u'Ads of the World'
    oldest_article = 7  # maximum article age, in days
    max_articles_per_feed = 100
    auto_cleanup = False  # manual tag lists below do the cleanup instead
    description = 'The best international advertising campaigns'
    language = 'en'
    __author__ = 'faber1971'
    no_stylesheets = True
    # Keep only the main content column of each article page.
    keep_only_tags = [
        dict(name='div', attrs={'id':'primary'})
    ]
    # Strip sharing links, form widgets, comment/option panels and pagers.
    remove_tags = [
        dict(name='ul', attrs={'class':'links inline'})
        ,dict(name='div', attrs={'class':'form-item'})
        ,dict(name='div', attrs={'id':['options', 'comments']})
        ,dict(name='ul', attrs={'id':'nodePager'})
    ]
    reverse_article_order = True  # present oldest first within the feed
    masthead_url = 'http://bigcatgroup.co.uk/files/2011/01/05-ads-of-the-world.png'
    feeds = [(u'Ads of the world', u'http://feeds.feedburner.com/adsoftheworld-latest')]

View File

@ -0,0 +1,43 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AirForceTimes(BasicNewsRecipe):
    """Download news of the U.S. Air Force from the AirForceTimes.com RSS feeds."""
    title = 'Air Force Times'
    __author__ = 'jde'
    __date__ = '16 May 2012'
    __version__ = '1.0'
    description = 'News of the U.S. Air Force'
    language = 'en'
    publisher = 'AirForceTimes.com'
    category = 'news, U.S. Air Force'
    tags = 'news, U.S. Air Force'
    cover_url = 'http://www.airforcetimes.com/images/logo_airforcetimes_alert.jpg'
    masthead_url = 'http://www.airforcetimes.com/images/logo_airforcetimes_alert.jpg'
    oldest_article = 7 #days
    max_articles_per_feed = 25
    publication_type = 'newspaper'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None  # let calibre auto-detect the page encoding
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = True  # rely on heuristic cleanup instead of tag lists
    # One feed per site section.
    feeds = [
        ('News', 'http://www.airforcetimes.com/rss_news.php'),
        ('Benefits', 'http://www.airforcetimes.com/rss_benefits.php'),
        ('Money', 'http://www.airforcetimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.airforcetimes.com/rss_careers.php'),
        ('Community', 'http://www.airforcetimes.com/rss_community.php'),
        ('Off Duty', 'http://www.airforcetimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.airforcetimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.airforcetimes.com/rss_guard.php'),
    ]

42
recipes/army_times.recipe Normal file
View File

@ -0,0 +1,42 @@
from calibre.web.feeds.news import BasicNewsRecipe
class ArmyTimes(BasicNewsRecipe):
    """Download news of the U.S. Army from the ArmyTimes.com RSS feeds."""
    title = 'Army Times'
    __author__ = 'jde'
    __date__ = '16 May 2012'
    __version__ = '1.0'
    description = 'News of the U.S. Army'
    language = 'en'
    publisher = 'ArmyTimes.com'
    category = 'news, U.S. Army'
    tags = 'news, U.S. Army'
    cover_url = 'http://www.armytimes.com/images/logo_armytimes_alert.jpg'
    masthead_url = 'http://www.armytimes.com/images/logo_armytimes_alert.jpg'
    oldest_article = 7 #days
    max_articles_per_feed = 25
    publication_type = 'newspaper'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None  # let calibre auto-detect the page encoding
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = True  # rely on heuristic cleanup instead of tag lists
    # One feed per site section.
    feeds = [
        ('News', 'http://www.armytimes.com/rss_news.php'),
        ('Benefits', 'http://www.armytimes.com/rss_benefits.php'),
        ('Money', 'http://www.armytimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.armytimes.com/rss_careers.php'),
        ('Community', 'http://www.armytimes.com/rss_community.php'),
        ('Off Duty', 'http://www.armytimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.armytimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.armytimes.com/rss_guard.php'),
    ]

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
''' '''
arstechnica.com arstechnica.com
''' '''
@ -12,22 +12,24 @@ class ArsTechnica(BasicNewsRecipe):
title = u'Ars Technica' title = u'Ars Technica'
language = 'en' language = 'en'
__author__ = 'Darko Miletic, Sujata Raman, Alexis Rohou' __author__ = 'Darko Miletic, Sujata Raman, Alexis Rohou'
description = 'The art of technology' description = 'Ars Technica: Serving the technologist for 1.2 decades'
publisher = 'Ars Technica' publisher = 'Conde Nast Publications'
category = 'news, IT, technology' category = 'news, IT, technology'
oldest_article = 5 oldest_article = 5
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
encoding = 'utf-8' encoding = 'utf-8'
use_embedded_content = False use_embedded_content = False
remove_empty_feeds = True
publication_type = 'newsportal'
extra_css = ''' extra_css = '''
body {font-family: Arial,Helvetica,sans-serif} body {font-family: Arial,sans-serif}
.title{text-align: left} .heading{font-family: "Times New Roman",serif}
.byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none} .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none}
.news-item-figure-caption-text{font-size:small; font-style:italic} img{display: block}
.news-item-figure-caption-byline{font-size:small; font-style:italic; font-weight:bold} .caption-text{font-size:small; font-style:italic}
.caption-byline{font-size:small; font-style:italic; font-weight:bold}
''' '''
ignoreEtcArticles = True # Etc feed items can be ignored, as they're not real stories
conversion_options = { conversion_options = {
'comments' : description 'comments' : description
@ -36,50 +38,38 @@ class ArsTechnica(BasicNewsRecipe):
,'publisher' : publisher ,'publisher' : publisher
} }
keep_only_tags = [
#preprocess_regexps = [ dict(attrs={'class':'standalone'})
# (re.compile(r'<div class="news-item-figure', re.DOTALL|re.IGNORECASE),lambda match: '<div class="news-item-figure"') ,dict(attrs={'id':'article-guts'})
# ,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>') ]
# ]
keep_only_tags = [dict(name='div', attrs={'id':['story','etc-story']})]
remove_tags = [ remove_tags = [
dict(name=['object','link','embed']) dict(name=['object','link','embed','iframe','meta'])
,dict(name='div', attrs={'class':'read-more-link'}) ,dict(attrs={'class':'corner-info'})
] ]
#remove_attributes=['width','height'] remove_attributes = ['lang']
feeds = [ feeds = [
(u'Infinite Loop (Apple content)' , u'http://feeds.arstechnica.com/arstechnica/apple/' ) (u'Infinite Loop (Apple content)' , u'http://feeds.arstechnica.com/arstechnica/apple/' )
,(u'Opposable Thumbs (Gaming content)' , u'http://feeds.arstechnica.com/arstechnica/gaming/' ) ,(u'Opposable Thumbs (Gaming content)' , u'http://feeds.arstechnica.com/arstechnica/gaming/' )
,(u'Gear and Gadgets' , u'http://feeds.arstechnica.com/arstechnica/gadgets/' ) ,(u'Gear and Gadgets' , u'http://feeds.arstechnica.com/arstechnica/gadgets/' )
,(u'Chipster (Hardware content)' , u'http://feeds.arstechnica.com/arstechnica/hardware/' )
,(u'Uptime (IT content)' , u'http://feeds.arstechnica.com/arstechnica/business/' ) ,(u'Uptime (IT content)' , u'http://feeds.arstechnica.com/arstechnica/business/' )
,(u'Open Ended (Open Source content)' , u'http://feeds.arstechnica.com/arstechnica/open-source/') ,(u'Open Ended (Open Source content)' , u'http://feeds.arstechnica.com/arstechnica/open-source/')
,(u'One Microsoft Way' , u'http://feeds.arstechnica.com/arstechnica/microsoft/' ) ,(u'One Microsoft Way' , u'http://feeds.arstechnica.com/arstechnica/microsoft/' )
,(u'Nobel Intent (Science content)' , u'http://feeds.arstechnica.com/arstechnica/science/' ) ,(u'Scientific method (Science content)' , u'http://feeds.arstechnica.com/arstechnica/science/' )
,(u'Law & Disorder (Tech policy content)' , u'http://feeds.arstechnica.com/arstechnica/tech-policy/') ,(u'Law & Disorder (Tech policy content)' , u'http://feeds.arstechnica.com/arstechnica/tech-policy/')
] ]
# This deals with multi-page stories
def append_page(self, soup, appendtag, position): def append_page(self, soup, appendtag, position):
pager = soup.find('div',attrs={'class':'pager'}) pager = soup.find(attrs={'class':'numbers'})
if pager: if pager:
for atag in pager.findAll('a',href=True): nexttag = pager.find(attrs={'class':'next'})
str = self.tag_to_string(atag) if nexttag:
if str.startswith('Next'): nurl = nexttag.parent['href']
nurl = 'http://arstechnica.com' + atag['href']
rawc = self.index_to_soup(nurl,True) rawc = self.index_to_soup(nurl,True)
soup2 = BeautifulSoup(rawc, fromEncoding=self.encoding) soup2 = BeautifulSoup(rawc, fromEncoding=self.encoding)
texttag = soup2.find(attrs={'id':'article-guts'})
readmoretag = soup2.find('div', attrs={'class':'read-more-link'})
if readmoretag:
readmoretag.extract()
texttag = soup2.find('div', attrs={'class':'body'})
for it in texttag.findAll(style=True):
del it['style']
newpos = len(texttag.contents) newpos = len(texttag.contents)
self.append_page(soup2,texttag,newpos) self.append_page(soup2,texttag,newpos)
texttag.extract() texttag.extract()
@ -88,41 +78,24 @@ class ArsTechnica(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
# Adds line breaks near the byline (not sure why this is needed)
ftag = soup.find('div', attrs={'class':'byline'})
if ftag:
brtag = Tag(soup,'br')
brtag2 = Tag(soup,'br')
ftag.insert(4,brtag)
ftag.insert(5,brtag2)
# Remove style items
for item in soup.findAll(style=True):
del item['style']
# Remove id
for item in soup.findAll(id=True):
del item['id']
# For some reason, links to authors don't have the domainname
a_author = soup.find('a',{'href':re.compile("^/author")})
if a_author:
a_author['href'] = 'http://arstechnica.com'+a_author['href']
# within div class news-item-figure, we need to grab images
# Deal with multi-page stories
self.append_page(soup, soup.body, 3) self.append_page(soup, soup.body, 3)
for item in soup.findAll('a'):
limg = item.find('img')
if item.string is not None:
str = item.string
item.replaceWith(str)
else:
if limg:
item.name = 'div'
item.attrs = []
else:
str = self.tag_to_string(item)
item.replaceWith(str)
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup return soup
def get_article_url(self, article): def preprocess_raw_html(self, raw, url):
# If the article title starts with Etc:, don't return it return '<html><head>'+raw[raw.find('</head>'):]
if self.ignoreEtcArticles:
article_title = article.get('title',None)
if re.match('Etc: ',article_title) is not None:
return None
# The actual article is in a guid tag
return article.get('guid', None).rpartition('?')[0]

View File

@ -7,10 +7,11 @@ __copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
''' http://www.derstandard.at - Austrian Newspaper ''' ''' http://www.derstandard.at - Austrian Newspaper '''
import re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from time import strftime
class DerStandardRecipe(BasicNewsRecipe): class DerStandardRecipe(BasicNewsRecipe):
title = u'derStandard' title = u'derStandard'
__author__ = 'Gerhard Aigner and Sujata Raman and Marcel Jira' __author__ = 'Gerhard Aigner and Sujata Raman and Marcel Jira and Peter Reschenhofer'
description = u'Nachrichten aus Österreich' description = u'Nachrichten aus Österreich'
publisher ='derStandard.at' publisher ='derStandard.at'
category = 'news, politics, nachrichten, Austria' category = 'news, politics, nachrichten, Austria'
@ -88,3 +89,41 @@ class DerStandardRecipe(BasicNewsRecipe):
for t in soup.findAll(['ul', 'li']): for t in soup.findAll(['ul', 'li']):
t.name = 'div' t.name = 'div'
return soup return soup
def get_cover_url(self):
highResolution = True
date = strftime("%Y/%Y%m%d")
# it is also possible for the past
#date = '2012/20120503'
urlP1 = 'http://epaper.derstandarddigital.at/'
urlP2 = 'data_ep/STAN/' + date
urlP3 = '/V.B1/'
urlP4 = 'paper.htm'
urlHTML = urlP1 + urlP2 + urlP3 + urlP4
br = self.clone_browser(self.browser)
htmlF = br.open_novisit(urlHTML)
htmlC = htmlF.read()
# URL EXAMPLE: data_ep/STAN/2012/20120504/V.B1/pages/A3B6798F-2751-4D8D-A103-C5EF22F7ACBE.htm
# consists of part2 + part3 + 'pages/' + code
# 'pages/' has length 6, code has lenght 36
index = htmlC.find(urlP2) + len(urlP2 + urlP3) + 6
code = htmlC[index:index + 36]
# URL EXAMPLE HIGH RESOLUTION: http://epaper.derstandarddigital.at/data_ep/STAN/2012/20120504/pagejpg/A3B6798F-2751-4D8D-A103-C5EF22F7ACBE_b.png
# URL EXAMPLE LOW RESOLUTION: http://epaper.derstandarddigital.at/data_ep/STAN/2012/20120504/pagejpg/2AB52F71-11C1-4859-9114-CDCD79BEFDCB.png
urlPic = urlP1 + urlP2 + '/pagejpg/' + code
if highResolution:
urlPic = urlPic + '_b'
urlPic = urlPic + '.png'
return urlPic

30
recipes/economico.recipe Normal file
View File

@ -0,0 +1,30 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Economico(BasicNewsRecipe):
    """Download articles from the Portuguese business daily Económico (economico.sapo.pt)."""
    title = u'Economico'
    language = 'pt'
    __author__ = 'Krittika Goyal'
    oldest_article = 1 #days
    max_articles_per_feed = 25
    encoding = 'utf-8'
    use_embedded_content = False
    no_stylesheets = True
    auto_cleanup = True  # rely on heuristic cleanup instead of tag lists
    # One feed per site section.
    feeds = [
        ('Ultima Hora',
         'http://economico.sapo.pt/rss/ultimas'),
        ('Em Foco',
         'http://economico.sapo.pt/rss/emfoco'),
        ('Mercados',
         'http://economico.sapo.pt/rss/mercados'),
        ('Empresas',
         'http://economico.sapo.pt/rss/empresas'),
        ('Economia',
         'http://economico.sapo.pt/rss/economia'),
        ('Politica',
         'http://economico.sapo.pt/rss/politica'),
    ]

View File

@ -17,7 +17,25 @@ class EndgadgetJapan(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
language = 'ja' language = 'ja'
encoding = 'utf-8' encoding = 'utf-8'
feeds = [(u'engadget', u'http://japanese.engadget.com/rss.xml')] index = 'http://japanese.engadget.com/'
remove_javascript = True
remove_tags_before = dict(name="h1", attrs={'class':"post_title"})
remove_tags_after = dict(name='div', attrs={'class':'post_body'})
def parse_index(self):
feeds = []
newsarticles = []
soup = self.index_to_soup(self.index)
for topstories in soup.findAll('div',attrs={'class':'post_content'}):
itt = topstories.find('h4')
itema = itt.find('a',href=True)
newsarticles.append({
'title' :itema.string
,'date' :''
,'url' :itema['href']
,'description':''
})
feeds.append(('Latest Posts', newsarticles))
return feeds
remove_tags_before = dict(name="div", attrs={'id':"content_wrap"})
remove_tags_after = dict(name='h3', attrs={'id':'addcomments'})

82
recipes/folha.recipe Normal file
View File

@ -0,0 +1,82 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.folha.uol.com.br
'''
import urllib
from calibre.web.feeds.news import BasicNewsRecipe
class Folha_de_s_paulo(BasicNewsRecipe):
    """Download the web portal of Folha de São Paulo (www.folha.uol.com.br).

    Articles arrive via FeedBurner feeds; the real article URL is embedded
    after a ``/*`` marker in the feed link and each article is fetched through
    the site's print-version tool.
    """
    title = u'Folha de São Paulo - portal'
    __author__ = 'Darko Miletic'
    description = 'Um Jornala a servicao do Brasil'
    publisher = 'Folhapress'
    category = 'news, politics, Brasil'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'pt_BR'
    remove_empty_feeds = True
    publication_type = 'newspaper'
    masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif }
        img{margin-bottom: 0.4em; display:block}
    """

    conversion_options = {
        'comment'   : description
        , 'tags'     : category
        , 'publisher': publisher
        , 'language' : language
    }

    remove_tags = [dict(name=['meta','link','base','iframe','embed','object'])]
    keep_only_tags = [dict(attrs={'id':'articleNew'})]

    # One feed per site section.
    feeds = [
        (u'Poder'          , u'http://feeds.folha.uol.com.br/poder/rss091.xml'                 )
        ,(u'Mundo'          , u'http://feeds.folha.uol.com.br/mundo/rss091.xml'                 )
        ,(u'Mercado'        , u'http://feeds.folha.uol.com.br/mercado/rss091.xml'               )
        ,(u'Cotidiano'      , u'http://feeds.folha.uol.com.br/cotidiano/rss091.xml'             )
        ,(u'Esporte'        , u'http://feeds.folha.uol.com.br/esporte/rss091.xml'               )
        ,(u'Ilustrada'      , u'http://feeds.folha.uol.com.br/ilustrada/rss091.xml'             )
        ,(u'F5'             , u'http://feeds.folha.uol.com.br/f5/rss091.xml'                    )
        ,(u'Ciência'        , u'http://feeds.folha.uol.com.br/ciencia/rss091.xml'               )
        ,(u'Tec'            , u'http://feeds.folha.uol.com.br/tec/rss091.xml'                   )
        ,(u'Ambiente'       , u'http://feeds.folha.uol.com.br/ambiente/rss091.xml'              )
        ,(u'Bichos'         , u'http://feeds.folha.uol.com.br/bichos/rss091.xml'                )
        ,(u'Celebridades'   , u'http://feeds.folha.uol.com.br/celebridades/rss091.xml'          )
        ,(u'Comida'         , u'http://feeds.folha.uol.com.br/comida/rss091.xml'                )
        ,(u'Equilibrio'     , u'http://feeds.folha.uol.com.br/equilibrioesaude/rss091.xml'      )
        ,(u'Folhateen'      , u'http://feeds.folha.uol.com.br/folhateen/rss091.xml'             )
        ,(u'Folhinha'       , u'http://feeds.folha.uol.com.br/folhinha/rss091.xml'              )
        ,(u'Ilustrissima'   , u'http://feeds.folha.uol.com.br/ilustrissima/rss091.xml'          )
        ,(u'Saber'          , u'http://feeds.folha.uol.com.br/saber/rss091.xml'                 )
        ,(u'Turismo'        , u'http://feeds.folha.uol.com.br/turismo/rss091.xml'               )
        ,(u'Panel do Leitor', u'http://feeds.folha.uol.com.br/folha/paineldoleitor/rss091.xml')
        ,(u'Publifolha'     , u'http://feeds.folha.uol.com.br/folha/publifolha/rss091.xml'     )
        ,(u'Em cima da hora', u'http://feeds.folha.uol.com.br/emcimadahora/rss091.xml'         )
    ]

    def get_article_url(self, article):
        """Extract the real article URL embedded after the FeedBurner '/*' marker.

        Falls back to the raw feed URL when the marker is absent (previously
        this returned the empty string, silently dropping the article), and
        propagates None unchanged when the base class finds no URL at all.
        """
        url = BasicNewsRecipe.get_article_url(self, article)
        if not url:
            return url
        curl = url.partition('/*')[2]
        return curl or url

    def print_version(self, url):
        """Route the article through the site's printer-friendly renderer."""
        return 'http://tools.folha.com.br/print?site=emcimadahora&url=' + urllib.quote_plus(url)

    def get_cover_url(self):
        """Scrape the front page for the print edition's cover thumbnail.

        Returns None when the expected markup (div#newspaper containing a link
        to the print edition with an <img> inside) is not found, instead of
        raising on a missing <img>.
        """
        soup = self.index_to_soup('http://www.folha.uol.com.br/')
        cont = soup.find('div', attrs={'id':'newspaper'})
        if cont:
            ai = cont.find('a', href='http://www1.folha.uol.com.br/fsp/')
            # Guard against the anchor existing without an image inside it.
            if ai and ai.img:
                return ai.img['src']
        return None

View File

@ -8,7 +8,7 @@ from urllib2 import Request, urlopen, URLError
class FolhaOnline(BasicNewsRecipe): class FolhaOnline(BasicNewsRecipe):
THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here
LANGUAGE = 'pt_br' LANGUAGE = 'pt_br'
language = 'pt' language = 'pt_BR'
LANGHTM = 'pt-br' LANGHTM = 'pt-br'
ENCODING = 'cp1252' ENCODING = 'cp1252'
ENCHTM = 'iso-8859-1' ENCHTM = 'iso-8859-1'

View File

@ -14,7 +14,7 @@ class FSP(BasicNewsRecipe):
HOMEPAGE = 'http://www1.folha.uol.com.br/fsp/' HOMEPAGE = 'http://www1.folha.uol.com.br/fsp/'
masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif' masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
language = 'pt' language = 'pt_BR'
no_stylesheets = True no_stylesheets = True
max_articles_per_feed = 40 max_articles_per_feed = 40
remove_javascript = True remove_javascript = True

View File

@ -6,21 +6,20 @@ __copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
from datetime import date
import re import re
class GN(BasicNewsRecipe): class GN(BasicNewsRecipe):
EDITION = 0 EDITION = 0
__author__ = 'Piotr Kontek' __author__ = 'Piotr Kontek'
title = u'Gość niedzielny'
description = 'Weekly magazine' description = 'Weekly magazine'
encoding = 'utf-8' encoding = 'utf-8'
no_stylesheets = True no_stylesheets = True
language = 'pl' language = 'pl'
remove_javascript = True remove_javascript = True
temp_files = [] temp_files = []
simultaneous_downloads = 1
masthead_url = 'http://gosc.pl/files/11/03/12/949089_top.gif'
title = u'Gość niedzielny'
articles_are_obfuscated = True articles_are_obfuscated = True
@ -56,22 +55,28 @@ class GN(BasicNewsRecipe):
self.temp_files[-1].close() self.temp_files[-1].close()
return self.temp_files[-1].name return self.temp_files[-1].name
def find_last_issue(self): def find_last_issue(self, year):
soup = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny') soup = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny/rok/' + str(year))
#szukam zdjęcia i linka do porzedniego pełnego numeru
#szukam zdjęcia i linka do poprzedniego pełnego numeru
first = True first = True
for d in soup.findAll('div', attrs={'class':'l release_preview_l'}): for d in soup.findAll('div', attrs={'class':'l release_preview_l'}):
img = d.find('img') img = d.find('img')
if img != None: if img != None:
a = img.parent a = img.parent
self.EDITION = a['href'] self.EDITION = a['href']
self.title = img['alt']
self.cover_url = 'http://www.gosc.pl' + img['src'] self.cover_url = 'http://www.gosc.pl' + img['src']
if not first: if year != date.today().year or not first:
break break
first = False first = False
def parse_index(self): def parse_index(self):
self.find_last_issue() year = date.today().year
self.find_last_issue(year)
##jeśli to pierwszy numer w roku trzeba pobrać poprzedni rok
if self.EDITION == 0:
self.find_last_issue(year-1)
soup = self.index_to_soup('http://www.gosc.pl' + self.EDITION) soup = self.index_to_soup('http://www.gosc.pl' + self.EDITION)
feeds = [] feeds = []
#wstepniak #wstepniak

79
recipes/grid_to.recipe Normal file
View File

@ -0,0 +1,79 @@
from calibre.web.feeds.news import BasicNewsRecipe
class TheGridTO(BasicNewsRecipe):
    """Download the latest issue of The Grid, a weekly Toronto city magazine."""

    #: The title to use for the ebook
    title = u'The Grid TO'

    #: A couple of lines that describe the content this recipe downloads.
    #: This will be used primarily in a GUI that presents a list of recipes.
    description = (u'The Grid is a weekly city magazine and daily website providing a fresh, '
                   'accessible voice for Toronto.')

    #: The author of this recipe
    __author__ = u'Yusuf W'

    #: The language that the news is in. Must be an ISO-639 code either
    #: two or three characters long
    language = 'en_CA'

    #: Publication type
    #: Set to newspaper, magazine or blog
    publication_type = 'newspaper'

    #: Convenient flag to disable loading of stylesheets for websites
    #: that have overly complex stylesheets unsuitable for conversion
    #: to ebooks formats
    #: If True stylesheets are not downloaded and processed
    no_stylesheets = True

    #: List of tags to be removed. Specified tags are removed from downloaded HTML.
    remove_tags_before = dict(name='div', id='content')
    remove_tags_after = dict(name='div', id='content')
    remove_tags = [
        dict(name='div', attrs={'class':'right-content pull-right'}),
        dict(name='div', attrs={'class':'right-content'}),
        dict(name='div', attrs={'class':'ftr-line'}),
        dict(name='div', attrs={'class':'pull-right'}),
        dict(name='div', id='comments'),
        dict(name='div', id='tags')
    ]

    #: Keep only the specified tags and their children.
    #keep_only_tags = [dict(name='div', id='content')]

    cover_margins = (0, 0, '#ffffff')

    # Base site URL; parse_index() later rebinds this to the latest issue's URL.
    INDEX = 'http://www.thegridto.com'

    def get_cover_url(self):
        """Return the cover image URL scraped from the latest-issue block on the front page."""
        soup = self.index_to_soup(self.INDEX)
        cover_url = soup.find(attrs={'class':'article-block latest-issue'}).find('img')['src']
        return cover_url

    def parse_index(self):
        """Build the feed list (city/life/culture sections) from the latest issue's index page."""

        # Get the latest issue: the third anchor in the footer block links to it.
        soup = self.index_to_soup(self.INDEX)
        a = soup.find('div', attrs={'class': 'full-content stuff-ftr'}).findAll('a')[2]

        # Parse the index of the latest issue
        self.INDEX = self.INDEX + a['href']
        soup = self.index_to_soup(self.INDEX)

        feeds = []
        for section in ['city', 'life', 'culture']:
            section_class = 'left-content article-listing ' + section + ' pull-left'
            div = soup.find(attrs={'class': section_class})

            articles = []
            for tag in div.findAllNext(attrs={'class':'search-block'}):
                # The second anchor of each search-block holds the article link/title.
                a = tag.findAll('a', href=True)[1]
                title = self.tag_to_string(a)
                url = a['href']
                articles.append({'title': title, 'url': url, 'description':'', 'date':''})
            feeds.append((section, articles))
        return feeds

View File

@ -0,0 +1,22 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1336289226(BasicNewsRecipe):
    """Fetch articles from the Italian heavy-metal magazine heavy-metal.it."""
    title = u'Heavy Metal'
    oldest_article = 15  # maximum article age, in days
    max_articles_per_feed = 100
    auto_cleanup = False  # manual tag lists below do the cleanup instead
    masthead_url = 'http://net-static2.tccstatic.com/template/tmw/img/tj.gif'
    feeds = [(u'Heavy Metal', u'http://www.heavy-metal.it/feed/')]
    # Keep only the article body container.
    keep_only_tags = [
        dict(name='div', attrs={'class':'entry'})
    ]
    # Drop everything after the social-sharing widget.
    remove_tags_after = [
        dict(name='div', attrs={'class':'sociable'})
    ]
    description = 'An Heavy metal Italian magazine'
    __author__ = 'faber1971'
    language = 'it'
    __version__ = 'v1.0'
    __date__ = '6, May 2012'

BIN
recipes/icons/folha.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 648 B

View File

@ -20,6 +20,8 @@ class JijiDotCom(BasicNewsRecipe):
top_url = 'http://www.jiji.com/' top_url = 'http://www.jiji.com/'
feeds = [(u'\u30cb\u30e5\u30fc\u30b9', u'http://www.jiji.com/rss/ranking.rdf')] feeds = [(u'\u30cb\u30e5\u30fc\u30b9', u'http://www.jiji.com/rss/ranking.rdf')]
remove_tags_before = dict(id="article-area")
remove_tags_after = dict(id="ad_google") remove_tags_after = dict(id="ad_google")
def get_cover_url(self): def get_cover_url(self):

View File

@ -0,0 +1,24 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1336504510(BasicNewsRecipe):
    """Fetch Juventus news from La Stampa via a feed43-generated RSS feed."""
    title = u'Juve - La Stampa'
    oldest_article = 1  # maximum article age, in days
    language = 'it'
    max_articles_per_feed = 100
    auto_cleanup = True  # heuristic cleanup plus the remove_tags list below
    masthead_url = 'http://www3.lastampa.it/fileadmin/media/sport/quijuve/top_quijuve.jpg'
    feeds = [(u'Qui Juve - La Stampa', u'http://feed43.com/2352784107537677.xml')]
    # Strip toolbars, section headers and page furniture.
    remove_tags = [dict(name='div',attrs={'class':['article-toolbar', 'sezione sezione-news', 'intestazione']})]
    # Typography overrides for the converted ebook.
    extra_css = '''
                div.dettaglio div.immagine_girata p.news-single-imgcaption {color: #000000; font-family: "Georgia", "Times", serif; font-size: 7px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
                .sezione {color: #000000; font-family: "Georgia", "Times", serif; font-size: 7px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
                body {color: #000000; font-family: "Georgia", "Times", serif; font-size: 7px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
                h3 {color: #000000; font-family: "Georgia", "Times", serif; font-size: 22px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
                div.dettaglio h2.catenaccio {color: #000000; font-family: "Georgia", "Times", serif; font-size: 18px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
                '''
    description = 'News about Juventus from La Stampa'
    __author__ = 'faber1971'
    __version__ = 'v1.0'
    __date__ = '8, May 2012'

View File

@ -1,7 +1,7 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini' __author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini; minor fixes by faber1971'
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>' __copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>, faber1971'
description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version; 14.12.2011 new version' description = 'Italian daily newspaper - v1.02 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version; 14.12.2011 new version; 11.05.2012 new version'
''' '''
http://www.repubblica.it/ http://www.repubblica.it/
@ -12,14 +12,14 @@ from calibre.web.feeds.news import BasicNewsRecipe
class LaRepubblica(BasicNewsRecipe): class LaRepubblica(BasicNewsRecipe):
title = 'La Repubblica' title = 'La Repubblica'
__author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic' __author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic, faber1971'
description = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.' description = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.'
masthead_url = 'http://www.repubblica.it/static/images/homepage/2010/la-repubblica-logo-home-payoff.png' masthead_url = 'http://www.repubblica.it/static/images/homepage/2010/la-repubblica-logo-home-payoff.png'
publisher = 'Gruppo editoriale L\'Espresso' publisher = 'Gruppo editoriale L\'Espresso'
category = 'News, politics, culture, economy, general interest' category = 'News, politics, culture, economy, general interest'
language = 'it' language = 'it'
timefmt = '[%a, %d %b, %Y]' timefmt = '[%a, %d %b, %Y]'
oldest_article = 5 oldest_article = 1
encoding = 'utf8' encoding = 'utf8'
use_embedded_content = False use_embedded_content = False
no_stylesheets = True no_stylesheets = True
@ -59,6 +59,7 @@ class LaRepubblica(BasicNewsRecipe):
dict(attrs={'class':'articolo'}), dict(attrs={'class':'articolo'}),
dict(attrs={'class':'body-text'}), dict(attrs={'class':'body-text'}),
dict(name='p', attrs={'class':'disclaimer clearfix'}), dict(name='p', attrs={'class':'disclaimer clearfix'}),
dict(name='div', attrs={'id':'main'}),
dict(attrs={'id':'contA'}) dict(attrs={'id':'contA'})
] ]
@ -67,7 +68,7 @@ class LaRepubblica(BasicNewsRecipe):
dict(name=['object','link','meta','iframe','embed']), dict(name=['object','link','meta','iframe','embed']),
dict(name='span',attrs={'class':'linkindice'}), dict(name='span',attrs={'class':'linkindice'}),
dict(name='div', attrs={'class':['bottom-mobile','adv adv-middle-inline']}), dict(name='div', attrs={'class':['bottom-mobile','adv adv-middle-inline']}),
dict(name='div', attrs={'id':['rssdiv','blocco','fb-like-head']}), dict(name='div', attrs={'id':['rssdiv','blocco','fb-like-head', 'sidebar']}),
dict(name='div', attrs={'class':['utility','fb-like-button','archive-button']}), dict(name='div', attrs={'class':['utility','fb-like-button','archive-button']}),
dict(name='div', attrs={'class':'generalbox'}), dict(name='div', attrs={'class':'generalbox'}),
dict(name='ul', attrs={'id':'hystory'}) dict(name='ul', attrs={'id':'hystory'})
@ -88,11 +89,12 @@ class LaRepubblica(BasicNewsRecipe):
(u'Sport', u'http://www.repubblica.it/rss/sport/rss2.0.xml'), (u'Sport', u'http://www.repubblica.it/rss/sport/rss2.0.xml'),
(u'Calcio', u'http://www.repubblica.it/rss/sport/calcio/rss2.0.xml'), (u'Calcio', u'http://www.repubblica.it/rss/sport/calcio/rss2.0.xml'),
(u'Motori', u'http://www.repubblica.it/rss/motori/rss2.0.xml'), (u'Motori', u'http://www.repubblica.it/rss/motori/rss2.0.xml'),
(u'Edizione Roma', u'http://roma.repubblica.it/rss/rss2.0.xml'), (u'Roma', u'http://roma.repubblica.it/rss/rss2.0.xml'),
(u'Edizione Torino', u'http://torino.repubblica.it/rss/rss2.0.xml'), (u'Torino', u'http://torino.repubblica.it/rss/rss2.0.xml'),
(u'Edizione Milano', u'feed://milano.repubblica.it/rss/rss2.0.xml'), (u'Milano', u'feed://milano.repubblica.it/rss/rss2.0.xml'),
(u'Edizione Napoli', u'feed://napoli.repubblica.it/rss/rss2.0.xml'), (u'Napoli', u'feed://napoli.repubblica.it/rss/rss2.0.xml'),
(u'Edizione Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml') (u'Bari', u'http://bari.repubblica.it/rss/rss2.0.xml'),
(u'Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml')
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):

View File

@ -16,12 +16,12 @@ class MainichiDailyNews(BasicNewsRecipe):
publisher = 'Mainichi Daily News' publisher = 'Mainichi Daily News'
category = 'news, japan' category = 'news, japan'
language = 'ja' language = 'ja'
index = 'http://mainichi.jp/select/'
feeds = [(u'daily news', u'http://mainichi.jp/rss/etc/flash.rss')] remove_javascript = True
masthead_title = u'MAINICHI DAILY NEWS'
remove_tags_before = {'class':"NewsTitle"} remove_tags_before = {'class':"NewsTitle"}
remove_tags = [{'class':"RelatedArticle"}] remove_tags_after = {'class':"NewsBody clr"}
remove_tags_after = {'class':"Credit"}
def parse_feeds(self): def parse_feeds(self):
@ -32,9 +32,30 @@ class MainichiDailyNews(BasicNewsRecipe):
for a,curarticle in enumerate(curfeed.articles): for a,curarticle in enumerate(curfeed.articles):
if re.search(r'pheedo.jp', curarticle.url): if re.search(r'pheedo.jp', curarticle.url):
delList.append(curarticle) delList.append(curarticle)
if re.search(r'rssad.jp', curarticle.url):
delList.append(curarticle)
if len(delList)>0: if len(delList)>0:
for d in delList: for d in delList:
index = curfeed.articles.index(d) index = curfeed.articles.index(d)
curfeed.articles[index:index+1] = [] curfeed.articles[index:index+1] = []
return feeds return feeds
def parse_index(self):
feeds = []
soup = self.index_to_soup(self.index)
topstories = soup.find('ul',attrs={'class':'MaiLink'})
if topstories:
newsarticles = []
for itt in topstories.findAll('li'):
itema = itt.find('a',href=True)
if itema:
newsarticles.append({
'title' :itema.string
,'date' :''
,'url' :itema['href']
,'description':''
})
feeds.append(('latest', newsarticles))
return feeds

View File

@ -0,0 +1,67 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.mainichi.jp
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class MainichiEnglishNews(BasicNewsRecipe):
title = u'The Mainichi'
__author__ = 'Hiroshi Miura'
oldest_article = 2
max_articles_per_feed = 40
description = 'Japanese traditional newspaper Mainichi news in English'
publisher = 'Mainichi News'
category = 'news, japan'
language = 'en_JP'
index = 'http://mainichi.jp/english/english/index.html'
remove_javascript = True
masthead_url = 'http://mainichi.jp/english/images/themainichi.png'
remove_tags_before = {'class':"NewsTitle"}
remove_tags_after = {'class':"NewsBody clr"}
def parse_feeds(self):
feeds = BasicNewsRecipe.parse_feeds(self)
for curfeed in feeds:
delList = []
for a,curarticle in enumerate(curfeed.articles):
if re.search(r'pheedo.jp', curarticle.url):
delList.append(curarticle)
if re.search(r'rssad.jp', curarticle.url):
delList.append(curarticle)
if len(delList)>0:
for d in delList:
index = curfeed.articles.index(d)
curfeed.articles[index:index+1] = []
return feeds
def parse_index(self):
feeds = []
soup = self.index_to_soup(self.index)
for section in soup.findAll('section'):
newsarticles = []
section_name = 'news'
hds = section.find('div', attrs={'class':'CategoryHead clr'})
if hds:
section_item = hds.find('h1')
if section_item:
section_name = section_item.find('a').string
items = section.find('ul', attrs={'class':'MaiLink'})
for item in items.findAll('li'):
if item:
itema = item.find('a')
newsarticles.append({
'title' :itema.string
,'date' :''
,'url' :itema['href']
,'description':''
})
feeds.append((section_name, newsarticles))
return feeds

View File

@ -1,34 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class MainichiDailyITNews(BasicNewsRecipe):
title = u'\u6bce\u65e5\u65b0\u805e(IT&\u5bb6\u96fb)'
__author__ = 'Hiroshi Miura'
oldest_article = 2
max_articles_per_feed = 100
description = 'Japanese traditional newspaper Mainichi Daily News - IT and electronics'
publisher = 'Mainichi Daily News'
category = 'news, Japan, IT, Electronics'
language = 'ja'
feeds = [(u'IT News', u'http://mainichi.pheedo.jp/f/mainichijp_electronics')]
remove_tags_before = {'class':"NewsTitle"}
remove_tags = [{'class':"RelatedArticle"}]
remove_tags_after = {'class':"Credit"}
def parse_feeds(self):
feeds = BasicNewsRecipe.parse_feeds(self)
for curfeed in feeds:
delList = []
for a,curarticle in enumerate(curfeed.articles):
if re.search(r'pheedo.jp', curarticle.url):
delList.append(curarticle)
if len(delList)>0:
for d in delList:
index = curfeed.articles.index(d)
curfeed.articles[index:index+1] = []
return feeds

View File

@ -0,0 +1,59 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.mainichi.jp
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class MainichiDailyScienceNews(BasicNewsRecipe):
title = u'\u6bce\u65e5\u65b0\u805e(Science)'
__author__ = 'Hiroshi Miura'
oldest_article = 2
max_articles_per_feed = 20
description = 'Japanese traditional newspaper Mainichi Daily News - science'
publisher = 'Mainichi Daily News'
category = 'news, japan'
language = 'ja'
index = 'http://mainichi.jp/select/science'
remove_javascript = True
masthead_title = u'MAINICHI DAILY NEWS'
remove_tags_before = {'class':"NewsTitle"}
remove_tags_after = {'class':"NewsBody clr"}
def parse_feeds(self):
feeds = BasicNewsRecipe.parse_feeds(self)
for curfeed in feeds:
delList = []
for a,curarticle in enumerate(curfeed.articles):
if re.search(r'rssad.jp', curarticle.url):
delList.append(curarticle)
if len(delList)>0:
for d in delList:
index = curfeed.articles.index(d)
curfeed.articles[index:index+1] = []
return feeds
def parse_index(self):
feeds = []
soup = self.index_to_soup(self.index)
topstories = soup.find('ul',attrs={'class':'MaiLink'})
if topstories:
newsarticles = []
for itt in topstories.findAll('li'):
itema = itt.find('a',href=True)
if itema:
newsarticles.append({
'title' :itema.string
,'date' :''
,'url' :itema['href']
,'description':''
})
feeds.append(('Science', newsarticles))
return feeds

View File

@ -0,0 +1,42 @@
from calibre.web.feeds.news import BasicNewsRecipe
class MarineCorpsTimes(BasicNewsRecipe):
title = 'Marine Corps Times'
__author__ = 'jde'
__date__ = '16 May 2012'
__version__ = '1.0'
description = 'News of the U.S. Marine Corps'
language = 'en'
publisher = 'MarineCorpsTimes.com'
category = 'news, U.S. Marine Corps'
tags = 'news, U.S. Marine Corps'
cover_url = 'http://www.marinecorpstimes.com/images/logo_marinetimes-alert.jpg'
masthead_url = 'http://www.marinecorpstimes.com/images/logo_marinetimes-alert.jpg'
oldest_article = 7 #days
max_articles_per_feed = 25
publication_type = 'newspaper'
no_stylesheets = True
use_embedded_content = False
encoding = None
recursions = 0
needs_subscription = False
remove_javascript = True
remove_empty_feeds = True
auto_cleanup = True
feeds = [
('News', 'http://www.MarineCorpstimes.com/rss_news.php'),
('Benefits', 'http://www.MarineCorpstimes.com/rss_benefits.php'),
('Money', 'http://www.MarineCorpstimes.com/rss_money.php'),
('Careers & Education', 'http://www.MarineCorpstimes.com/rss_careers.php'),
('Community', 'http://www.MarineCorpstimes.com/rss_community.php'),
('Off Duty', 'http://www.MarineCorpstimes.com/rss_off_duty.php'),
('Entertainment', 'http://www.MarineCorpstimes.com/rss_entertainment.php'),
('Guard & Reserve', 'http://www.MarineCorpstimes.com/rss_guard.php'),
]

View File

@ -0,0 +1,41 @@
from calibre.web.feeds.news import BasicNewsRecipe
class MilitaryTimes(BasicNewsRecipe):
title = 'Military Times'
__author__ = 'jde'
__date__ = '16 May 2012'
__version__ = '1.0'
description = 'News of the U.S. Military'
language = 'en'
publisher = 'MilitaryTimes.com'
category = 'news, U.S. Military'
tags = 'news, U.S. Military'
cover_url = 'http://www.militarytimes.com/images/logo_militarytimes_landing-s.gif'
masthead_url = 'http://www.militarytimes.com/images/logo_militarytimes_landing-s.gif'
oldest_article = 7 #days
max_articles_per_feed = 25
publication_type = 'newspaper'
no_stylesheets = True
use_embedded_content = False
encoding = None
recursions = 0
needs_subscription = False
remove_javascript = True
remove_empty_feeds = True
auto_cleanup = True
feeds = [
('News', 'http://www.militarytimes.com/rss_news.php'),
('Benefits', 'http://www.militarytimes.com/rss_benefits.php'),
('Money', 'http://www.militarytimes.com/rss_money.php'),
('Careers & Education', 'http://www.militarytimes.com/rss_careers.php'),
('Community', 'http://www.militarytimes.com/rss_community.php'),
('Off Duty', 'http://www.militarytimes.com/rss_off_duty.php'),
('Entertainment', 'http://www.militarytimes.com/rss_entertainment.php'),
('Guard & Reserve', 'http://www.militarytimes.com/rss_guard.php'),
]

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3' __license__ = 'GPL v3'
@ -7,77 +6,21 @@ __license__ = 'GPL v3'
www.canada.com www.canada.com
''' '''
import re from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
class CanWestPaper(BasicNewsRecipe): class CanWestPaper(BasicNewsRecipe):
# un-comment the following four lines for the Victoria Times Colonist # un-comment the following three lines for the Montreal Gazette
## title = u'Victoria Times Colonist'
## url_prefix = 'http://www.timescolonist.com'
## description = u'News from Victoria, BC'
## fp_tag = 'CAN_TC'
# un-comment the following four lines for the Vancouver Province
## title = u'Vancouver Province'
## url_prefix = 'http://www.theprovince.com'
## description = u'News from Vancouver, BC'
## fp_tag = 'CAN_VP'
# un-comment the following four lines for the Vancouver Sun
## title = u'Vancouver Sun'
## url_prefix = 'http://www.vancouversun.com'
## description = u'News from Vancouver, BC'
## fp_tag = 'CAN_VS'
# un-comment the following four lines for the Edmonton Journal
## title = u'Edmonton Journal'
## url_prefix = 'http://www.edmontonjournal.com'
## description = u'News from Edmonton, AB'
## fp_tag = 'CAN_EJ'
# un-comment the following four lines for the Calgary Herald
## title = u'Calgary Herald'
## url_prefix = 'http://www.calgaryherald.com'
## description = u'News from Calgary, AB'
## fp_tag = 'CAN_CH'
# un-comment the following four lines for the Regina Leader-Post
## title = u'Regina Leader-Post'
## url_prefix = 'http://www.leaderpost.com'
## description = u'News from Regina, SK'
## fp_tag = ''
# un-comment the following four lines for the Saskatoon Star-Phoenix
## title = u'Saskatoon Star-Phoenix'
## url_prefix = 'http://www.thestarphoenix.com'
## description = u'News from Saskatoon, SK'
## fp_tag = ''
# un-comment the following four lines for the Windsor Star
## title = u'Windsor Star'
## url_prefix = 'http://www.windsorstar.com'
## description = u'News from Windsor, ON'
## fp_tag = 'CAN_'
# un-comment the following four lines for the Ottawa Citizen
## title = u'Ottawa Citizen'
## url_prefix = 'http://www.ottawacitizen.com'
## description = u'News from Ottawa, ON'
## fp_tag = 'CAN_OC'
# un-comment the following four lines for the Montreal Gazette
title = u'Montreal Gazette' title = u'Montreal Gazette'
url_prefix = 'http://www.montrealgazette.com'
description = u'News from Montreal, QC' description = u'News from Montreal, QC'
fp_tag = 'CAN_MG'
language = 'en_CA' language = 'en_CA'
__author__ = 'Nick Redding' __author__ = 'Nick Redding'
no_stylesheets = True no_stylesheets = True
auto_cleanup = True
auto_cleanup_keep = '//*[@id="imageBox"]'
timefmt = ' [%b %d]' timefmt = ' [%b %d]'
extra_css = ''' extra_css = '''
.timestamp { font-size:xx-small; display: block; } .timestamp { font-size:xx-small; display: block; }
@ -87,135 +30,19 @@ class CanWestPaper(BasicNewsRecipe):
.byline { font-size:xx-small; } .byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic } #photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }''' #photocredit { font-size: xx-small; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def get_cover_url(self):
from datetime import timedelta, date
if self.fp_tag=='':
return None
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
daysback=1
try:
br.open(cover)
except:
while daysback<7:
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
except:
daysback = daysback+1
continue
break
if daysback==7:
self.log("\nCover unavailable")
cover = None
return cover
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","",string)
# Replace rsquo (\x92)
fixed = re.sub("\x92","",fixed)
# Replace ldquo (\x93)
fixed = re.sub("\x93","“",fixed)
# Replace rdquo (\x94)
fixed = re.sub("\x94","”",fixed)
# Replace ndash (\x96)
fixed = re.sub("\x96","",fixed)
# Replace mdash (\x97)
fixed = re.sub("\x97","—",fixed)
fixed = re.sub("&#x2019;","",fixed)
return fixed
def massageNCXText(self, description):
# Kindle TOC descriptions won't render certain characters
if description:
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
# Replace '&' with '&'
massaged = re.sub("&","&", massaged)
return self.fixChars(massaged)
else:
return description
def populate_article_metadata(self, article, soup, first):
if first:
picdiv = soup.find('body').find('img')
if picdiv is not None:
self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src']))
xtitle = article.text_summary.strip()
if len(xtitle) == 0:
desc = soup.find('meta',attrs={'property':'og:description'})
if desc is not None:
article.summary = article.text_summary = desc['content']
def strip_anchors(self,soup):
paras = soup.findAll(True)
for para in paras:
aTags = para.findAll('a')
for a in aTags:
if a.img is None:
a.replaceWith(a.renderContents().decode('cp1252','replace'))
return soup
def preprocess_html(self, soup):
return self.strip_anchors(soup)
def parse_index(self): feeds = [
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html') ('News',
'http://rss.canada.com/get/?F297'),
('Sports',
'http://rss.canada.com/get/?F299'),
('Entertainment',
'http://rss.canada.com/get/?F7366'),
('Business',
'http://rss.canada.com/get/?F6939'),
]
articles = {}
key = 'News'
ans = ['News']
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
#self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3,False)
ans.append(key)
self.log("Section name %s" % key)
continue
# div contains article data
h1tag = divtag.find('h1')
if not h1tag:
continue
atag = h1tag.find('a',href=True)
if not atag:
continue
url = self.url_prefix+'/news/todays-paper/'+atag['href']
#self.log("Section %s" % key)
#self.log("url %s" % url)
title = self.tag_to_string(atag,False)
#self.log("title %s" % title)
pubdate = ''
description = ''
ptag = divtag.find('p');
if ptag:
description = self.tag_to_string(ptag,False)
#self.log("description %s" % description)
author = ''
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag,False)
#self.log("author %s" % author)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@ -0,0 +1,22 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Nachdenkseiten(BasicNewsRecipe):
title = u'Nachdenkseiten'
__author__ = 'jrda'
publisher = 'www.nachdenkseiten.de Albrecht Mueller und Dr. Wolfgang Lieb'
description = 'NachDenkSeiten - Die kritische Website'
category = 'news'
oldest_article = 7
use_embedded_content = False
language = 'de'
timefmt = ''
max_articles_per_feed = 6
no_stylesheets = True
encoding = 'utf-8'
remove_javascript = True
keep_only_tags = [
{'id':'content'}]
feeds = [
('News', 'http://www.nachdenkseiten.de/?feed=rss2'),
]

View File

@ -0,0 +1,16 @@
__version__ = 'v1.0'
__date__ = '5, May 2012'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1336226255(BasicNewsRecipe):
title = u'National Geographic'
__author__ = 'faber1971'
description = 'Science magazine'
language = 'it'
oldest_article = 15
max_articles_per_feed = 100
auto_cleanup = True
remove_tags = [dict(name='div',attrs={'class':'banner-abbonamenti'})]
feeds = [(u'National Geographic', u'http://www.nationalgeographic.it/rss/all/rss2.0.xml')]

42
recipes/navy_times.recipe Normal file
View File

@ -0,0 +1,42 @@
from calibre.web.feeds.news import BasicNewsRecipe
class NavyTimes(BasicNewsRecipe):
title = 'Navy Times'
__author__ = 'jde'
__date__ = '16 May 2012'
__version__ = '1.0'
description = 'News of the U.S. Navy'
language = 'en'
publisher = 'NavyTimes.com'
category = 'news, U.S. Navy'
tags = 'news, U.S. Navy'
cover_url = 'http://www.navytimes.com/images/logo_navytimes_alert.jpg'
masthead_url = 'http://www.navytimes.com/images/logo_navytimes_alert.jpg'
oldest_article = 7 #days
max_articles_per_feed = 25
publication_type = 'newspaper'
no_stylesheets = True
use_embedded_content = False
encoding = None
recursions = 0
needs_subscription = False
remove_javascript = True
remove_empty_feeds = True
auto_cleanup = True
feeds = [
('News', 'http://www.navytimes.com/rss_news.php'),
('Benefits', 'http://www.navytimes.com/rss_benefits.php'),
('Money', 'http://www.navytimes.com/rss_money.php'),
('Careers & Education', 'http://www.navytimes.com/rss_careers.php'),
('Community', 'http://www.navytimes.com/rss_community.php'),
('Off Duty', 'http://www.navytimes.com/rss_off_duty.php'),
('Entertainment', 'http://www.navytimes.com/rss_entertainment.php'),
('Guard & Reserve', 'http://www.navytimes.com/rss_guard.php'),
]

View File

@ -0,0 +1,20 @@
from calibre.web.feeds.news import BasicNewsRecipe
class NewsBusters(BasicNewsRecipe):
title = u'News Busters'
description = 'Exposing and Combating Liberal Media Bias'
__author__ = 'jde'
oldest_article = 1#day
max_articles_per_feed = 100
cover_url = "http://newsbusters.org/sites/all/themes/genesis_nb/images/nb-mrc.png"
language = 'en'
encoding = 'utf8'
needs_subscription = False
remove_javascript = True
recursions = 0
use_embedded_content = False
no_stylesheets = True
auto_cleanup = True
feeds = [(u'Blog', u'http://www.newsbusters.org/rss.xml')]

View File

@ -9,10 +9,10 @@ import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Pescanik(BasicNewsRecipe): class Pescanik(BasicNewsRecipe):
title = 'Peščanik' title = u'Peščanik'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Peščanik je udruženje građana osnovano 2006. godine. Glavni proizvod Peščanika je radio emisija koja je emitovana na Radiju B92 od 02.02.2000. do 16.06.2011, a od septembra 2011. se emituje na osam radio stanica u Srbiji, Crnoj Gori i BiH' description = u'Peščanik je udruženje građana osnovano 2006. godine. Glavni proizvod Peščanika je radio emisija koja je emitovana na Radiju B92 od 02.02.2000. do 16.06.2011, a od septembra 2011. se emituje na osam radio stanica u Srbiji, Crnoj Gori i BiH'
publisher = 'Peščanik' publisher = u'Peščanik'
category = 'news, politics, Serbia' category = 'news, politics, Serbia'
oldest_article = 10 oldest_article = 10
max_articles_per_feed = 100 max_articles_per_feed = 100

View File

@ -1,5 +1,5 @@
""" """
Pocket Calibre Recipe v1.0 Pocket Calibre Recipe v1.2
""" """
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = ''' __copyright__ = '''
@ -73,6 +73,9 @@ class Pocket(BasicNewsRecipe):
articles = [] articles = []
soup = self.index_to_soup(feedurl) soup = self.index_to_soup(feedurl)
ritem = soup.find('ul', attrs={'id':'list'}) ritem = soup.find('ul', attrs={'id':'list'})
if ritem is None:
self.log.exception("Page %s skipped: invalid HTML" % (feedtitle if feedtitle else feedurl))
continue
for item in reversed(ritem.findAll('li')): for item in reversed(ritem.findAll('li')):
if articlesToGrab < 1: if articlesToGrab < 1:
break break
@ -94,7 +97,12 @@ class Pocket(BasicNewsRecipe):
self.readList.append(readLink) self.readList.append(readLink)
totalfeeds.append((feedtitle, articles)) totalfeeds.append((feedtitle, articles))
if len(self.readList) < self.minimum_articles: if len(self.readList) < self.minimum_articles:
raise Exception("Not enough articles in RIL! Change minimum_articles or add more.") self.mark_as_read_after_dl = False
if hasattr(self, 'abort_recipe_processing'):
self.abort_recipe_processing("Only %d articles retrieved, minimum_articles not reached" % len(self.readList))
else:
self.log.exception("Only %d articles retrieved, minimum_articles not reached" % len(self.readList))
return []
return totalfeeds return totalfeeds
def mark_as_read(self, markList): def mark_as_read(self, markList):

View File

@ -0,0 +1,22 @@
__license__ = 'GPL v3'
__author__ = 'Vakya'
__version__ = 'v1.0'
__date__ = '14, May 2012'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1336226255(BasicNewsRecipe):
title = u'Revista Summa'
publisher = u'Summa'
__author__ = 'Vakya'
description = 'Informacion regional sobre economia y negocios'
language = 'es'
oldest_article = 15
max_articles_per_feed = 100
auto_cleanup = True
remove_tags_before = dict(name='h1')
remove_tags_after = dict(name='label')
feeds = [(u'Revista Summa', u'http://www.revistasumma.com/rss/rss-v2.0.rss')]

View File

@ -1,3 +1,4 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
@ -15,6 +16,8 @@ class Spiegel_int(BasicNewsRecipe):
language = 'en_DE' language = 'en_DE'
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
auto_cleanup = True
auto_cleanup_keep = '//*[@id="spArticleTopAsset"]'
encoding = 'cp1252' encoding = 'cp1252'
publisher = 'SPIEGEL ONLINE GmbH' publisher = 'SPIEGEL ONLINE GmbH'
category = 'news, politics, Germany' category = 'news, politics, Germany'
@ -43,25 +46,25 @@ class Spiegel_int(BasicNewsRecipe):
.spPhotoGallery{font-size:x-small; color:#990000 ;} .spPhotoGallery{font-size:x-small; color:#990000 ;}
''' '''
keep_only_tags = [dict(attrs={'id':'spArticleContent'})] #keep_only_tags = [dict(attrs={'id':'spArticleContent'})]
remove_tags_after = dict(attrs={'id':'spArticleBody'}) #remove_tags_after = dict(attrs={'id':'spArticleBody'})
remove_tags = [dict(name=['meta','base','iframe','embed','object'])] #remove_tags = [dict(name=['meta','base','iframe','embed','object'])]
remove_attributes = ['clear'] #remove_attributes = ['clear']
feeds = [(u'Spiegel Online', u'http://www.spiegel.de/international/index.rss')] feeds = [(u'Spiegel Online', u'http://www.spiegel.de/international/index.rss')]
def print_version(self, url): #def print_version(self, url):
main, sep, rest = url.rpartition(',') #main, sep, rest = url.rpartition(',')
rmain, rsep, rrest = main.rpartition(',') #rmain, rsep, rrest = main.rpartition(',')
return rmain + ',druck-' + rrest + ',' + rest #return rmain + ',druck-' + rrest + ',' + rest
def preprocess_html(self, soup): #def preprocess_html(self, soup):
for item in soup.findAll(style=True): #for item in soup.findAll(style=True):
del item['style'] #del item['style']
for item in soup.findAll('a'): #for item in soup.findAll('a'):
if item.string is not None: #if item.string is not None:
str = item.string #str = item.string
item.replaceWith(str) #item.replaceWith(str)
else: #else:
str = self.tag_to_string(item) #str = self.tag_to_string(item)
item.replaceWith(str) #item.replaceWith(str)
return soup #return soup

View File

@ -0,0 +1,39 @@
''' Stars and Stripes
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1308791026(BasicNewsRecipe):
title = u'Stars and Stripes'
oldest_article = 3
max_articles_per_feed = 100
__author__ = 'adoucette'
description = 'The U.S. militarys independent news source, featuring exclusive reports from Iraq, Afghanistan, Europe and the Far East.'
no_stylesheets = True
#delay = 1
use_embedded_content = False
encoding = 'utf8'
publisher = 'stripes.com'
category = 'news, US, world'
language = 'en_US'
publication_type = 'newsportal'
preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
,'linearize_tables': True
}
keep_only_tags = [dict(name='div', attrs={'class':['element article']})]
remove_tags_after = [dict(name='ul', attrs={'class':'inline-bookmarks'})]
feeds = [
(u'News', u'http://feeds.stripes.com/starsandstripes/news'),
(u'Sports', u'http://feeds.stripes.com/starsandstripes/sports'),
(u'Military Life', u'http://feeds.stripes.com/starsandstripes/militarylife'),
(u'Opinion', u'http://feeds.stripes.com/starsandstripes/opinion'),
(u'Travel', u'http://feeds.stripes.com/starsandstripes/travel')
]

View File

@ -0,0 +1,92 @@
__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.strategic-culture.org
'''
import time
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
class StrategicCulture(BasicNewsRecipe):
title = 'Strategic Culture Foundation'
__author__ = 'Darko Miletic'
description = 'Online Journal'
publisher = 'Strategic Culture Foundation'
category = 'news, politics'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
language = 'en'
publication_type = 'newsportal'
masthead_url = 'http://www.strategic-culture.org/img/logo.jpg'
extra_css = '''
body{font-family: Arial, sans-serif}
h1{font-family: "Times New Roman",Times,serif}
img{margin-bottom: 0.8em}
'''
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
keep_only_tags = [
dict(name=['h1','p'])
,dict(name='div', attrs={'id':'cke_pastebin'})
]
remove_tags = [dict(name=['object','link','base','meta','iframe'])]
feeds = [
(u'News' , u'http://www.strategic-culture.org/blocks/news.html' )
,(u'Politics' , u'http://www.strategic-culture.org/rubrics/politics.html' )
,(u'Economics' , u'http://www.strategic-culture.org/rubrics/economics.html' )
,(u'History & Culture', u'http://www.strategic-culture.org/rubrics/history-and-culture.html')
,(u'Columnists' , u'http://www.strategic-culture.org/rubrics/columnists.html' )
]
def print_version(self, url):
return url.replace('-culture.org/news/','-culture.org/pview/')
def parse_index(self):
totalfeeds = []
lfeeds = self.get_feeds()
for feedobj in lfeeds:
feedtitle, feedurl = feedobj
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
if feedurl.endswith('news.html'):
clname = 'sini14'
else:
clname = 'h22'
checker = []
for item in soup.findAll('a', attrs={'class':clname}):
atag = item
url = atag['href']
title = self.tag_to_string(atag)
description = ''
daypart = url.rpartition('/')[0]
mpart,sep,day = daypart.rpartition('/')
ypart,sep,month = mpart.rpartition('/')
year = ypart.rpartition('/')[2]
date = strftime("%a, %d %b %Y %H:%M:%S +0000", time.strptime(day + "/" + month + "/" + year, "%d/%m/%Y"))
if url not in checker:
checker.append(url)
articles.append({
'title' :title
,'date' :date
,'url' :url
,'description':description
})
totalfeeds.append((feedtitle, articles))
return totalfeeds

Binary file not shown.

View File

@ -506,3 +506,17 @@ change_book_details_font_size_by = 0
# No compile: compile_gpm_templates = False # No compile: compile_gpm_templates = False
compile_gpm_templates = True compile_gpm_templates = True
#: What format to default to when using the Tweak feature
# The Tweak feature of calibre allows direct editing of a book format.
# If multiple formats are available, calibre will offer you a choice
# of formats, defaulting to your preferred output format if it is available.
# Set this tweak to a specific value of 'EPUB' or 'AZW3' to always default
# to that format rather than your output format preference.
# Set to a value of 'remember' to use whichever format you chose last time you
# used the Tweak feature.
# Examples:
# default_tweak_format = None (Use output format)
# default_tweak_format = 'EPUB'
# default_tweak_format = 'remember'
default_tweak_format = None

View File

@ -20,7 +20,11 @@ vipy.session.initialize(project_name='calibre', src_dir=src_dir,
project_dir=project_dir, base_dir=project_dir) project_dir=project_dir, base_dir=project_dir)
def recipe_title_callback(raw): def recipe_title_callback(raw):
return eval(raw.decode('utf-8')).replace(' ', '_') try:
return eval(raw.decode('utf-8')).replace(u' ', u'_')
except:
print ('Failed to decode recipe title: %r'%raw)
raise
vipy.session.add_content_browser('<leader>r', 'Recipe', vipy.session.add_content_browser('<leader>r', 'Recipe',
vipy.session.glob_based_iterator(os.path.join(project_dir, 'recipes', '*.recipe')), vipy.session.glob_based_iterator(os.path.join(project_dir, 'recipes', '*.recipe')),

View File

@ -22,7 +22,8 @@ Do not modify it unless you know what you are doing.
import sys, os import sys, os
path = os.environ.get('CALIBRE_PYTHON_PATH', {path!r}) path = os.environ.get('CALIBRE_PYTHON_PATH', {path!r})
sys.path.insert(0, path) if path not in sys.path:
sys.path.insert(0, path)
sys.resources_location = os.environ.get('CALIBRE_RESOURCES_PATH', {resources!r}) sys.resources_location = os.environ.get('CALIBRE_RESOURCES_PATH', {resources!r})
sys.extensions_location = os.environ.get('CALIBRE_EXTENSIONS_PATH', {extensions!r}) sys.extensions_location = os.environ.get('CALIBRE_EXTENSIONS_PATH', {extensions!r})

View File

@ -41,8 +41,8 @@ binary_includes = [
'/usr/lib/libgthread-2.0.so.0', '/usr/lib/libgthread-2.0.so.0',
'/usr/lib/libpng14.so.14', '/usr/lib/libpng14.so.14',
'/usr/lib/libexslt.so.0', '/usr/lib/libexslt.so.0',
MAGICK_PREFIX+'/lib/libMagickWand.so.4', MAGICK_PREFIX+'/lib/libMagickWand.so.5',
MAGICK_PREFIX+'/lib/libMagickCore.so.4', MAGICK_PREFIX+'/lib/libMagickCore.so.5',
'/usr/lib/libgcrypt.so.11', '/usr/lib/libgcrypt.so.11',
'/usr/lib/libgpg-error.so.0', '/usr/lib/libgpg-error.so.0',
'/usr/lib/libphonon.so.4', '/usr/lib/libphonon.so.4',

View File

@ -429,7 +429,7 @@ class Py2App(object):
def add_imagemagick(self): def add_imagemagick(self):
info('\nAdding ImageMagick') info('\nAdding ImageMagick')
for x in ('Wand', 'Core'): for x in ('Wand', 'Core'):
self.install_dylib(os.path.join(SW, 'lib', 'libMagick%s.4.dylib'%x)) self.install_dylib(os.path.join(SW, 'lib', 'libMagick%s.5.dylib'%x))
idir = glob.glob(os.path.join(SW, 'lib', 'ImageMagick-*'))[-1] idir = glob.glob(os.path.join(SW, 'lib', 'ImageMagick-*'))[-1]
dest = os.path.join(self.frameworks_dir, 'ImageMagick') dest = os.path.join(self.frameworks_dir, 'ImageMagick')
if os.path.exists(dest): if os.path.exists(dest):

View File

@ -18,7 +18,7 @@ QT_DIR = 'Q:\\Qt\\4.8.1'
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns'] QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll' LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
SW = r'C:\cygwin\home\kovid\sw' SW = r'C:\cygwin\home\kovid\sw'
IMAGEMAGICK = os.path.join(SW, 'build', 'ImageMagick-6.6.6', IMAGEMAGICK = os.path.join(SW, 'build', 'ImageMagick-6.7.6',
'VisualMagick', 'bin') 'VisualMagick', 'bin')
CRT = r'C:\Microsoft.VC90.CRT' CRT = r'C:\Microsoft.VC90.CRT'

View File

@ -336,6 +336,8 @@ Index: src/PdfFiltersPrivate.cpp
ImageMagick ImageMagick
-------------- --------------
Get the source from: http://www.imagemagick.org/download/windows/ImageMagick-windows.zip
Edit VisualMagick/configure/configure.cpp to set Edit VisualMagick/configure/configure.cpp to set
int projectType = MULTITHREADEDDLL; int projectType = MULTITHREADEDDLL;
@ -349,7 +351,10 @@ Edit magick/magick-config.h
Undefine ProvideDllMain and MAGICKCORE_X11_DELEGATE Undefine ProvideDllMain and MAGICKCORE_X11_DELEGATE
Now open VisualMagick/VisualDynamicMT.sln set to Release Now open VisualMagick/VisualDynamicMT.sln set to Release
Remove the CORE_xlib and UTIL_Imdisplay project CORE_Magick++ Remove the CORE_xlib, UTIL_Imdisplay and CORE_Magick++ projects.
F7 for build project, you will get one error due to the removal of xlib, ignore
it.
calibre calibre
--------- ---------

View File

@ -12,14 +12,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-" "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n" "devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n" "POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2012-04-28 10:42+0000\n" "PO-Revision-Date: 2012-05-03 16:09+0000\n"
"Last-Translator: Ferran Rius <frius64@hotmail.com>\n" "Last-Translator: Dídac Rios <didac@niorcs.com>\n"
"Language-Team: Catalan <linux@softcatala.org>\n" "Language-Team: Catalan <linux@softcatala.org>\n"
"MIME-Version: 1.0\n" "MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n" "Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n" "Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2012-04-29 04:45+0000\n" "X-Launchpad-Export-Date: 2012-05-04 04:47+0000\n"
"X-Generator: Launchpad (build 15149)\n" "X-Generator: Launchpad (build 15195)\n"
"Language: ca\n" "Language: ca\n"
#. name for aaa #. name for aaa
@ -9936,11 +9936,11 @@ msgstr "Ibani"
#. name for ica #. name for ica
msgid "Ede Ica" msgid "Ede Ica"
msgstr "" msgstr "Ede Ica"
#. name for ich #. name for ich
msgid "Etkywan" msgid "Etkywan"
msgstr "" msgstr "Etkywan"
#. name for icl #. name for icl
msgid "Icelandic Sign Language" msgid "Icelandic Sign Language"
@ -9952,7 +9952,7 @@ msgstr "Anglès crioll; Islander"
#. name for ida #. name for ida
msgid "Idakho-Isukha-Tiriki" msgid "Idakho-Isukha-Tiriki"
msgstr "" msgstr "Idakho-Isukha-Tiriki"
#. name for idb #. name for idb
msgid "Indo-Portuguese" msgid "Indo-Portuguese"
@ -9960,15 +9960,15 @@ msgstr "Indo-portuguès"
#. name for idc #. name for idc
msgid "Idon" msgid "Idon"
msgstr "" msgstr "Idon"
#. name for idd #. name for idd
msgid "Ede Idaca" msgid "Ede Idaca"
msgstr "" msgstr "Ede Idaca"
#. name for ide #. name for ide
msgid "Idere" msgid "Idere"
msgstr "" msgstr "Idere"
#. name for idi #. name for idi
msgid "Idi" msgid "Idi"
@ -9976,43 +9976,43 @@ msgstr ""
#. name for ido #. name for ido
msgid "Ido" msgid "Ido"
msgstr "" msgstr "ido"
#. name for idr #. name for idr
msgid "Indri" msgid "Indri"
msgstr "" msgstr "Indri"
#. name for ids #. name for ids
msgid "Idesa" msgid "Idesa"
msgstr "" msgstr "Idesa"
#. name for idt #. name for idt
msgid "Idaté" msgid "Idaté"
msgstr "" msgstr "Idaté"
#. name for idu #. name for idu
msgid "Idoma" msgid "Idoma"
msgstr "" msgstr "Idoma"
#. name for ifa #. name for ifa
msgid "Ifugao; Amganad" msgid "Ifugao; Amganad"
msgstr "" msgstr "Ifugao; Amganad"
#. name for ifb #. name for ifb
msgid "Ifugao; Batad" msgid "Ifugao; Batad"
msgstr "" msgstr "Ifugao; Batad"
#. name for ife #. name for ife
msgid "Ifè" msgid "Ifè"
msgstr "" msgstr "Ifè"
#. name for iff #. name for iff
msgid "Ifo" msgid "Ifo"
msgstr "" msgstr "Ifo"
#. name for ifk #. name for ifk
msgid "Ifugao; Tuwali" msgid "Ifugao; Tuwali"
msgstr "" msgstr "Ifugao; Tuwali"
#. name for ifm #. name for ifm
msgid "Teke-Fuumu" msgid "Teke-Fuumu"
@ -10020,15 +10020,15 @@ msgstr "Teke; Fuumu"
#. name for ifu #. name for ifu
msgid "Ifugao; Mayoyao" msgid "Ifugao; Mayoyao"
msgstr "" msgstr "Ifugao; Mayoyao"
#. name for ify #. name for ify
msgid "Kallahan; Keley-I" msgid "Kallahan; Keley-I"
msgstr "" msgstr "Kallahan; Keley-I"
#. name for igb #. name for igb
msgid "Ebira" msgid "Ebira"
msgstr "" msgstr "Ebira"
#. name for ige #. name for ige
msgid "Igede" msgid "Igede"

View File

@ -8,14 +8,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-" "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n" "devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n" "POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2012-03-25 12:19+0000\n" "PO-Revision-Date: 2012-05-03 14:49+0000\n"
"Last-Translator: Radan Putnik <srastral@gmail.com>\n" "Last-Translator: Иван Старчевић <ivanstar61@gmail.com>\n"
"Language-Team: Serbian <gnu@prevod.org>\n" "Language-Team: Serbian <gnu@prevod.org>\n"
"MIME-Version: 1.0\n" "MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n" "Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n" "Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2012-03-26 04:37+0000\n" "X-Launchpad-Export-Date: 2012-05-04 04:47+0000\n"
"X-Generator: Launchpad (build 15008)\n" "X-Generator: Launchpad (build 15195)\n"
"Language: sr\n" "Language: sr\n"
#. name for aaa #. name for aaa
@ -6152,7 +6152,7 @@ msgstr ""
#. name for deu #. name for deu
msgid "German" msgid "German"
msgstr "немачки" msgstr "Немачки"
#. name for dev #. name for dev
msgid "Domung" msgid "Domung"
@ -8416,7 +8416,7 @@ msgstr "ирски"
#. name for glg #. name for glg
msgid "Galician" msgid "Galician"
msgstr "" msgstr "Галицијски"
#. name for glh #. name for glh
msgid "Pashayi; Northwest" msgid "Pashayi; Northwest"
@ -8472,11 +8472,11 @@ msgstr ""
#. name for gmh #. name for gmh
msgid "German; Middle High (ca. 1050-1500)" msgid "German; Middle High (ca. 1050-1500)"
msgstr "" msgstr "Немачки; средње високи (ca. 1050-1500)"
#. name for gml #. name for gml
msgid "German; Middle Low" msgid "German; Middle Low"
msgstr "" msgstr "Немачки; средње низак"
#. name for gmm #. name for gmm
msgid "Gbaya-Mbodomo" msgid "Gbaya-Mbodomo"
@ -8792,7 +8792,7 @@ msgstr ""
#. name for gsg #. name for gsg
msgid "German Sign Language" msgid "German Sign Language"
msgstr "" msgstr "Немачки језик"
#. name for gsl #. name for gsl
msgid "Gusilay" msgid "Gusilay"
@ -8820,7 +8820,7 @@ msgstr ""
#. name for gsw #. name for gsw
msgid "German; Swiss" msgid "German; Swiss"
msgstr "" msgstr "Немачки ; Швајцарска"
#. name for gta #. name for gta
msgid "Guató" msgid "Guató"
@ -17954,7 +17954,7 @@ msgstr ""
#. name for nds #. name for nds
msgid "German; Low" msgid "German; Low"
msgstr "" msgstr "Немачки; низак"
#. name for ndt #. name for ndt
msgid "Ndunga" msgid "Ndunga"
@ -18778,7 +18778,7 @@ msgstr ""
#. name for nno #. name for nno
msgid "Norwegian Nynorsk" msgid "Norwegian Nynorsk"
msgstr "норвешки модерни" msgstr "Норвешки модерни"
#. name for nnp #. name for nnp
msgid "Naga; Wancho" msgid "Naga; Wancho"
@ -18830,7 +18830,7 @@ msgstr ""
#. name for nob #. name for nob
msgid "Norwegian Bokmål" msgid "Norwegian Bokmål"
msgstr "" msgstr "Норвешки (књижевни)"
#. name for noc #. name for noc
msgid "Nuk" msgid "Nuk"
@ -18886,7 +18886,7 @@ msgstr ""
#. name for nor #. name for nor
msgid "Norwegian" msgid "Norwegian"
msgstr "норвешки" msgstr "Норвешки"
#. name for nos #. name for nos
msgid "Nisu; Eastern" msgid "Nisu; Eastern"
@ -19066,7 +19066,7 @@ msgstr ""
#. name for nsl #. name for nsl
msgid "Norwegian Sign Language" msgid "Norwegian Sign Language"
msgstr "" msgstr "Норвешки језик"
#. name for nsm #. name for nsm
msgid "Naga; Sumi" msgid "Naga; Sumi"
@ -20406,7 +20406,7 @@ msgstr ""
#. name for pdc #. name for pdc
msgid "German; Pennsylvania" msgid "German; Pennsylvania"
msgstr "" msgstr "Немачки ; Пенсилванија"
#. name for pdi #. name for pdi
msgid "Pa Di" msgid "Pa Di"
@ -22086,7 +22086,7 @@ msgstr ""
#. name for rmg #. name for rmg
msgid "Norwegian; Traveller" msgid "Norwegian; Traveller"
msgstr "" msgstr "Норвешки; путнички"
#. name for rmh #. name for rmh
msgid "Murkim" msgid "Murkim"
@ -22871,7 +22871,7 @@ msgstr ""
#. name for sgg #. name for sgg
msgid "Swiss-German Sign Language" msgid "Swiss-German Sign Language"
msgstr "" msgstr "Швајцарско-Немачки језик"
#. name for sgh #. name for sgh
msgid "Shughni" msgid "Shughni"

View File

@ -26,7 +26,7 @@ def get_opts_from_parser(parser):
class Coffee(Command): # {{{ class Coffee(Command): # {{{
description = 'Compile coffeescript files into javascript' description = 'Compile coffeescript files into javascript'
COFFEE_DIRS = {'ebooks/oeb/display': 'display'} COFFEE_DIRS = ('ebooks/oeb/display',)
def add_options(self, parser): def add_options(self, parser):
parser.add_option('--watch', '-w', action='store_true', default=False, parser.add_option('--watch', '-w', action='store_true', default=False,
@ -47,47 +47,67 @@ class Coffee(Command): # {{{
except KeyboardInterrupt: except KeyboardInterrupt:
pass pass
def show_js(self, jsfile): def show_js(self, raw):
from pygments.lexers import JavascriptLexer from pygments.lexers import JavascriptLexer
from pygments.formatters import TerminalFormatter from pygments.formatters import TerminalFormatter
from pygments import highlight from pygments import highlight
with open(jsfile, 'rb') as f:
raw = f.read()
print highlight(raw, JavascriptLexer(), TerminalFormatter()) print highlight(raw, JavascriptLexer(), TerminalFormatter())
def do_coffee_compile(self, opts, timestamp=False, ignore_errors=False): def do_coffee_compile(self, opts, timestamp=False, ignore_errors=False):
for toplevel, dest in self.COFFEE_DIRS.iteritems(): src_files = {}
dest = self.j(self.RESOURCES, dest) for src in self.COFFEE_DIRS:
for x in glob.glob(self.j(self.SRC, __appname__, toplevel, '*.coffee')): for f in glob.glob(self.j(self.SRC, __appname__, src,
js = self.j(dest, os.path.basename(x.rpartition('.')[0]+'.js')) '*.coffee')):
if self.newer(js, x): bn = os.path.basename(f).rpartition('.')[0]
arcname = src.replace('/', '.') + '.' + bn + '.js'
src_files[arcname] = (f, os.stat(f).st_mtime)
existing = {}
dest = self.j(self.RESOURCES, 'compiled_coffeescript.zip')
if os.path.exists(dest):
with zipfile.ZipFile(dest, 'r') as zf:
for info in zf.infolist():
mtime = time.mktime(info.date_time + (0, 0, -1))
arcname = info.filename
if (arcname in src_files and src_files[arcname][1] <
mtime):
existing[arcname] = (zf.read(info), info)
todo = set(src_files) - set(existing)
updated = {}
for arcname in todo:
name = arcname.rpartition('.')[0]
print ('\t%sCompiling %s'%(time.strftime('[%H:%M:%S] ') if print ('\t%sCompiling %s'%(time.strftime('[%H:%M:%S] ') if
timestamp else '', os.path.basename(x))) timestamp else '', name))
src = src_files[arcname][0]
try: try:
cs = subprocess.check_output(self.compiler + js = subprocess.check_output(self.compiler +
[x]).decode('utf-8') [src]).decode('utf-8')
except Exception as e: except Exception as e:
print ('\n\tCompilation of %s failed'%os.path.basename(x)) print ('\n\tCompilation of %s failed'%name)
print (e) print (e)
if ignore_errors: if ignore_errors:
with open(js, 'wb') as f: js = u'# Compilation from coffeescript failed'
f.write('# Compilation from coffeescript failed')
else: else:
raise SystemExit(1) raise SystemExit(1)
else: else:
with open(js, 'wb') as f:
f.write(cs.encode('utf-8'))
if opts.show_js: if opts.show_js:
self.show_js(js) self.show_js(js)
print ('#'*80) print ('#'*80)
print ('#'*80) print ('#'*80)
zi = zipfile.ZipInfo()
zi.filename = arcname
zi.date_time = time.localtime()[:6]
updated[arcname] = (js.encode('utf-8'), zi)
if updated:
with zipfile.ZipFile(dest, 'w', zipfile.ZIP_STORED) as zf:
for raw, zi in updated.itervalues():
zf.writestr(zi, raw)
for raw, zi in existing.itervalues():
zf.writestr(zi, raw)
def clean(self): def clean(self):
for toplevel, dest in self.COFFEE_DIRS.iteritems(): x = self.j(self.RESOURCES, 'compiled_coffeescript.zip')
dest = self.j(self.RESOURCES, dest)
for x in glob.glob(self.j(self.SRC, __appname__, toplevel, '*.coffee')):
x = x.rpartition('.')[0] + '.js'
x = self.j(dest, os.path.basename(x))
if os.path.exists(x): if os.path.exists(x):
os.remove(x) os.remove(x)
# }}} # }}}

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
__appname__ = u'calibre' __appname__ = u'calibre'
numeric_version = (0, 8, 50) numeric_version = (0, 8, 51)
__version__ = u'.'.join(map(unicode, numeric_version)) __version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>" __author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -302,7 +302,9 @@ class OutputFormatPlugin(Plugin):
:param item: The item (HTML file) being processed :param item: The item (HTML file) being processed
:param stylizer: A Stylizer object containing the flattened styles for :param stylizer: A Stylizer object containing the flattened styles for
item. You can get the style for any element by stylizer.style(element). item. You can get the style for any element by
stylizer.style(element).
''' '''
pass pass

View File

@ -57,6 +57,7 @@ class ANDROID(USBMS):
0x4316 : [0x216], 0x4316 : [0x216],
0x42d6 : [0x216], 0x42d6 : [0x216],
0x42d7 : [0x216], 0x42d7 : [0x216],
0x42f7 : [0x216],
}, },
# Freescale # Freescale
0x15a2 : { 0x15a2 : {
@ -193,7 +194,7 @@ class ANDROID(USBMS):
'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855', 'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855',
'XT910', 'BOOK_A10', 'USB_2.0_DRIVER', 'I9100T', 'P999DW', 'XT910', 'BOOK_A10', 'USB_2.0_DRIVER', 'I9100T', 'P999DW',
'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD', 'USB_2.0_DRIVER', 'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD', 'USB_2.0_DRIVER',
'GT-S5830L_CARD', 'UNIVERSE'] 'GT-S5830L_CARD', 'UNIVERSE', 'XT875']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD', 'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
@ -201,7 +202,8 @@ class ANDROID(USBMS):
'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853', 'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853',
'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD', 'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD',
'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC', 'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC',
'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER'] 'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER', 'XT875',
'UMS_COMPOSITE']
OSX_MAIN_MEM = 'Android Device Main Memory' OSX_MAIN_MEM = 'Android Device Main Memory'

View File

@ -92,6 +92,10 @@ class POCKETBOOK360(EB600):
name = 'PocketBook 360 Device Interface' name = 'PocketBook 360 Device Interface'
gui_name = 'PocketBook 360' gui_name = 'PocketBook 360'
VENDOR_ID = [0x1f85, 0x525]
PRODUCT_ID = [0x1688, 0xa4a5]
BCD = [0x110]
FORMATS = ['epub', 'fb2', 'prc', 'mobi', 'pdf', 'djvu', 'rtf', 'chm', 'txt'] FORMATS = ['epub', 'fb2', 'prc', 'mobi', 'pdf', 'djvu', 'rtf', 'chm', 'txt']

View File

@ -1,4 +1,25 @@
from __future__ import with_statement # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL 3' from __future__ import (unicode_literals, division, absolute_import,
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
class ConversionUserFeedBack(Exception):
def __init__(self, title, msg, level='info', det_msg=''):
''' Show a simple message to the user
:param title: The title (very short description)
:param msg: The message to show the user
:param level: Must be one of 'info', 'warn' or 'error'
:param det_msg: Optional detailed message to show the user
'''
import json
Exception.__init__(self, json.dumps({'msg':msg, 'level':level,
'det_msg':det_msg, 'title':title}))
self.title, self.msg, self.det_msg = title, msg, det_msg
self.level = level

View File

@ -15,6 +15,7 @@ from calibre.utils.logging import Log
from calibre.constants import preferred_encoding from calibre.constants import preferred_encoding
from calibre.customize.conversion import OptionRecommendation from calibre.customize.conversion import OptionRecommendation
from calibre import patheq from calibre import patheq
from calibre.ebooks.conversion import ConversionUserFeedBack
USAGE = '%prog ' + _('''\ USAGE = '%prog ' + _('''\
input_file output_file [options] input_file output_file [options]
@ -304,7 +305,10 @@ def read_sr_patterns(path, log=None):
def main(args=sys.argv): def main(args=sys.argv):
log = Log() log = Log()
parser, plumber = create_option_parser(args, log) parser, plumber = create_option_parser(args, log)
opts = parser.parse_args(args)[0] opts, leftover_args = parser.parse_args(args)
if len(leftover_args) > 3:
log.error('Extra arguments not understood:', u', '.join(leftover_args[3:]))
return 1
for x in ('read_metadata_from_opf', 'cover'): for x in ('read_metadata_from_opf', 'cover'):
if getattr(opts, x, None) is not None: if getattr(opts, x, None) is not None:
setattr(opts, x, abspath(getattr(opts, x))) setattr(opts, x, abspath(getattr(opts, x)))
@ -317,7 +321,16 @@ def main(args=sys.argv):
if n.dest] if n.dest]
plumber.merge_ui_recommendations(recommendations) plumber.merge_ui_recommendations(recommendations)
try:
plumber.run() plumber.run()
except ConversionUserFeedBack as e:
ll = {'info': log.info, 'warn': log.warn,
'error':log.error}.get(e.level, log.info)
ll(e.title)
if e.det_msg:
log.debug(e.detmsg)
ll(e.msg)
raise SystemExit(1)
log(_('Output saved to'), ' ', plumber.output) log(_('Output saved to'), ' ', plumber.output)

View File

@ -207,7 +207,7 @@ class EPUBInput(InputFormatPlugin):
if rc: if rc:
cover_toc_item = None cover_toc_item = None
for item in oeb.toc.iterdescendants(): for item in oeb.toc.iterdescendants():
if item.href == rc: if item.href and item.href.partition('#')[0] == rc:
cover_toc_item = item cover_toc_item = item
break break
spine = {x.href for x in oeb.spine} spine = {x.href for x in oeb.spine}

View File

@ -393,8 +393,14 @@ class EPUBOutput(OutputFormatPlugin):
for tag in XPath('//h:body/descendant::h:script')(root): for tag in XPath('//h:body/descendant::h:script')(root):
tag.getparent().remove(tag) tag.getparent().remove(tag)
formchildren = XPath('./h:input|./h:button|./h:textarea|'
'./h:label|./h:fieldset|./h:legend')
for tag in XPath('//h:form')(root): for tag in XPath('//h:form')(root):
if formchildren(tag):
tag.getparent().remove(tag) tag.getparent().remove(tag)
else:
# Not a real form
tag.tag = XHTML('div')
for tag in XPath('//h:center')(root): for tag in XPath('//h:center')(root):
tag.tag = XHTML('div') tag.tag = XHTML('div')

View File

@ -12,7 +12,7 @@ class MOBIInput(InputFormatPlugin):
name = 'MOBI Input' name = 'MOBI Input'
author = 'Kovid Goyal' author = 'Kovid Goyal'
description = 'Convert MOBI files (.mobi, .prc, .azw) to HTML' description = 'Convert MOBI files (.mobi, .prc, .azw) to HTML'
file_types = set(['mobi', 'prc', 'azw', 'azw3']) file_types = set(['mobi', 'prc', 'azw', 'azw3', 'pobi'])
def convert(self, stream, options, file_ext, log, def convert(self, stream, options, file_ext, log,
accelerators): accelerators):

View File

@ -343,21 +343,25 @@ OptionRecommendation(name='remove_fake_margins',
OptionRecommendation(name='margin_top', OptionRecommendation(name='margin_top',
recommended_value=5.0, level=OptionRecommendation.LOW, recommended_value=5.0, level=OptionRecommendation.LOW,
help=_('Set the top margin in pts. Default is %default. ' help=_('Set the top margin in pts. Default is %default. '
'Setting this to less than zero will cause no margin to be set. '
'Note: 72 pts equals 1 inch')), 'Note: 72 pts equals 1 inch')),
OptionRecommendation(name='margin_bottom', OptionRecommendation(name='margin_bottom',
recommended_value=5.0, level=OptionRecommendation.LOW, recommended_value=5.0, level=OptionRecommendation.LOW,
help=_('Set the bottom margin in pts. Default is %default. ' help=_('Set the bottom margin in pts. Default is %default. '
'Setting this to less than zero will cause no margin to be set. '
'Note: 72 pts equals 1 inch')), 'Note: 72 pts equals 1 inch')),
OptionRecommendation(name='margin_left', OptionRecommendation(name='margin_left',
recommended_value=5.0, level=OptionRecommendation.LOW, recommended_value=5.0, level=OptionRecommendation.LOW,
help=_('Set the left margin in pts. Default is %default. ' help=_('Set the left margin in pts. Default is %default. '
'Setting this to less than zero will cause no margin to be set. '
'Note: 72 pts equals 1 inch')), 'Note: 72 pts equals 1 inch')),
OptionRecommendation(name='margin_right', OptionRecommendation(name='margin_right',
recommended_value=5.0, level=OptionRecommendation.LOW, recommended_value=5.0, level=OptionRecommendation.LOW,
help=_('Set the right margin in pts. Default is %default. ' help=_('Set the right margin in pts. Default is %default. '
'Setting this to less than zero will cause no margin to be set. '
'Note: 72 pts equals 1 inch')), 'Note: 72 pts equals 1 inch')),
OptionRecommendation(name='change_justification', OptionRecommendation(name='change_justification',
@ -885,7 +889,10 @@ OptionRecommendation(name='search_replace',
self.log.debug('Resolved conversion options') self.log.debug('Resolved conversion options')
try: try:
self.log.debug('calibre version:', __version__) self.log.debug('calibre version:', __version__)
self.log.debug(pprint.pformat(self.opts.__dict__)) odict = dict(self.opts.__dict__)
for x in ('username', 'password'):
odict.pop(x, None)
self.log.debug(pprint.pformat(odict))
except: except:
self.log.exception('Failed to get resolved conversion options') self.log.exception('Failed to get resolved conversion options')

View File

@ -5,7 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re, error as re_error import re
from math import ceil from math import ceil
from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
from calibre.utils.logging import default_log from calibre.utils.logging import default_log
@ -184,7 +184,7 @@ class HeuristicProcessor(object):
except OverflowError: except OverflowError:
# match.group(0) was too large to be compiled into a regex # match.group(0) was too large to be compiled into a regex
continue continue
except re_error: except re.error:
# the match was not a valid regular expression # the match was not a valid regular expression
continue continue

View File

@ -113,6 +113,11 @@ class HTMLFile(object):
raise IOError(msg) raise IOError(msg)
raise IgnoreFile(msg, err.errno) raise IgnoreFile(msg, err.errno)
if not src:
if level == 0:
raise ValueError('The file %s is empty'%self.path)
self.is_binary = True
if not self.is_binary: if not self.is_binary:
if not encoding: if not encoding:
encoding = detect_xml_encoding(src[:4096], verbose=verbose)[1] encoding = detect_xml_encoding(src[:4096], verbose=verbose)[1]

View File

@ -18,7 +18,7 @@ from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import (Source, Option, fixcase, from calibre.ebooks.metadata.sources.base import (Source, Option, fixcase,
fixauthors) fixauthors)
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import parse_date from calibre.utils.date import parse_only_date
from calibre.utils.localization import canonicalize_lang from calibre.utils.localization import canonicalize_lang
class Worker(Thread): # Get details {{{ class Worker(Thread): # Get details {{{
@ -471,7 +471,7 @@ class Worker(Thread): # Get details {{{
ans = x.tail ans = x.tail
date = ans.rpartition('(')[-1].replace(')', '').strip() date = ans.rpartition('(')[-1].replace(')', '').strip()
date = self.delocalize_datestr(date) date = self.delocalize_datestr(date)
return parse_date(date, assume_utc=True) return parse_only_date(date, assume_utc=True)
def parse_language(self, pd): def parse_language(self, pd):
for x in reversed(pd.xpath(self.language_xpath)): for x in reversed(pd.xpath(self.language_xpath)):

View File

@ -306,6 +306,11 @@ class MOBIHeader(object): # {{{
self.extra_data_flags = 0 self.extra_data_flags = 0
if self.has_extra_data_flags: if self.has_extra_data_flags:
self.unknown4 = self.raw[184:192] self.unknown4 = self.raw[184:192]
if self.file_version < 8:
self.first_text_record, self.last_text_record = \
struct.unpack_from(b'>HH', self.raw, 192)
self.fdst_count = struct.unpack_from(b'>L', self.raw, 196)
else:
self.fdst_idx, self.fdst_count = struct.unpack_from(b'>LL', self.fdst_idx, self.fdst_count = struct.unpack_from(b'>LL',
self.raw, 192) self.raw, 192)
if self.fdst_count <= 1: if self.fdst_count <= 1:
@ -409,6 +414,10 @@ class MOBIHeader(object): # {{{
a('DRM Flags: %r'%self.drm_flags) a('DRM Flags: %r'%self.drm_flags)
if self.has_extra_data_flags: if self.has_extra_data_flags:
a('Unknown4: %r'%self.unknown4) a('Unknown4: %r'%self.unknown4)
if hasattr(self, 'first_text_record'):
a('First content record: %d'%self.first_text_record)
a('Last content record: %d'%self.last_text_record)
else:
r('FDST Index', 'fdst_idx') r('FDST Index', 'fdst_idx')
a('FDST Count: %d'% self.fdst_count) a('FDST Count: %d'% self.fdst_count)
r('FCIS number', 'fcis_number') r('FCIS number', 'fcis_number')

View File

@ -111,7 +111,11 @@ def update_flow_links(mobi8_reader, resource_map, log):
continue continue
if not isinstance(flow, unicode): if not isinstance(flow, unicode):
try:
flow = flow.decode(mr.header.codec) flow = flow.decode(mr.header.codec)
except UnicodeDecodeError:
log.error('Flow part has invalid %s encoded bytes'%mr.header.codec)
flow = flow.decode(mr.header.codec, 'replace')
# links to raster image files from image tags # links to raster image files from image tags
# image_pattern # image_pattern

View File

@ -207,9 +207,9 @@ class Mobi8Reader(object):
fname = 'svgimg' + nstr + '.svg' fname = 'svgimg' + nstr + '.svg'
else: else:
# search for CDATA and if exists inline it # search for CDATA and if exists inline it
if flowpart.find('[CDATA[') >= 0: if flowpart.find(b'[CDATA[') >= 0:
typ = 'css' typ = 'css'
flowpart = '<style type="text/css">\n' + flowpart + '\n</style>\n' flowpart = b'<style type="text/css">\n' + flowpart + b'\n</style>\n'
format = 'inline' format = 'inline'
dir = None dir = None
fname = None fname = None

View File

@ -382,6 +382,7 @@ class MobiWriter(object):
first_image_record = len(self.records) first_image_record = len(self.records)
self.resources.serialize(self.records, used_images) self.resources.serialize(self.records, used_images)
resource_record_count = len(self.records) - old resource_record_count = len(self.records) - old
last_content_record = len(self.records) - 1
# FCIS/FLIS (Seems to serve no purpose) # FCIS/FLIS (Seems to serve no purpose)
flis_number = len(self.records) flis_number = len(self.records)
@ -406,7 +407,7 @@ class MobiWriter(object):
# header # header
header_fields['first_resource_record'] = first_image_record header_fields['first_resource_record'] = first_image_record
header_fields['exth_flags'] = 0b100001010000 # Kinglegen uses this header_fields['exth_flags'] = 0b100001010000 # Kinglegen uses this
header_fields['fdst_record'] = NULL_INDEX header_fields['fdst_record'] = pack(b'>HH', 1, last_content_record)
header_fields['fdst_count'] = 1 # Why not 0? Kindlegen uses 1 header_fields['fdst_count'] = 1 # Why not 0? Kindlegen uses 1
header_fields['flis_record'] = flis_number header_fields['flis_record'] = flis_number
header_fields['fcis_record'] = fcis_number header_fields['fcis_record'] = fcis_number

View File

@ -314,9 +314,9 @@ class KF8Writer(object):
return return
# Flatten the ToC into a depth first list # Flatten the ToC into a depth first list
fl = toc.iter() if is_periodical else toc.iterdescendants() fl = toc.iterdescendants()
for i, item in enumerate(fl): for i, item in enumerate(fl):
entry = {'id': id(item), 'index': i, 'href':item.href, entry = {'id': id(item), 'index': i, 'href':item.href or '',
'label':(item.title or _('Unknown')), 'label':(item.title or _('Unknown')),
'children':[]} 'children':[]}
entry['depth'] = getattr(item, 'ncx_hlvl', 0) entry['depth'] = getattr(item, 'ncx_hlvl', 0)

View File

@ -138,6 +138,8 @@ class MOBIHeader(Header): # {{{
unknown2 = zeroes(8) unknown2 = zeroes(8)
# 192: FDST # 192: FDST
# In MOBI 6 the fdst record is instead two two byte fields storing the
# index of the first and last content records
fdst_record = DYN fdst_record = DYN
fdst_count = DYN fdst_count = DYN

View File

@ -966,7 +966,7 @@ class Manifest(object):
data = data.cssText data = data.cssText
if isinstance(data, unicode): if isinstance(data, unicode):
data = data.encode('utf-8') data = data.encode('utf-8')
return data return data + b'\n'
return str(data) return str(data)
def __unicode__(self): def __unicode__(self):

View File

@ -389,8 +389,17 @@ class CanonicalFragmentIdentifier
# Drill down into iframes, etc. # Drill down into iframes, etc.
while true while true
target = cdoc.elementFromPoint x, y target = cdoc.elementFromPoint x, y
if not target or target.localName == 'html' if not target or target.localName in ['html', 'body']
log("No element at (#{ x }, #{ y })") # We ignore both html and body even though body could
# have text nodes under it as performance is very poor if body
# has large margins/padding (for e.g. in fullscreen mode)
# A possible solution for this is to wrap all text node
# children of body in <span> but that is seriously ugly and
# might have side effects. Lets do this only if there are lots of
# books in the wild that actually have text children of body,
# and even in this case it might be better to change the input
# plugin to prevent this from happening.
# log("No element at (#{ x }, #{ y })")
return null return null
name = target.localName name = target.localName

View File

@ -0,0 +1,76 @@
#!/usr/bin/env coffee
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
###
Copyright 2012, Kovid Goyal <kovid@kovidgoyal.net>
Released under the GPLv3 License
###
body_height = () ->
db = document.body
dde = document.documentElement
if db? and dde?
return Math.max(db.scrollHeight, dde.scrollHeight, db.offsetHeight,
dde.offsetHeight, db.clientHeight, dde.clientHeight)
return 0
abstop = (elem) ->
ans = elem.offsetTop
while elem.offsetParent
elem = elem.offsetParent
ans += elem.offsetTop
return ans
class BookIndexing
###
This class is a namespace to expose indexing functions via the
window.book_indexing object. The most important functions are:
anchor_positions(): Get the absolute (document co-ordinate system) position
for elements with the specified id/name attributes.
###
constructor: () ->
this.cache = {}
this.body_height_at_last_check = null
cache_valid: (anchors) ->
for a in anchors
if not Object.prototype.hasOwnProperty.call(this.cache, a)
return false
for p of this.cache
if Object.prototype.hasOwnProperty.call(this.cache, p) and p not in anchors
return false
return true
anchor_positions: (anchors, use_cache=false) ->
if use_cache and body_height() == this.body_height_at_last_check and this.cache_valid(anchors)
return this.cache
ans = {}
for anchor in anchors
elem = document.getElementById(anchor)
if elem == null
# Look for an <a name="anchor"> element
try
result = document.evaluate(
".//*[local-name() = 'a' and @name='#{ anchor }']",
document.body, null,
XPathResult.FIRST_ORDERED_NODE_TYPE, null)
elem = result.singleNodeValue
catch error
# The anchor had a ' or other invalid char
elem = null
if elem == null
pos = body_height() + 10000
else
pos = abstop(elem)
ans[anchor] = pos
this.cache = ans
this.body_height_at_last_check = body_height()
return ans
if window?
window.book_indexing = new BookIndexing()

View File

@ -1,383 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008 Kovid Goyal <kovid at kovidgoyal.net>'
'''
Iterate over the HTML files in an ebook. Useful for writing viewers.
'''
import re, os, math
from cStringIO import StringIO
from PyQt4.Qt import QFontDatabase
from calibre.customize.ui import available_input_formats
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.zipfile import safe_replace
from calibre.utils.config import DynamicConfig
from calibre.utils.logging import Log
from calibre import (guess_type, prints, prepare_string_for_xml,
xml_replace_entities)
from calibre.ebooks.oeb.transforms.cover import CoverManager
from calibre.constants import filesystem_encoding
TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(\
'__ar__', 'none').replace('__viewbox__', '0 0 600 800'
).replace('__width__', '600').replace('__height__', '800')
BM_FIELD_SEP = u'*|!|?|*'
BM_LEGACY_ESC = u'esc-text-%&*#%(){}ads19-end-esc'
def character_count(html):
'''
Return the number of "significant" text characters in a HTML string.
'''
count = 0
strip_space = re.compile(r'\s+')
for match in re.finditer(r'>[^<]+<', html):
count += len(strip_space.sub(' ', match.group()))-2
return count
class UnsupportedFormatError(Exception):
def __init__(self, fmt):
Exception.__init__(self, _('%s format books are not supported')%fmt.upper())
class SpineItem(unicode):
def __new__(cls, path, mime_type=None):
ppath = path.partition('#')[0]
if not os.path.exists(path) and os.path.exists(ppath):
path = ppath
obj = super(SpineItem, cls).__new__(cls, path)
raw = open(path, 'rb').read()
raw, obj.encoding = xml_to_unicode(raw)
obj.character_count = character_count(raw)
obj.start_page = -1
obj.pages = -1
obj.max_page = -1
if mime_type is None:
mime_type = guess_type(obj)[0]
obj.mime_type = mime_type
return obj
class FakeOpts(object):
verbose = 0
breadth_first = False
max_levels = 5
input_encoding = None
def is_supported(path):
ext = os.path.splitext(path)[1].replace('.', '').lower()
ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
return ext in available_input_formats()
def write_oebbook(oeb, path):
from calibre.ebooks.oeb.writer import OEBWriter
from calibre import walk
w = OEBWriter()
w(oeb, path)
for f in walk(path):
if f.endswith('.opf'):
return f
class EbookIterator(object):
CHARACTERS_PER_PAGE = 1000
def __init__(self, pathtoebook, log=None):
self.log = log
if log is None:
self.log = Log()
pathtoebook = pathtoebook.strip()
self.pathtoebook = os.path.abspath(pathtoebook)
self.config = DynamicConfig(name='iterator')
ext = os.path.splitext(pathtoebook)[1].replace('.', '').lower()
ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
self.ebook_ext = ext.replace('original_', '')
def search(self, text, index, backwards=False):
text = prepare_string_for_xml(text.lower())
pmap = [(i, path) for i, path in enumerate(self.spine)]
if backwards:
pmap.reverse()
for i, path in pmap:
if (backwards and i < index) or (not backwards and i > index):
with open(path, 'rb') as f:
raw = f.read().decode(path.encoding)
try:
raw = xml_replace_entities(raw)
except:
pass
if text in raw.lower():
return i
def find_missing_css_files(self):
for x in os.walk(os.path.dirname(self.pathtoopf)):
for f in x[-1]:
if f.endswith('.css'):
yield os.path.join(x[0], f)
def find_declared_css_files(self):
for item in self.opf.manifest:
if item.mime_type and 'css' in item.mime_type.lower():
yield item.path
def find_embedded_fonts(self):
'''
This will become unnecessary once Qt WebKit supports the @font-face rule.
'''
css_files = set(self.find_declared_css_files())
if not css_files:
css_files = set(self.find_missing_css_files())
bad_map = {}
font_family_pat = re.compile(r'font-family\s*:\s*([^;]+)')
for csspath in css_files:
try:
css = open(csspath, 'rb').read().decode('utf-8', 'replace')
except:
continue
for match in re.compile(r'@font-face\s*{([^}]+)}').finditer(css):
block = match.group(1)
family = font_family_pat.search(block)
url = re.compile(r'url\s*\([\'"]*(.+?)[\'"]*\)', re.DOTALL).search(block)
if url:
path = url.group(1).split('/')
path = os.path.join(os.path.dirname(csspath), *path)
if not os.access(path, os.R_OK):
continue
id = QFontDatabase.addApplicationFont(path)
if id != -1:
families = [unicode(f) for f in QFontDatabase.applicationFontFamilies(id)]
if family:
family = family.group(1)
specified_families = [x.strip().replace('"',
'').replace("'", '') for x in family.split(',')]
aliasing_ok = False
for f in specified_families:
bad_map[f] = families[0]
if not aliasing_ok and f in families:
aliasing_ok = True
if not aliasing_ok:
prints('WARNING: Family aliasing not fully supported.')
prints('\tDeclared family: %r not in actual families: %r'
% (family, families))
else:
prints('Loaded embedded font:', repr(family))
if bad_map:
def prepend_embedded_font(match):
for bad, good in bad_map.items():
if bad in match.group(1):
prints('Substituting font family: %s -> %s'%(bad, good))
return match.group().replace(bad, '"%s"'%good)
from calibre.ebooks.chardet import force_encoding
for csspath in css_files:
with open(csspath, 'r+b') as f:
css = f.read()
enc = force_encoding(css, False)
css = css.decode(enc, 'replace')
ncss = font_family_pat.sub(prepend_embedded_font, css)
if ncss != css:
f.seek(0)
f.truncate()
f.write(ncss.encode(enc))
def __enter__(self, processed=False, only_input_plugin=False):
self.delete_on_exit = []
self._tdir = TemporaryDirectory('_ebook_iter')
self.base = self._tdir.__enter__()
if not isinstance(self.base, unicode):
self.base = self.base.decode(filesystem_encoding)
from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
plumber = Plumber(self.pathtoebook, self.base, self.log)
plumber.setup_options()
if self.pathtoebook.lower().endswith('.opf'):
plumber.opts.dont_package = True
if hasattr(plumber.opts, 'no_process'):
plumber.opts.no_process = True
plumber.input_plugin.for_viewer = True
with plumber.input_plugin:
self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
plumber.opts, plumber.input_fmt, self.log,
{}, self.base)
if not only_input_plugin:
if processed or plumber.input_fmt.lower() in ('pdb', 'pdf', 'rb') and \
not hasattr(self.pathtoopf, 'manifest'):
if hasattr(self.pathtoopf, 'manifest'):
self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
self.pathtoopf = create_oebbook(self.log, self.pathtoopf,
plumber.opts)
if hasattr(self.pathtoopf, 'manifest'):
self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper()
if getattr(plumber.input_plugin, 'is_kf8', False):
self.book_format = 'KF8'
self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
if self.opf is None:
self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
self.language = self.opf.language
if self.language:
self.language = self.language.lower()
ordered = [i for i in self.opf.spine if i.is_linear] + \
[i for i in self.opf.spine if not i.is_linear]
self.spine = []
for i in ordered:
spath = i.path
mt = None
if i.idref is not None:
mt = self.opf.manifest.type_for_id(i.idref)
if mt is None:
mt = guess_type(spath)[0]
try:
self.spine.append(SpineItem(spath, mime_type=mt))
except:
self.log.warn('Missing spine item:', repr(spath))
cover = self.opf.cover
if self.ebook_ext in ('lit', 'mobi', 'prc', 'opf', 'fb2') and cover:
cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
chtml = (TITLEPAGE%prepare_string_for_xml(rcpath, True)).encode('utf-8')
open(cfile, 'wb').write(chtml)
self.spine[0:0] = [SpineItem(cfile,
mime_type='application/xhtml+xml')]
self.delete_on_exit.append(cfile)
if self.opf.path_to_html_toc is not None and \
self.opf.path_to_html_toc not in self.spine:
try:
self.spine.append(SpineItem(self.opf.path_to_html_toc))
except:
import traceback
traceback.print_exc()
sizes = [i.character_count for i in self.spine]
self.pages = [math.ceil(i/float(self.CHARACTERS_PER_PAGE)) for i in sizes]
for p, s in zip(self.pages, self.spine):
s.pages = p
start = 1
for s in self.spine:
s.start_page = start
start += s.pages
s.max_page = s.start_page + s.pages - 1
self.toc = self.opf.toc
self.read_bookmarks()
return self
def parse_bookmarks(self, raw):
for line in raw.splitlines():
bm = None
if line.count('^') > 0:
tokens = line.rpartition('^')
title, ref = tokens[0], tokens[2]
try:
spine, _, pos = ref.partition('#')
spine = int(spine.strip())
except:
continue
bm = {'type':'legacy', 'title':title, 'spine':spine, 'pos':pos}
elif BM_FIELD_SEP in line:
try:
title, spine, pos = line.strip().split(BM_FIELD_SEP)
spine = int(spine)
except:
continue
# Unescape from serialization
pos = pos.replace(BM_LEGACY_ESC, u'^')
# Check for pos being a scroll fraction
try:
pos = float(pos)
except:
pass
bm = {'type':'cfi', 'title':title, 'pos':pos, 'spine':spine}
if bm:
self.bookmarks.append(bm)
def serialize_bookmarks(self, bookmarks):
dat = []
for bm in bookmarks:
if bm['type'] == 'legacy':
rec = u'%s^%d#%s'%(bm['title'], bm['spine'], bm['pos'])
else:
pos = bm['pos']
if isinstance(pos, (int, float)):
pos = unicode(pos)
else:
pos = pos.replace(u'^', BM_LEGACY_ESC)
rec = BM_FIELD_SEP.join([bm['title'], unicode(bm['spine']), pos])
dat.append(rec)
return (u'\n'.join(dat) +u'\n')
def read_bookmarks(self):
self.bookmarks = []
bmfile = os.path.join(self.base, 'META-INF', 'calibre_bookmarks.txt')
raw = ''
if os.path.exists(bmfile):
with open(bmfile, 'rb') as f:
raw = f.read()
else:
saved = self.config['bookmarks_'+self.pathtoebook]
if saved:
raw = saved
if not isinstance(raw, unicode):
raw = raw.decode('utf-8')
self.parse_bookmarks(raw)
def save_bookmarks(self, bookmarks=None):
if bookmarks is None:
bookmarks = self.bookmarks
dat = self.serialize_bookmarks(bookmarks)
if os.path.splitext(self.pathtoebook)[1].lower() == '.epub' and \
os.access(self.pathtoebook, os.R_OK):
try:
zf = open(self.pathtoebook, 'r+b')
except IOError:
return
safe_replace(zf, 'META-INF/calibre_bookmarks.txt',
StringIO(dat.encode('utf-8')),
add_missing=True)
else:
self.config['bookmarks_'+self.pathtoebook] = dat
def add_bookmark(self, bm):
self.bookmarks = [x for x in self.bookmarks if x['title'] !=
bm['title']]
self.bookmarks.append(bm)
self.save_bookmarks()
def set_bookmarks(self, bookmarks):
self.bookmarks = bookmarks
def __exit__(self, *args):
self._tdir.__exit__(*args)
for x in self.delete_on_exit:
if os.path.exists(x):
os.remove(x)
def get_preprocess_html(path_to_ebook, output):
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
iterator = EbookIterator(path_to_ebook)
iterator.__enter__(only_input_plugin=True)
preprocessor = HTMLPreProcessor(None, False)
with open(output, 'wb') as out:
for path in iterator.spine:
with open(path, 'rb') as f:
html = f.read().decode('utf-8', 'replace')
html = preprocessor(html, get_preprocess_html=True)
out.write(html.encode('utf-8'))
out.write(b'\n\n' + b'-'*80 + b'\n\n')

View File

@ -0,0 +1,42 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, re
from calibre.customize.ui import available_input_formats
def is_supported(path):
ext = os.path.splitext(path)[1].replace('.', '').lower()
ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
return ext in available_input_formats()
class UnsupportedFormatError(Exception):
def __init__(self, fmt):
Exception.__init__(self, _('%s format books are not supported')%fmt.upper())
def EbookIterator(*args, **kwargs):
'For backwards compatibility'
from calibre.ebooks.oeb.iterator.book import EbookIterator
return EbookIterator(*args, **kwargs)
def get_preprocess_html(path_to_ebook, output):
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
iterator = EbookIterator(path_to_ebook)
iterator.__enter__(only_input_plugin=True, run_char_count=False,
read_anchor_map=False)
preprocessor = HTMLPreProcessor(None, False)
with open(output, 'wb') as out:
for path in iterator.spine:
with open(path, 'rb') as f:
html = f.read().decode('utf-8', 'replace')
html = preprocessor(html, get_preprocess_html=True)
out.write(html.encode('utf-8'))
out.write(b'\n\n' + b'-'*80 + b'\n\n')

View File

@ -0,0 +1,187 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
'''
Iterate over the HTML files in an ebook. Useful for writing viewers.
'''
import re, os, math
from functools import partial
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.config import DynamicConfig
from calibre.utils.logging import default_log
from calibre import (guess_type, prepare_string_for_xml,
xml_replace_entities)
from calibre.ebooks.oeb.transforms.cover import CoverManager
from calibre.ebooks.oeb.iterator.spine import (SpineItem, create_indexing_data)
from calibre.ebooks.oeb.iterator.bookmarks import BookmarksMixin
TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(\
'__ar__', 'none').replace('__viewbox__', '0 0 600 800'
).replace('__width__', '600').replace('__height__', '800')
class FakeOpts(object):
verbose = 0
breadth_first = False
max_levels = 5
input_encoding = None
def write_oebbook(oeb, path):
from calibre.ebooks.oeb.writer import OEBWriter
from calibre import walk
w = OEBWriter()
w(oeb, path)
for f in walk(path):
if f.endswith('.opf'):
return f
class EbookIterator(BookmarksMixin):
CHARACTERS_PER_PAGE = 1000
def __init__(self, pathtoebook, log=None):
self.log = log or default_log
pathtoebook = pathtoebook.strip()
self.pathtoebook = os.path.abspath(pathtoebook)
self.config = DynamicConfig(name='iterator')
ext = os.path.splitext(pathtoebook)[1].replace('.', '').lower()
ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
self.ebook_ext = ext.replace('original_', '')
def search(self, text, index, backwards=False):
text = prepare_string_for_xml(text.lower())
pmap = [(i, path) for i, path in enumerate(self.spine)]
if backwards:
pmap.reverse()
for i, path in pmap:
if (backwards and i < index) or (not backwards and i > index):
with open(path, 'rb') as f:
raw = f.read().decode(path.encoding)
try:
raw = xml_replace_entities(raw)
except:
pass
if text in raw.lower():
return i
def __enter__(self, processed=False, only_input_plugin=False,
run_char_count=True, read_anchor_map=True):
''' Convert an ebook file into an exploded OEB book suitable for
display in viewers/preprocessing etc. '''
from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
self.delete_on_exit = []
self._tdir = TemporaryDirectory('_ebook_iter')
self.base = self._tdir.__enter__()
plumber = Plumber(self.pathtoebook, self.base, self.log)
plumber.setup_options()
if self.pathtoebook.lower().endswith('.opf'):
plumber.opts.dont_package = True
if hasattr(plumber.opts, 'no_process'):
plumber.opts.no_process = True
plumber.input_plugin.for_viewer = True
with plumber.input_plugin, open(plumber.input, 'rb') as inf:
self.pathtoopf = plumber.input_plugin(inf,
plumber.opts, plumber.input_fmt, self.log,
{}, self.base)
if not only_input_plugin:
# Run the HTML preprocess/parsing from the conversion pipeline as
# well
if (processed or plumber.input_fmt.lower() in {'pdb', 'pdf', 'rb'}
and not hasattr(self.pathtoopf, 'manifest')):
if hasattr(self.pathtoopf, 'manifest'):
self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
self.pathtoopf = create_oebbook(self.log, self.pathtoopf,
plumber.opts)
if hasattr(self.pathtoopf, 'manifest'):
self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper()
if getattr(plumber.input_plugin, 'is_kf8', False):
self.book_format = 'KF8'
self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
if self.opf is None:
self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
self.language = self.opf.language
if self.language:
self.language = self.language.lower()
ordered = [i for i in self.opf.spine if i.is_linear] + \
[i for i in self.opf.spine if not i.is_linear]
self.spine = []
Spiny = partial(SpineItem, read_anchor_map=read_anchor_map,
run_char_count=run_char_count)
for i in ordered:
spath = i.path
mt = None
if i.idref is not None:
mt = self.opf.manifest.type_for_id(i.idref)
if mt is None:
mt = guess_type(spath)[0]
try:
self.spine.append(Spiny(spath, mime_type=mt))
except:
self.log.warn('Missing spine item:', repr(spath))
cover = self.opf.cover
if cover and self.ebook_ext in {'lit', 'mobi', 'prc', 'opf', 'fb2',
'azw', 'azw3'}:
cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
chtml = (TITLEPAGE%prepare_string_for_xml(rcpath, True)).encode('utf-8')
with open(cfile, 'wb') as f:
f.write(chtml)
self.spine[0:0] = [Spiny(cfile,
mime_type='application/xhtml+xml')]
self.delete_on_exit.append(cfile)
if self.opf.path_to_html_toc is not None and \
self.opf.path_to_html_toc not in self.spine:
try:
self.spine.append(Spiny(self.opf.path_to_html_toc))
except:
import traceback
traceback.print_exc()
sizes = [i.character_count for i in self.spine]
self.pages = [math.ceil(i/float(self.CHARACTERS_PER_PAGE)) for i in sizes]
for p, s in zip(self.pages, self.spine):
s.pages = p
start = 1
for s in self.spine:
s.start_page = start
start += s.pages
s.max_page = s.start_page + s.pages - 1
self.toc = self.opf.toc
if read_anchor_map:
create_indexing_data(self.spine, self.toc)
self.read_bookmarks()
return self
def __exit__(self, *args):
self._tdir.__exit__(*args)
for x in self.delete_on_exit:
try:
os.remove(x)
except:
pass

View File

@ -0,0 +1,105 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from io import BytesIO
from calibre.utils.zipfile import safe_replace
BM_FIELD_SEP = u'*|!|?|*'
BM_LEGACY_ESC = u'esc-text-%&*#%(){}ads19-end-esc'
class BookmarksMixin(object):
def parse_bookmarks(self, raw):
for line in raw.splitlines():
bm = None
if line.count('^') > 0:
tokens = line.rpartition('^')
title, ref = tokens[0], tokens[2]
try:
spine, _, pos = ref.partition('#')
spine = int(spine.strip())
except:
continue
bm = {'type':'legacy', 'title':title, 'spine':spine, 'pos':pos}
elif BM_FIELD_SEP in line:
try:
title, spine, pos = line.strip().split(BM_FIELD_SEP)
spine = int(spine)
except:
continue
# Unescape from serialization
pos = pos.replace(BM_LEGACY_ESC, u'^')
# Check for pos being a scroll fraction
try:
pos = float(pos)
except:
pass
bm = {'type':'cfi', 'title':title, 'pos':pos, 'spine':spine}
if bm:
self.bookmarks.append(bm)
def serialize_bookmarks(self, bookmarks):
dat = []
for bm in bookmarks:
if bm['type'] == 'legacy':
rec = u'%s^%d#%s'%(bm['title'], bm['spine'], bm['pos'])
else:
pos = bm['pos']
if isinstance(pos, (int, float)):
pos = unicode(pos)
else:
pos = pos.replace(u'^', BM_LEGACY_ESC)
rec = BM_FIELD_SEP.join([bm['title'], unicode(bm['spine']), pos])
dat.append(rec)
return (u'\n'.join(dat) +u'\n')
def read_bookmarks(self):
self.bookmarks = []
bmfile = os.path.join(self.base, 'META-INF', 'calibre_bookmarks.txt')
raw = ''
if os.path.exists(bmfile):
with open(bmfile, 'rb') as f:
raw = f.read()
else:
saved = self.config['bookmarks_'+self.pathtoebook]
if saved:
raw = saved
if not isinstance(raw, unicode):
raw = raw.decode('utf-8')
self.parse_bookmarks(raw)
def save_bookmarks(self, bookmarks=None):
if bookmarks is None:
bookmarks = self.bookmarks
dat = self.serialize_bookmarks(bookmarks)
if os.path.splitext(self.pathtoebook)[1].lower() == '.epub' and \
os.access(self.pathtoebook, os.R_OK):
try:
zf = open(self.pathtoebook, 'r+b')
except IOError:
return
safe_replace(zf, 'META-INF/calibre_bookmarks.txt',
BytesIO(dat.encode('utf-8')),
add_missing=True)
else:
self.config['bookmarks_'+self.pathtoebook] = dat
def add_bookmark(self, bm):
self.bookmarks = [x for x in self.bookmarks if x['title'] !=
bm['title']]
self.bookmarks.append(bm)
self.save_bookmarks()
def set_bookmarks(self, bookmarks):
self.bookmarks = bookmarks

View File

@ -0,0 +1,120 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
from future_builtins import map
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re, os
from functools import partial
from operator import attrgetter
from collections import namedtuple
from calibre import guess_type
from calibre.ebooks.chardet import xml_to_unicode
def character_count(html):
''' Return the number of "significant" text characters in a HTML string. '''
count = 0
strip_space = re.compile(r'\s+')
for match in re.finditer(r'>[^<]+<', html):
count += len(strip_space.sub(' ', match.group()))-2
return count
def anchor_map(html):
''' Return map of all anchor names to their offsets in the html '''
ans = {}
for match in re.finditer(
r'''(?:id|name)\s*=\s*['"]([^'"]+)['"]''', html):
anchor = match.group(0)
ans[anchor] = ans.get(anchor, match.start())
return ans
class SpineItem(unicode):
def __new__(cls, path, mime_type=None, read_anchor_map=True,
run_char_count=True):
ppath = path.partition('#')[0]
if not os.path.exists(path) and os.path.exists(ppath):
path = ppath
obj = super(SpineItem, cls).__new__(cls, path)
with open(path, 'rb') as f:
raw = f.read()
raw, obj.encoding = xml_to_unicode(raw)
obj.character_count = character_count(raw) if run_char_count else 10000
obj.anchor_map = anchor_map(raw) if read_anchor_map else {}
obj.start_page = -1
obj.pages = -1
obj.max_page = -1
obj.index_entries = []
if mime_type is None:
mime_type = guess_type(obj)[0]
obj.mime_type = mime_type
return obj
class IndexEntry(object):
def __init__(self, spine, toc_entry, num):
self.num = num
self.text = toc_entry.text or _('Unknown')
self.key = toc_entry.abspath
self.anchor = self.start_anchor = toc_entry.fragment or None
try:
self.spine_pos = spine.index(self.key)
except ValueError:
self.spine_pos = -1
self.anchor_pos = 0
if self.spine_pos > -1:
self.anchor_pos = spine[self.spine_pos].anchor_map.get(self.anchor,
0)
self.depth = 0
p = toc_entry.parent
while p is not None:
self.depth += 1
p = p.parent
self.sort_key = (self.spine_pos, self.anchor_pos)
self.spine_count = len(spine)
def find_end(self, all_entries):
potential_enders = [i for i in all_entries if
i.depth <= self.depth and
(
(i.spine_pos == self.spine_pos and i.anchor_pos >
self.anchor_pos)
or
i.spine_pos > self.spine_pos
)]
if potential_enders:
# potential_enders is sorted by (spine_pos, anchor_pos)
end = potential_enders[0]
self.end_spine_pos = end.spine_pos
self.end_anchor = end.anchor
else:
self.end_spine_pos = self.spine_count - 1
self.end_anchor = None
def create_indexing_data(spine, toc):
if not toc: return
f = partial(IndexEntry, spine)
index_entries = list(map(f,
(t for t in toc.flat() if t is not toc),
(i-1 for i, t in enumerate(toc.flat()) if t is not toc)
))
index_entries.sort(key=attrgetter('sort_key'))
[ i.find_end(index_entries) for i in index_entries ]
ie = namedtuple('IndexEntry', 'entry start_anchor end_anchor')
for spine_pos, spine_item in enumerate(spine):
for i in index_entries:
if i.end_spine_pos < spine_pos or i.spine_pos > spine_pos:
continue # Does not touch this file
start = i.anchor if i.spine_pos == spine_pos else None
end = i.end_anchor if i.spine_pos == spine_pos else None
spine_item.index_entries.append(ie(i, start, end))

View File

@ -361,9 +361,11 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
# Remove any encoding-specifying <meta/> elements # Remove any encoding-specifying <meta/> elements
for meta in META_XP(data): for meta in META_XP(data):
meta.getparent().remove(meta) meta.getparent().remove(meta)
etree.SubElement(head, XHTML('meta'), meta = etree.SubElement(head, XHTML('meta'),
attrib={'http-equiv': 'Content-Type', attrib={'http-equiv': 'Content-Type'})
'content': '%s; charset=utf-8' % XHTML_NS}) meta.set('content', 'text/html; charset=utf-8') # Ensure content is second
# attribute
# Ensure has a <body/> # Ensure has a <body/>
if not xpath(data, '/h:html/h:body'): if not xpath(data, '/h:html/h:body'):
body = xpath(data, '//h:body') body = xpath(data, '//h:body')

View File

@ -347,6 +347,10 @@ class Stylizer(object):
style = self.flatten_style(rule.style) style = self.flatten_style(rule.style)
self.page_rule.update(style) self.page_rule.update(style)
elif isinstance(rule, CSSFontFaceRule): elif isinstance(rule, CSSFontFaceRule):
if rule.style.length > 1:
# Ignore the meaningless font face rules generated by the
# benighted MS Word that contain only a font-family declaration
# and nothing else
self.font_face_rules.append(rule) self.font_face_rules.append(rule)
return results return results

View File

@ -157,9 +157,11 @@ class CSSFlattener(object):
bs = body.get('style', '').split(';') bs = body.get('style', '').split(';')
bs.append('margin-top: 0pt') bs.append('margin-top: 0pt')
bs.append('margin-bottom: 0pt') bs.append('margin-bottom: 0pt')
bs.append('margin-left : %fpt'%\ if float(self.context.margin_left) >= 0:
bs.append('margin-left : %gpt'%\
float(self.context.margin_left)) float(self.context.margin_left))
bs.append('margin-right : %fpt'%\ if float(self.context.margin_right) >= 0:
bs.append('margin-right : %gpt'%\
float(self.context.margin_right)) float(self.context.margin_right))
bs.extend(['padding-left: 0pt', 'padding-right: 0pt']) bs.extend(['padding-left: 0pt', 'padding-right: 0pt'])
if self.page_break_on_body: if self.page_break_on_body:
@ -393,6 +395,7 @@ class CSSFlattener(object):
l = etree.SubElement(head, XHTML('link'), l = etree.SubElement(head, XHTML('link'),
rel='stylesheet', type=CSS_MIME, href=href) rel='stylesheet', type=CSS_MIME, href=href)
l.tail='\n' l.tail='\n'
if global_href:
href = item.relhref(global_href) href = item.relhref(global_href)
l = etree.SubElement(head, XHTML('link'), l = etree.SubElement(head, XHTML('link'),
rel='stylesheet', type=CSS_MIME, href=href) rel='stylesheet', type=CSS_MIME, href=href)
@ -413,14 +416,16 @@ class CSSFlattener(object):
global_css = defaultdict(list) global_css = defaultdict(list)
for item in self.oeb.spine: for item in self.oeb.spine:
stylizer = self.stylizers[item] stylizer = self.stylizers[item]
if float(self.context.margin_top) >= 0:
stylizer.page_rule['margin-top'] = '%gpt'%\ stylizer.page_rule['margin-top'] = '%gpt'%\
float(self.context.margin_top) float(self.context.margin_top)
if float(self.context.margin_bottom) >= 0:
stylizer.page_rule['margin-bottom'] = '%gpt'%\ stylizer.page_rule['margin-bottom'] = '%gpt'%\
float(self.context.margin_bottom) float(self.context.margin_bottom)
items = stylizer.page_rule.items() items = stylizer.page_rule.items()
items.sort() items.sort()
css = ';\n'.join("%s: %s" % (key, val) for key, val in items) css = ';\n'.join("%s: %s" % (key, val) for key, val in items)
css = '@page {\n%s\n}\n'%css css = ('@page {\n%s\n}\n'%css) if items else ''
rules = [r.cssText for r in stylizer.font_face_rules] rules = [r.cssText for r in stylizer.font_face_rules]
raw = '\n\n'.join(rules) raw = '\n\n'.join(rules)
css += '\n\n' + raw css += '\n\n' + raw
@ -429,6 +434,8 @@ class CSSFlattener(object):
gc_map = {} gc_map = {}
manifest = self.oeb.manifest manifest = self.oeb.manifest
for css in global_css: for css in global_css:
href = None
if css.strip():
id_, href = manifest.generate('page_css', 'page_styles.css') id_, href = manifest.generate('page_css', 'page_styles.css')
manifest.add(id_, href, CSS_MIME, data=cssutils.parseString(css, manifest.add(id_, href, CSS_MIME, data=cssutils.parseString(css,
validate=False)) validate=False))

View File

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os import os, re
from calibre.utils.date import isoformat, now from calibre.utils.date import isoformat, now
from calibre import guess_type from calibre import guess_type
@ -141,7 +141,7 @@ class MergeMetadata(object):
item = self.oeb.manifest.hrefs[old_cover.href] item = self.oeb.manifest.hrefs[old_cover.href]
if not cdata: if not cdata:
return item.id return item.id
self.oeb.manifest.remove(item) self.remove_old_cover(item)
elif not cdata: elif not cdata:
id = self.oeb.manifest.generate(id='cover') id = self.oeb.manifest.generate(id='cover')
self.oeb.manifest.add(id, old_cover.href, 'image/jpeg') self.oeb.manifest.add(id, old_cover.href, 'image/jpeg')
@ -152,3 +152,41 @@ class MergeMetadata(object):
self.oeb.guide.add('cover', 'Cover', href) self.oeb.guide.add('cover', 'Cover', href)
return id return id
def remove_old_cover(self, cover_item):
from calibre.ebooks.oeb.base import XPath
from lxml import etree
self.oeb.manifest.remove(cover_item)
# Remove any references to the cover in the HTML
affected_items = set()
for item in self.oeb.spine:
try:
images = XPath('//h:img[@src]')(item.data)
except:
images = []
removed = False
for img in images:
href = item.abshref(img.get('src'))
if href == cover_item.href:
img.getparent().remove(img)
removed = True
if removed:
affected_items.add(item)
# Check if the resulting HTML has no content, if so remove it
for item in affected_items:
body = XPath('//h:body')(item.data)
if body:
text = etree.tostring(body[0], method='text', encoding=unicode)
else:
text = ''
text = re.sub(r'\s+', '', text)
if not text and not XPath('//h:img|//svg:svg')(item.data):
self.log('Removing %s as it is a wrapper around'
' the cover image'%item.href)
self.oeb.spine.remove(item)
self.oeb.manifest.remove(item)

View File

@ -372,8 +372,8 @@ class ParseRtf:
old_rtf = old_rtf_obj.check_if_old_rtf() old_rtf = old_rtf_obj.check_if_old_rtf()
if old_rtf: if old_rtf:
if self.__run_level > 5: if self.__run_level > 5:
msg = 'Older RTF\n' msg = 'Older RTF\n' \
msg += 'self.__run_level is "%s"\n' % self.__run_level 'self.__run_level is "%s"\n' % self.__run_level
raise RtfInvalidCodeException, msg raise RtfInvalidCodeException, msg
if self.__run_level > 1: if self.__run_level > 1:
sys.stderr.write('File could be older RTF...\n') sys.stderr.write('File could be older RTF...\n')
@ -381,7 +381,7 @@ class ParseRtf:
if self.__run_level > 1: if self.__run_level > 1:
sys.stderr.write( sys.stderr.write(
'File also has newer RTF.\n' 'File also has newer RTF.\n'
'Will do the best to convert.\n' 'Will do the best to convert...\n'
) )
add_brackets_obj = add_brackets.AddBrackets( add_brackets_obj = add_brackets.AddBrackets(
in_file = self.__temp_file, in_file = self.__temp_file,

View File

@ -20,6 +20,9 @@ class AddBrackets:
""" """
Add brackets for old RTF. Add brackets for old RTF.
Logic: Logic:
When control words without their own brackets are encountered
and in the list of allowed words, this will add brackets
to facilitate the treatment of the file
""" """
def __init__(self, in_file, def __init__(self, in_file,
bug_handler, bug_handler,
@ -41,26 +44,17 @@ class AddBrackets:
self.__copy = copy self.__copy = copy
self.__write_to = better_mktemp() self.__write_to = better_mktemp()
self.__run_level = run_level self.__run_level = run_level
def __initiate_values(self):
"""
"""
self.__state_dict = { self.__state_dict = {
'before_body' : self.__before_body_func, 'before_body' : self.__before_body_func,
'in_body' : self.__in_body_func, 'in_body' : self.__in_body_func,
'after_control_word' : self.__after_control_word_func, 'after_control_word' : self.__after_control_word_func,
'in_ignore' : self.__ignore_func, 'in_ignore' : self.__ignore_func,
} }
self.__state = 'before_body'
self.__inline = {}
self.__temp_group = []
self.__open_bracket = 0
self.__found_brackets = 0
self.__accept = [ self.__accept = [
'cw<ci<bold______', 'cw<ci<bold______' ,
'cw<ci<annotation' , 'cw<ci<annotation' ,
'cw<ci<blue______' , 'cw<ci<blue______' ,
'cw<ci<bold______' , # 'cw<ci<bold______' ,
'cw<ci<caps______' , 'cw<ci<caps______' ,
'cw<ci<char-style' , 'cw<ci<char-style' ,
'cw<ci<dbl-strike' , 'cw<ci<dbl-strike' ,
@ -70,24 +64,36 @@ class AddBrackets:
'cw<ci<font-down_' , 'cw<ci<font-down_' ,
'cw<ci<font-size_' , 'cw<ci<font-size_' ,
'cw<ci<font-style' , 'cw<ci<font-style' ,
'cw<ci<font-up___', 'cw<ci<font-up___' ,
'cw<ci<footnot-mk', 'cw<ci<footnot-mk' ,
'cw<ci<green_____' , 'cw<ci<green_____' ,
'cw<ci<hidden____', 'cw<ci<hidden____' ,
'cw<ci<italics___' , 'cw<ci<italics___' ,
'cw<ci<outline___', 'cw<ci<outline___' ,
'cw<ci<red_______' , 'cw<ci<red_______' ,
'cw<ci<shadow____', 'cw<ci<shadow____' ,
'cw<ci<small-caps' , 'cw<ci<small-caps' ,
'cw<ci<strike-thr', 'cw<ci<strike-thr' ,
'cw<ci<subscript_' , 'cw<ci<subscript_' ,
'cw<ci<superscrip', 'cw<ci<superscrip' ,
'cw<ci<underlined' , 'cw<ci<underlined' ,
# 'cw<ul<underlined' , # 'cw<ul<underlined' ,
] ]
def __initiate_values(self):
"""
Init temp values
"""
self.__state = 'before_body'
self.__inline = {}
self.__temp_group = []
self.__open_bracket = False
self.__found_brackets = False
def __before_body_func(self, line): def __before_body_func(self, line):
""" """
If we are before the body, not interest in changing anything
""" """
if self.__token_info == 'mi<mk<body-open_': if self.__token_info == 'mi<mk<body-open_':
self.__state = 'in_body' self.__state = 'in_body'
@ -95,6 +101,14 @@ class AddBrackets:
def __in_body_func(self, line): def __in_body_func(self, line):
""" """
Select what action to take in body:
1-At the end of the file close the braket if a bracket was opened
This happens if there is achange
2-If an open bracket is found the code inside is ignore
(written without modifications)
3-If an accepted control word is found put the line
in a buffer then chage state to after cw
4-Else simply write the line
""" """
if line == 'cb<nu<clos-brack<0001\n' and self.__open_bracket: if line == 'cb<nu<clos-brack<0001\n' and self.__open_bracket:
self.__write_obj.write( self.__write_obj.write(
@ -102,7 +116,7 @@ class AddBrackets:
) )
self.__write_obj.write(line) self.__write_obj.write(line)
elif self.__token_info == 'ob<nu<open-brack': elif self.__token_info == 'ob<nu<open-brack':
self.__found_brackets = 1 self.__found_brackets = True
self.__state = 'in_ignore' self.__state = 'in_ignore'
self.__ignore_count = self.__ob_count self.__ignore_count = self.__ob_count
self.__write_obj.write(line) self.__write_obj.write(line)
@ -114,6 +128,10 @@ class AddBrackets:
def __after_control_word_func(self, line): def __after_control_word_func(self, line):
""" """
After a cw either add next allowed cw to temporary list or
change groupe and write it.
If the token leading to an exit is an open bracket go to
ignore otherwise goto in body
""" """
if self.__token_info in self.__accept: if self.__token_info in self.__accept:
self.__temp_group.append(line) self.__temp_group.append(line)
@ -129,75 +147,77 @@ class AddBrackets:
def __write_group(self): def __write_group(self):
""" """
Write a tempory group after accepted control words end
But this is mostly useless in my opinion as there is no list of rejected cw
This may be a way to implement future old rtf processing for cw
Utility: open a group to just put brackets but why be so complicated?
Scheme: open brackets, write cw then go to body and back with cw after
""" """
if self.__open_bracket: if self.__open_bracket:
self.__write_obj.write( self.__write_obj.write(
'cb<nu<clos-brack<0003\n' 'cb<nu<clos-brack<0003\n'
) )
self.__open_bracket = 0 self.__open_bracket = False
inline_string = ''
the_keys = self.__inline.keys() inline_string = ''.join(['%s<nu<%s\n' % (k, v) \
for the_key in the_keys: for k, v in self.__inline.iteritems() \
value = self.__inline[the_key] if v != 'false'])
if value != 'false':
inline_string += '%s<nu<%s\n' % (the_key, value)
if inline_string: if inline_string:
self.__write_obj.write('ob<nu<open-brack<0003\n') self.__write_obj.write('ob<nu<open-brack<0003\n'
self.__write_obj.write(inline_string) '%s' % inline_string)
self.__open_bracket = 1 self.__open_bracket = True
self.__temp_group = [] self.__temp_group = []
def __change_permanent_group(self): def __change_permanent_group(self):
""" """
use temp group to change permanent group Use temp group to change permanent group
If the control word is not accepted remove it
What is the interest as it is build to accept only accepted cw
in __after_control_word_func?
""" """
for line in self.__temp_group: self.__inline = {line[:16] : line[20:-1]\
token_info = line[:16] for line in self.__temp_group\
if token_info in self.__accept: # Is this really necessary?
att = line[20:-1] if line[:16] in self.__accept}
self.__inline[token_info] = att
def __ignore_func(self, line): def __ignore_func(self, line):
""" """
Don't add any brackets while inside of brackets RTF has already Just copy data inside of RTF brackets already here.
added.
""" """
self.__write_obj.write(line) self.__write_obj.write(line)
if self.__token_info == 'cb<nu<clos-brack'and\ if self.__token_info == 'cb<nu<clos-brack'\
self.__cb_count == self.__ignore_count: and self.__cb_count == self.__ignore_count:
self.__state = 'in_body' self.__state = 'in_body'
def __check_brackets(self, in_file): def __check_brackets(self, in_file):
self.__check_brack_obj = check_brackets.CheckBrackets\ """
Return True if brackets match
"""
check_brack_obj = check_brackets.CheckBrackets\
(file = in_file) (file = in_file)
good_br = self.__check_brack_obj.check_brackets()[0] return check_brack_obj.check_brackets()[0]
if not good_br:
return 1
def add_brackets(self): def add_brackets(self):
""" """
""" """
self.__initiate_values() self.__initiate_values()
read_obj = open(self.__file, 'r') with open(self.__file, 'r') as read_obj:
self.__write_obj = open(self.__write_to, 'w') with open(self.__write_to, 'w') as self.__write_obj:
line_to_read = 1 for line in read_obj:
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
self.__token_info = line[:16] self.__token_info = line[:16]
if self.__token_info == 'ob<nu<open-brack': if self.__token_info == 'ob<nu<open-brack':
self.__ob_count = line[-5:-1] self.__ob_count = line[-5:-1]
if self.__token_info == 'cb<nu<clos-brack': if self.__token_info == 'cb<nu<clos-brack':
self.__cb_count = line[-5:-1] self.__cb_count = line[-5:-1]
action = self.__state_dict.get(self.__state) action = self.__state_dict.get(self.__state)
if action == None: if action is None:
sys.stderr.write('No matching state in module add_brackets.py\n') sys.stderr.write(
sys.stderr.write(self.__state + '\n') 'No matching state in module add_brackets.py\n'
'%s\n' % self.__state)
action(line) action(line)
read_obj.close() #Check bad brackets
self.__write_obj.close() if self.__check_brackets(self.__write_to):
bad_brackets = self.__check_brackets(self.__write_to)
if not bad_brackets:
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "add_brackets.data") copy_obj.copy_file(self.__write_to, "add_brackets.data")

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,5 @@
import os, sys import os, sys
from codecs import EncodedFile
from calibre.ebooks.rtf2xml import copy, check_encoding from calibre.ebooks.rtf2xml import copy, check_encoding
from calibre.ptempfile import better_mktemp from calibre.ptempfile import better_mktemp
@ -41,6 +42,7 @@ class ConvertToTags:
self.__run_level = run_level self.__run_level = run_level
self.__write_to = better_mktemp() self.__write_to = better_mktemp()
self.__convert_utf = False self.__convert_utf = False
self.__bad_encoding = False
def __initiate_values(self): def __initiate_values(self):
""" """
@ -213,13 +215,14 @@ class ConvertToTags:
if not check_encoding_obj.check_encoding(self.__file, verbose=False): if not check_encoding_obj.check_encoding(self.__file, verbose=False):
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>') self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
elif not check_encoding_obj.check_encoding(self.__file, self.__encoding): elif not check_encoding_obj.check_encoding(self.__file, self.__encoding, verbose=False):
self.__write_obj.write('<?xml version="1.0" encoding="UTF-8" ?>') self.__write_obj.write('<?xml version="1.0" encoding="UTF-8" ?>')
self.__convert_utf = True self.__convert_utf = True
else: else:
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>') self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and' sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
' hope for the best') ' hope for the best')
self.__bad_encoding = True
self.__new_line = 0 self.__new_line = 0
self.__write_new_line() self.__write_new_line()
if self.__no_dtd: if self.__no_dtd:
@ -247,7 +250,7 @@ class ConvertToTags:
the appropriate function. the appropriate function.
The functions that are called: The functions that are called:
a text function for text a text function for text
an open funciton for open tags an open function for open tags
an open with attribute function for tags with attributes an open with attribute function for tags with attributes
an empty with attribute function for tags that are empty but have an empty with attribute function for tags that are empty but have
attribtes. attribtes.
@ -263,20 +266,19 @@ class ConvertToTags:
action = self.__state_dict.get(self.__token_info) action = self.__state_dict.get(self.__token_info)
if action is not None: if action is not None:
action(line) action(line)
self.__write_obj.close() #convert all encodings to UTF8 or ASCII to avoid unsupported encodings in lxml
#convert all encodings to UTF8 to avoid unsupported encodings in lxml if self.__convert_utf or self.__bad_encoding:
if self.__convert_utf:
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
copy_obj.rename(self.__write_to, self.__file) copy_obj.rename(self.__write_to, self.__file)
file_encoding = "utf-8"
if self.__bad_encoding:
file_encoding = "us-ascii"
with open(self.__file, 'r') as read_obj: with open(self.__file, 'r') as read_obj:
with open(self.__write_to, 'w') as write_obj: with open(self.__write_to, 'w') as write_obj:
file = read_obj.read() write_objenc = EncodedFile(write_obj, self.__encoding,
try: file_encoding, 'replace')
file = file.decode(self.__encoding) for line in read_obj:
write_obj.write(file.encode('utf-8')) write_objenc.write(line)
except:
sys.stderr.write('Conversion to UTF-8 is not possible,'
' encoding should be very carefully checked')
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "convert_to_tags.data") copy_obj.copy_file(self.__write_to, "convert_to_tags.data")

View File

@ -11,6 +11,7 @@
# # # #
######################################################################### #########################################################################
import sys, os import sys, os
from calibre.ebooks.rtf2xml import copy from calibre.ebooks.rtf2xml import copy
from calibre.ptempfile import better_mktemp from calibre.ptempfile import better_mktemp
@ -31,29 +32,29 @@ class Header:
self.__bug_handler = bug_handler self.__bug_handler = bug_handler
self.__copy = copy self.__copy = copy
self.__write_to = better_mktemp() self.__write_to = better_mktemp()
self.__found_a_header = 0 self.__found_a_header = False
def __in_header_func(self, line): def __in_header_func(self, line):
""" """
Handle all tokens that are part of header Handle all tokens that are part of header
""" """
if self.__cb_count == self.__header_bracket_count: if self.__cb_count == self.__header_bracket_count:
self.__in_header = 0 self.__in_header = False
self.__write_obj.write(line) self.__write_obj.write(line)
self.__write_to_head_obj.write( self.__write_to_head_obj.write(
'mi<mk<head___clo\n') 'mi<mk<head___clo\n' \
self.__write_to_head_obj.write( 'mi<tg<close_____<header-or-footer\n' \
'mi<tg<close_____<header-or-footer\n')
self.__write_to_head_obj.write(
'mi<mk<header-clo\n') 'mi<mk<header-clo\n')
else: else:
self.__write_to_head_obj.write(line) self.__write_to_head_obj.write(line)
def __found_header(self, line): def __found_header(self, line):
""" """
Found a header Found a header
""" """
# but this could be header or footer # but this could be header or footer
self.__found_a_header = 1 self.__found_a_header = True
self.__in_header = 1 self.__in_header = True
self.__header_count += 1 self.__header_count += 1
# temporarily set this to zero so I can enter loop # temporarily set this to zero so I can enter loop
self.__cb_count = 0 self.__cb_count = 0
@ -69,18 +70,23 @@ class Header:
'mi<tg<open-att__<header-or-footer<type>%s\n' % (type) 'mi<tg<open-att__<header-or-footer<type>%s\n' % (type)
) )
else: else:
sys.stderr.write('module is header\n') sys.stderr.write(
sys.stderr.write('method is __found_header\n') 'module is header\n' \
sys.stderr.write('no dict entry\n') 'method is __found_header\n' \
sys.stderr.write('line is %s' % line) 'no dict entry\n' \
'line is %s' % line)
self.__write_to_head_obj.write( self.__write_to_head_obj.write(
'mi<tg<open-att__<header-or-footer<type>none\n' 'mi<tg<open-att__<header-or-footer<type>none\n'
) )
def __default_sep(self, line): def __default_sep(self, line):
"""Handle all tokens that are not header tokens""" """
Handle all tokens that are not header tokens
"""
if self.__token_info[3:5] == 'hf': if self.__token_info[3:5] == 'hf':
self.__found_header(line) self.__found_header(line)
self.__write_obj.write(line) self.__write_obj.write(line)
def __initiate_sep_values(self): def __initiate_sep_values(self):
""" """
initiate counters for separate_footnotes method. initiate counters for separate_footnotes method.
@ -89,7 +95,7 @@ class Header:
self.__ob_count = 0 self.__ob_count = 0
self.__cb_count = 0 self.__cb_count = 0
self.__header_bracket_count = 0 self.__header_bracket_count = 0
self.__in_header = 0 self.__in_header = False
self.__header_count = 0 self.__header_count = 0
self.__head_dict = { self.__head_dict = {
'head-left_' : ('header-left'), 'head-left_' : ('header-left'),
@ -101,6 +107,7 @@ class Header:
'header____' : ('header' ), 'header____' : ('header' ),
'footer____' : ('footer' ), 'footer____' : ('footer' ),
} }
def separate_headers(self): def separate_headers(self):
""" """
Separate all the footnotes in an RTF file and put them at the bottom, Separate all the footnotes in an RTF file and put them at the bottom,
@ -110,14 +117,11 @@ class Header:
bottom of the main file. bottom of the main file.
""" """
self.__initiate_sep_values() self.__initiate_sep_values()
read_obj = open(self.__file)
self.__write_obj = open(self.__write_to, 'w')
self.__header_holder = better_mktemp() self.__header_holder = better_mktemp()
self.__write_to_head_obj = open(self.__header_holder, 'w') with open(self.__file) as read_obj:
line_to_read = 1 with open(self.__write_to, 'w') as self.__write_obj:
while line_to_read: with open(self.__header_holder, 'w') as self.__write_to_head_obj:
line_to_read = read_obj.readline() for line in read_obj:
line = line_to_read
self.__token_info = line[:16] self.__token_info = line[:16]
# keep track of opening and closing brackets # keep track of opening and closing brackets
if self.__token_info == 'ob<nu<open-brack': if self.__token_info == 'ob<nu<open-brack':
@ -130,33 +134,30 @@ class Header:
# not in the middle of footnote text # not in the middle of footnote text
else: else:
self.__default_sep(line) self.__default_sep(line)
self.__write_obj.close()
read_obj.close() with open(self.__header_holder, 'r') as read_obj:
self.__write_to_head_obj.close() with open(self.__write_to, 'a') as write_obj:
read_obj = open(self.__header_holder, 'r')
write_obj = open(self.__write_to, 'a')
write_obj.write( write_obj.write(
'mi<mk<header-beg\n') 'mi<mk<header-beg\n')
line = 1 for line in read_obj:
while line:
line = read_obj.readline()
write_obj.write(line) write_obj.write(line)
write_obj.write( write_obj.write(
'mi<mk<header-end\n') 'mi<mk<header-end\n')
read_obj.close()
write_obj.close()
os.remove(self.__header_holder) os.remove(self.__header_holder)
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "header_separate.info") copy_obj.copy_file(self.__write_to, "header_separate.data")
copy_obj.rename(self.__write_to, self.__file) copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to) os.remove(self.__write_to)
def update_info(self, file, copy): def update_info(self, file, copy):
""" """
Unused method Unused method
""" """
self.__file = file self.__file = file
self.__copy = copy self.__copy = copy
def __get_head_body_func(self, line): def __get_head_body_func(self, line):
""" """
Process lines in main body and look for beginning of headers. Process lines in main body and look for beginning of headers.
@ -166,6 +167,7 @@ class Header:
self.__state = 'head' self.__state = 'head'
else: else:
self.__write_obj.write(line) self.__write_obj.write(line)
def __get_head_head_func(self, line): def __get_head_head_func(self, line):
""" """
Copy headers and footers from bottom of file to a separate, temporary file. Copy headers and footers from bottom of file to a separate, temporary file.
@ -174,6 +176,7 @@ class Header:
self.__state = 'body' self.__state = 'body'
else: else:
self.__write_to_head_obj.write(line) self.__write_to_head_obj.write(line)
def __get_headers(self): def __get_headers(self):
""" """
Private method to remove footnotes from main file. Read one line from Private method to remove footnotes from main file. Read one line from
@ -182,21 +185,16 @@ class Header:
These two functions do the work of separating the footnotes form the These two functions do the work of separating the footnotes form the
body. body.
""" """
read_obj = open(self.__file) with open(self.__file) as read_obj:
self.__write_obj = open(self.__write_to, 'w') with open(self.__write_to, 'w') as self.__write_obj:
# self.__write_to = "footnote_info.data" with open(self.__header_holder, 'w') as self.__write_to_head_obj:
self.__write_to_head_obj = open(self.__header_holder, 'w') for line in read_obj:
line = 1
while line:
line = read_obj.readline()
self.__token_info = line[:16] self.__token_info = line[:16]
if self.__state == 'body': if self.__state == 'body':
self.__get_head_body_func(line) self.__get_head_body_func(line)
elif self.__state == 'head': elif self.__state == 'head':
self.__get_head_head_func(line) self.__get_head_head_func(line)
read_obj.close()
self.__write_obj.close()
self.__write_to_head_obj.close()
def __get_head_from_temp(self, num): def __get_head_from_temp(self, num):
""" """
Private method for joining headers and footers to body. This method Private method for joining headers and footers to body. This method
@ -205,18 +203,17 @@ class Header:
returns them as a string. returns them as a string.
""" """
look_for = 'mi<mk<header-ope<' + num + '\n' look_for = 'mi<mk<header-ope<' + num + '\n'
found_head = 0 found_head = False
string_to_return = '' string_to_return = ''
line = 1 for line in self.__read_from_head_obj:
while line:
line = self.__read_from_head_obj.readline()
if found_head: if found_head:
if line == 'mi<mk<header-clo\n': if line == 'mi<mk<header-clo\n':
return string_to_return return string_to_return
string_to_return = string_to_return + line string_to_return += line
else: else:
if line == look_for: if line == look_for:
found_head = 1 found_head = True
def __join_from_temp(self): def __join_from_temp(self):
""" """
Private method for rejoining footnotes to body. Read from the Private method for rejoining footnotes to body. Read from the
@ -227,15 +224,13 @@ class Header:
If no footnote marker is found, simply print out the token (line). If no footnote marker is found, simply print out the token (line).
""" """
self.__read_from_head_obj = open(self.__header_holder, 'r') self.__read_from_head_obj = open(self.__header_holder, 'r')
read_obj = open(self.__write_to, 'r')
self.__write_obj = open(self.__write_to2, 'w') self.__write_obj = open(self.__write_to2, 'w')
line = 1 with open(self.__write_to, 'r') as read_obj:
while line: for line in read_obj:
line = read_obj.readline()
if line[:16] == 'mi<mk<header-ind': if line[:16] == 'mi<mk<header-ind':
line = self.__get_head_from_temp(line[17:-1]) line = self.__get_head_from_temp(line[17:-1])
self.__write_obj.write(line) self.__write_obj.write(line)
read_obj.close()
def join_headers(self): def join_headers(self):
""" """
Join the footnotes from the bottom of the file and put them in their Join the footnotes from the bottom of the file and put them in their

View File

@ -181,7 +181,7 @@ class Hex2Utf8:
self.__dingbats_dict.update(dingbats_base_dict) self.__dingbats_dict.update(dingbats_base_dict)
self.__dingbats_dict.update(ms_dingbats_dict) self.__dingbats_dict.update(ms_dingbats_dict)
# load dictionary for caps, and make a string for the replacement # load dictionary for caps, and make a string for the replacement
self.__caps_uni_dict = char_map_obj.get_char_map(map='caps_uni') self.__caps_uni_dict = char_map_obj.get_char_map(map = 'caps_uni')
# # print self.__caps_uni_dict # # print self.__caps_uni_dict
# don't think I'll need this # don't think I'll need this
##keys = self.__caps_uni_dict.keys() ##keys = self.__caps_uni_dict.keys()

View File

@ -11,14 +11,18 @@
# # # #
######################################################################### #########################################################################
import sys import sys
"""
"""
class OldRtf: class OldRtf:
""" """
Check to see if the RTF is an older version Check to see if the RTF is an older version
Logic: Logic:
If allowable control word/properties happen in text without being enclosed
in brackets the file will be considered old rtf
""" """
def __init__(self, in_file, bug_handler, run_level ): def __init__(self, in_file,
bug_handler,
run_level,
):
""" """
Required: Required:
'file'--file to parse 'file'--file to parse
@ -32,11 +36,7 @@ class OldRtf:
""" """
self.__file = in_file self.__file = in_file
self.__bug_handler = bug_handler self.__bug_handler = bug_handler
self.__initiate_values() self.__run_level = run_level
self.__ob_group = 0
def __initiate_values(self):
self.__previous_token = ''
self.__new_found = 0
self.__allowable = [ self.__allowable = [
'annotation' , 'annotation' ,
'blue______' , 'blue______' ,
@ -64,14 +64,18 @@ class OldRtf:
'superscrip' , 'superscrip' ,
'underlined' , 'underlined' ,
] ]
self.__state = 'before_body'
self.__action_dict = { self.__action_dict = {
'before_body' : self.__before_body_func, 'before_body' : self.__before_body_func,
'in_body' : self.__check_tokens_func, 'in_body' : self.__check_tokens_func,
'after_pard' : self.__after_pard_func, 'after_pard' : self.__after_pard_func,
} }
self.__is_old = 0
def __initiate_values(self):
self.__previous_token = ''
self.__state = 'before_body'
self.__found_new = 0 self.__found_new = 0
self.__ob_group = 0
def __check_tokens_func(self, line): def __check_tokens_func(self, line):
if self.__inline_info in self.__allowable: if self.__inline_info in self.__allowable:
if self.__ob_group == self.__base_ob_count: if self.__ob_group == self.__base_ob_count:
@ -80,32 +84,32 @@ class OldRtf:
self.__found_new += 1 self.__found_new += 1
elif self.__token_info == 'cw<pf<par-def___': elif self.__token_info == 'cw<pf<par-def___':
self.__state = 'after_pard' self.__state = 'after_pard'
def __before_body_func(self, line): def __before_body_func(self, line):
if self.__token_info == 'mi<mk<body-open_': if self.__token_info == 'mi<mk<body-open_':
self.__state = 'in_body' self.__state = 'in_body'
self.__base_ob_count = self.__ob_group self.__base_ob_count = self.__ob_group
def __after_pard_func(self, line): def __after_pard_func(self, line):
if line[0:2] != 'cw': if line[0:2] != 'cw':
self.__state = 'in_body' self.__state = 'in_body'
def check_if_old_rtf(self): def check_if_old_rtf(self):
""" """
Requires: Requires:
nothing nothing
Returns: Returns:
1 if file is older RTf True if file is older RTf
0 if file is newer RTF False if file is newer RTF
""" """
self.__initiate_values()
read_obj = open(self.__file, 'r')
line = 1
line_num = 0 line_num = 0
while line: with open(self.__file, 'r') as read_obj:
line = read_obj.readline() for line in read_obj:
line_num += 1 line_num += 1
self.__token_info = line[:16] self.__token_info = line[:16]
if self.__token_info == 'mi<mk<body-close': if self.__token_info == 'mi<mk<body-close':
return 0 return False
self.__ob_group = 0
if self.__token_info == 'ob<nu<open-brack': if self.__token_info == 'ob<nu<open-brack':
self.__ob_group += 1 self.__ob_group += 1
self.__ob_count = line[-5:-1] self.__ob_count = line[-5:-1]
@ -114,14 +118,22 @@ class OldRtf:
self.__cb_count = line[-5:-1] self.__cb_count = line[-5:-1]
self.__inline_info = line[6:16] self.__inline_info = line[6:16]
if self.__state == 'after_body': if self.__state == 'after_body':
return 0 return False
action = self.__action_dict.get(self.__state) action = self.__action_dict.get(self.__state)
if not action: if action is None:
sys.stderr.write('No action for state!\n') try:
sys.stderr.write('No action for this state!\n')
except:
pass
result = action(line) result = action(line)
if result == 'new_rtf': if result == 'new_rtf':
return 0 return False
elif result == 'old_rtf': elif result == 'old_rtf':
return 1 if self.__run_level > 3:
sys.stderr.write(
'Old rtf construction %s (bracket %s, line %s)\n'
% (self.__inline_info, str(self.__ob_group), line_num)
)
return True
self.__previous_token = line[6:16] self.__previous_token = line[6:16]
return 0 return False

View File

@ -10,7 +10,9 @@
# # # #
# # # #
######################################################################### #########################################################################
import sys, os, codecs import sys, os
# , codecs
class Output: class Output:
""" """
Output file Output file
@ -19,7 +21,8 @@ class Output:
file, file,
orig_file, orig_file,
output_dir = None, output_dir = None,
out_file = None out_file = None,
no_ask = True
): ):
""" """
Required: Required:
@ -33,8 +36,9 @@ class Output:
self.__file = file self.__file = file
self.__orig_file = orig_file self.__orig_file = orig_file
self.__output_dir = output_dir self.__output_dir = output_dir
self.__no_ask = 1 self.__no_ask = no_ask
self.__out_file = out_file self.__out_file = out_file
def output(self): def output(self):
""" """
Required: Required:
@ -45,13 +49,14 @@ class Output:
output the line to the screen if no output file given. Otherwise, output to output the line to the screen if no output file given. Otherwise, output to
the file. the file.
""" """
# self.__output_xml(self.__file, self.__out_file)
if self.__output_dir: if self.__output_dir:
self.__output_to_dir_func() self.__output_to_dir_func()
elif self.__out_file: elif self.__out_file:
self.__output_xml(self.__file, self.__out_file) self.__output_to_file_func()
# self.__output_xml(self.__file, self.__out_file)
else: else:
self.__output_to_standard_func() self.__output_to_standard_func()
def __output_to_dir_func(self): def __output_to_dir_func(self):
""" """
Requires: Requires:
@ -64,32 +69,25 @@ class Output:
""" """
base_name = os.path.basename(self.__orig_file) base_name = os.path.basename(self.__orig_file)
base_name, ext = os.path.splitext(base_name) base_name, ext = os.path.splitext(base_name)
output_file = '%s.xml' % base_name output_file = os.path.join(self.__output_dir, '%s.xml' % base_name)
output_file = os.path.join(self.__output_dir, output_file)
# change if user wants to output to a specific file # change if user wants to output to a specific file
if self.__out_file: if self.__out_file:
output_file = os.path.join(self.__output_dir, self.__out_file) output_file = os.path.join(self.__output_dir, self.__out_file)
user_response = 'o' user_response = 'o'
if os.path.isfile(output_file): if os.path.isfile(output_file) and not self.__no_ask:
if self.__no_ask: msg = 'Do you want to overwrite %s?\n' % output_file
user_response = 'o' msg += ('Type "o" to overwrite.\n'
else: 'Type any other key to print to standard output.\n')
msg = 'Do you want to over-write %s?\n' % output_file
msg += 'Type "o" to over-write.\n'
msg += 'Type any other key to print to standard output.\n'
sys.stderr.write(msg) sys.stderr.write(msg)
user_response = raw_input() user_response = raw_input()
if user_response == 'o': if user_response == 'o':
read_obj = open(self.__file, 'r') with open(self.__file, 'r') as read_obj:
write_obj = open(output_file, 'w') with open(self.output_file, 'w') as write_obj:
line = 1 for line in read_obj:
while line:
line = read_obj.readline()
write_obj.write(line) write_obj.write(line)
read_obj.close()
write_obj.close()
else: else:
self.__output_to_standard_func() self.__output_to_standard_func()
def __output_to_file_func(self): def __output_to_file_func(self):
""" """
Required: Required:
@ -99,14 +97,11 @@ class Output:
Logic: Logic:
read one line at a time. Output to standard read one line at a time. Output to standard
""" """
read_obj = open(self.__file, 'r') with open(self.__file, 'r') as read_obj:
write_obj = open(self.__out_file, 'w') with open(self.__out_file, 'w') as write_obj:
line = 1 for line in read_obj:
while line:
line = read_obj.readline()
write_obj.write(line) write_obj.write(line)
read_obj.close()
write_obj.close()
def __output_to_standard_func(self): def __output_to_standard_func(self):
""" """
Required: Required:
@ -116,26 +111,24 @@ class Output:
Logic: Logic:
read one line at a time. Output to standard read one line at a time. Output to standard
""" """
read_obj = open(self.__file, 'r') with open(self.__file, 'r') as read_obj:
line = 1 for line in read_obj:
while line:
line = read_obj.readline()
sys.stdout.write(line) sys.stdout.write(line)
read_obj.close()
def __output_xml(self, in_file, out_file): # def __output_xml(self, in_file, out_file):
""" # """
output the ill-formed xml file # output the ill-formed xml file
""" # """
(utf8_encode, utf8_decode, utf8_reader, utf8_writer) = codecs.lookup("utf-8") # (utf8_encode, utf8_decode, utf8_reader, utf8_writer) = codecs.lookup("utf-8")
write_obj = utf8_writer(open(out_file, 'w')) # write_obj = utf8_writer(open(out_file, 'w'))
write_obj = open(out_file, 'w') # write_obj = open(out_file, 'w')
read_obj = utf8_writer(open(in_file, 'r')) # read_obj = utf8_writer(open(in_file, 'r'))
read_obj = open(in_file, 'r') # read_obj = open(in_file, 'r')
line = 1 # line = 1
while line: # while line:
line = read_obj.readline() # line = read_obj.readline()
if isinstance(line, type(u"")): # if isinstance(line, type(u"")):
line = line.encode("utf-8") # line = line.encode("utf-8")
write_obj.write(line) # write_obj.write(line)
read_obj.close() # read_obj.close()
write_obj.close() # write_obj.close()

View File

@ -11,31 +11,32 @@
# # # #
######################################################################### #########################################################################
import sys, os import sys, os
from calibre.ebooks.rtf2xml import copy from calibre.ebooks.rtf2xml import copy
from calibre.ptempfile import better_mktemp from calibre.ptempfile import better_mktemp
class Paragraphs: class Paragraphs:
""" """
================= =================
Purpose Purpose
================= =================
Write paragraph tags for a tokenized file. (This module won't be any use to use Write paragraph tags for a tokenized file. (This module won't be any use to use
to you unless you use it as part of the other modules.) to you unless you use it as part of the other modules.)
------------- -------------
Method Method
------------- -------------
RTF does not tell you when a paragraph begins. It only tells you when the RTF does not tell you when a paragraph begins. It only tells you when the
paragraph ends. paragraph ends.
In order to make paragraphs out of this limited info, the parser starts in the In order to make paragraphs out of this limited info, the parser starts in the
body of the documents and assumes it is not in a paragraph. It looks for clues body of the documents and assumes it is not in a paragraph. It looks for clues
to begin a paragraph. Text starts a paragraph; so does an inline field or to begin a paragraph. Text starts a paragraph; so does an inline field or
list-text. If an end of paragraph marker (\par) is found, then this indicates list-text. If an end of paragraph marker (\par) is found, then this indicates
a blank paragraph. a blank paragraph.
Once a paragraph is found, the state changes to 'paragraph.' In this state, Once a paragraph is found, the state changes to 'paragraph.' In this state,
clues are looked to for the end of a paragraph. The end of a paragraph marker clues are looked to for the end of a paragraph. The end of a paragraph marker
(\par) marks the end of a paragraph. So does the end of a footnote or heading; (\par) marks the end of a paragraph. So does the end of a footnote or heading;
a paragraph definintion; the end of a field-block; and the beginning of a a paragraph definition; the end of a field-block; and the beginning of a
section. (How about the end of a section or the end of a field-block?) section. (How about the end of a section or the end of a field-block?)
""" """
def __init__(self, def __init__(self,
in_file, in_file,
@ -60,6 +61,7 @@ section. (How about the end of a section or the end of a field-block?)
self.__write_empty_para = write_empty_para self.__write_empty_para = write_empty_para
self.__run_level = run_level self.__run_level = run_level
self.__write_to = better_mktemp() self.__write_to = better_mktemp()
def __initiate_values(self): def __initiate_values(self):
""" """
Initiate all values. Initiate all values.
@ -77,7 +79,7 @@ section. (How about the end of a section or the end of a field-block?)
self.__paragraph_dict = { self.__paragraph_dict = {
'cw<pf<par-end___' : self.__close_para_func, # end of paragraph 'cw<pf<par-end___' : self.__close_para_func, # end of paragraph
'mi<mk<headi_-end' : self.__close_para_func, # end of header or footer 'mi<mk<headi_-end' : self.__close_para_func, # end of header or footer
##'cw<pf<par-def___' : self.__close_para_func, # paragraph definition ## 'cw<pf<par-def___' : self.__close_para_func, # paragraph definition
# 'mi<mk<fld-bk-end' : self.__close_para_func, # end of field-block # 'mi<mk<fld-bk-end' : self.__close_para_func, # end of field-block
'mi<mk<fldbk-end_' : self.__close_para_func, # end of field-block 'mi<mk<fldbk-end_' : self.__close_para_func, # end of field-block
'mi<mk<body-close' : self.__close_para_func, # end of body 'mi<mk<body-close' : self.__close_para_func, # end of body
@ -99,6 +101,7 @@ section. (How about the end of a section or the end of a field-block?)
'mi<mk<pict-start' : self.__start_para_func, 'mi<mk<pict-start' : self.__start_para_func,
'cw<pf<page-break' : self.__empty_pgbk_func, # page break 'cw<pf<page-break' : self.__empty_pgbk_func, # page break
} }
def __before_body_func(self, line): def __before_body_func(self, line):
""" """
Required: Required:
@ -112,6 +115,7 @@ section. (How about the end of a section or the end of a field-block?)
if self.__token_info == 'mi<mk<body-open_': if self.__token_info == 'mi<mk<body-open_':
self.__state = 'not_paragraph' self.__state = 'not_paragraph'
self.__write_obj.write(line) self.__write_obj.write(line)
def __not_paragraph_func(self, line): def __not_paragraph_func(self, line):
""" """
Required: Required:
@ -127,6 +131,7 @@ section. (How about the end of a section or the end of a field-block?)
if action: if action:
action(line) action(line)
self.__write_obj.write(line) self.__write_obj.write(line)
def __paragraph_func(self, line): def __paragraph_func(self, line):
""" """
Required: Required:
@ -144,6 +149,7 @@ section. (How about the end of a section or the end of a field-block?)
action(line) action(line)
else: else:
self.__write_obj.write(line) self.__write_obj.write(line)
def __start_para_func(self, line): def __start_para_func(self, line):
""" """
Requires: Requires:
@ -160,6 +166,7 @@ section. (How about the end of a section or the end of a field-block?)
) )
self.__write_obj.write(self.__start2_marker) self.__write_obj.write(self.__start2_marker)
self.__state = 'paragraph' self.__state = 'paragraph'
def __empty_para_func(self, line): def __empty_para_func(self, line):
""" """
Requires: Requires:
@ -176,6 +183,7 @@ section. (How about the end of a section or the end of a field-block?)
'mi<tg<empty_____<para\n' 'mi<tg<empty_____<para\n'
) )
self.__write_obj.write(self.__end_marker) # marker for later parsing self.__write_obj.write(self.__end_marker) # marker for later parsing
def __empty_pgbk_func(self, line): def __empty_pgbk_func(self, line):
""" """
Requires: Requires:
@ -188,6 +196,7 @@ section. (How about the end of a section or the end of a field-block?)
self.__write_obj.write( self.__write_obj.write(
'mi<tg<empty_____<page-break\n' 'mi<tg<empty_____<page-break\n'
) )
def __close_para_func(self, line): def __close_para_func(self, line):
""" """
Requires: Requires:
@ -205,6 +214,7 @@ section. (How about the end of a section or the end of a field-block?)
self.__write_obj.write(self.__end_marker) # marker for later parser self.__write_obj.write(self.__end_marker) # marker for later parser
self.__write_obj.write(line) self.__write_obj.write(line)
self.__state = 'not_paragraph' self.__state = 'not_paragraph'
def __bogus_para__def_func(self, line): def __bogus_para__def_func(self, line):
""" """
Requires: Requires:
@ -215,6 +225,7 @@ section. (How about the end of a section or the end of a field-block?)
if a \pard occurs in a paragraph, I want to ignore it. (I believe) if a \pard occurs in a paragraph, I want to ignore it. (I believe)
""" """
self.__write_obj.write('mi<mk<bogus-pard\n') self.__write_obj.write('mi<mk<bogus-pard\n')
def make_paragraphs(self): def make_paragraphs(self):
""" """
Requires: Requires:
@ -229,20 +240,18 @@ section. (How about the end of a section or the end of a field-block?)
only other state is 'paragraph'. only other state is 'paragraph'.
""" """
self.__initiate_values() self.__initiate_values()
read_obj = open(self.__file, 'r') with open(self.__file, 'r') as read_obj:
self.__write_obj = open(self.__write_to, 'w') with open(self.__write_to, 'w') as self.__write_obj:
line_to_read = 1 for line in read_obj:
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
self.__token_info = line[:16] self.__token_info = line[:16]
action = self.__state_dict.get(self.__state) action = self.__state_dict.get(self.__state)
if action == None: if action is None:
sys.stderr.write('no no matching state in module sections.py\n') try:
sys.stderr.write('no matching state in module paragraphs.py\n')
sys.stderr.write(self.__state + '\n') sys.stderr.write(self.__state + '\n')
except:
pass
action(line) action(line)
read_obj.close()
self.__write_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "paragraphs.data") copy_obj.copy_file(self.__write_to, "paragraphs.data")

View File

@ -11,16 +11,24 @@
# # # #
######################################################################### #########################################################################
import sys,os import sys,os
from calibre.ebooks.rtf2xml import copy from calibre.ebooks.rtf2xml import copy
class Preamble: class Preamble:
""" """
Fix the reamaing parts of the preamble. This module does very little. It Fix the reamaing parts of the preamble. This module does very little. It
makes sure that no text gets put in the revision of list table. In the makes sure that no text gets put in the revision of list table. In the
future, when I understand how to interprett he revision table and list future, when I understand how to interpret the revision table and list
table, I will make these methods more functional. table, I will make these methods more functional.
""" """
def __init__(self, file, bug_handler, platform, default_font, code_page, def __init__(self, file,
copy=None, temp_dir=None): bug_handler,
platform,
default_font,
code_page,
copy=None,
temp_dir=None,
):
""" """
Required: Required:
file--file to parse file--file to parse
@ -44,6 +52,7 @@ class Preamble:
self.__write_to = os.path.join(temp_dir,"info_table_info.data") self.__write_to = os.path.join(temp_dir,"info_table_info.data")
else: else:
self.__write_to = "info_table_info.data" self.__write_to = "info_table_info.data"
def __initiate_values(self): def __initiate_values(self):
""" """
Initiate all values. Initiate all values.
@ -62,12 +71,14 @@ class Preamble:
'mi<mk<revtbl-beg' : self.__found_revision_table_func, 'mi<mk<revtbl-beg' : self.__found_revision_table_func,
'mi<mk<body-open_' : self.__found_body_func, 'mi<mk<body-open_' : self.__found_body_func,
} }
def __default_func(self, line): def __default_func(self, line):
action = self.__default_dict.get(self.__token_info) action = self.__default_dict.get(self.__token_info)
if action: if action:
action(line) action(line)
else: else:
self.__write_obj.write(line) self.__write_obj.write(line)
def __found_rtf_head_func(self, line): def __found_rtf_head_func(self, line):
""" """
Requires: Requires:
@ -84,8 +95,10 @@ class Preamble:
'<platform>%s\n' % (self.__default_font, self.__code_page, '<platform>%s\n' % (self.__default_font, self.__code_page,
self.__platform) self.__platform)
) )
def __found_list_table_func(self, line): def __found_list_table_func(self, line):
self.__state = 'list_table' self.__state = 'list_table'
def __list_table_func(self, line): def __list_table_func(self, line):
if self.__token_info == 'mi<mk<listabend_': if self.__token_info == 'mi<mk<listabend_':
self.__state = 'default' self.__state = 'default'
@ -93,8 +106,10 @@ class Preamble:
pass pass
else: else:
self.__write_obj.write(line) self.__write_obj.write(line)
def __found_revision_table_func(self, line): def __found_revision_table_func(self, line):
self.__state = 'revision' self.__state = 'revision'
def __revision_table_func(self, line): def __revision_table_func(self, line):
if self.__token_info == 'mi<mk<revtbl-end': if self.__token_info == 'mi<mk<revtbl-end':
self.__state = 'default' self.__state = 'default'
@ -102,11 +117,14 @@ class Preamble:
pass pass
else: else:
self.__write_obj.write(line) self.__write_obj.write(line)
def __found_body_func(self, line): def __found_body_func(self, line):
self.__state = 'body' self.__state = 'body'
self.__write_obj.write(line) self.__write_obj.write(line)
def __body_func(self, line): def __body_func(self, line):
self.__write_obj.write(line) self.__write_obj.write(line)
def fix_preamble(self): def fix_preamble(self):
""" """
Requires: Requires:
@ -119,20 +137,15 @@ class Preamble:
the list table. the list table.
""" """
self.__initiate_values() self.__initiate_values()
read_obj = open(self.__file, 'r') with open(self.__file, 'r') as read_obj:
self.__write_obj = open(self.__write_to, 'w') with open(self.__write_to, 'w') as self.__write_obj:
line_to_read = 1 for line in read_obj:
while line_to_read:
line_to_read = read_obj.readline()
line = line_to_read
self.__token_info = line[:16] self.__token_info = line[:16]
action = self.__state_dict.get(self.__state) action = self.__state_dict.get(self.__state)
if action == None: if action is None:
sys.stderr.write('no no matching state in module preamble_rest.py\n') sys.stderr.write(
sys.stderr.write(self.__state + '\n') 'no matching state in module preamble_rest.py\n' + self.__state + '\n')
action(line) action(line)
read_obj.close()
self.__write_obj.close()
copy_obj = copy.Copy(bug_handler = self.__bug_handler) copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy: if self.__copy:
copy_obj.copy_file(self.__write_to, "preamble_div.data") copy_obj.copy_file(self.__write_to, "preamble_div.data")

View File

@ -11,43 +11,44 @@
# # # #
######################################################################### #########################################################################
import sys, os import sys, os
from calibre.ebooks.rtf2xml import copy from calibre.ebooks.rtf2xml import copy
from calibre.ptempfile import better_mktemp from calibre.ptempfile import better_mktemp
class Sections: class Sections:
""" """
================= =================
Purpose Purpose
================= =================
Write section tags for a tokenized file. (This module won't be any use to use Write section tags for a tokenized file. (This module won't be any use to use
to you unless you use it as part of the other modules.) to you unless you use it as part of the other modules.)
--------------- ---------------
logic logic
--------------- ---------------
The tags for the first section breaks have already been written. The tags for the first section breaks have already been written.
RTF stores section breaks with the \sect tag. Each time this tag is RTF stores section breaks with the \sect tag. Each time this tag is
encountered, add one to the counter. encountered, add one to the counter.
When I encounter the \sectd tag, I want to collect all the appropriate tokens When I encounter the \sectd tag, I want to collect all the appropriate tokens
that describe the section. When I reach a \pard, I know I an stop collecting that describe the section. When I reach a \pard, I know I an stop collecting
tokens and write the section tags. tokens and write the section tags.
The exception to this method occurs when sections occur in field blocks, such The exception to this method occurs when sections occur in field blocks, such
as the index. Normally, two section break occur within the index and other as the index. Normally, two section break occur within the index and other
field-blocks. (If less or more section breaks occurr, this code may not work.) field-blocks. (If less or more section breaks occurr, this code may not work.)
I want the sections to occurr outside of the index. That is, the index I want the sections to occur outside of the index. That is, the index
should be nested inside one section tag. After the index is complete, a new should be nested inside one section tag. After the index is complete, a new
section should begin. section should begin.
In order to write the sections outside of the field blocks, I have to store In order to write the sections outside of the field blocks, I have to store
all of the field block as a string. When I ecounter the \sect tag, add one to all of the field block as a string. When I ecounter the \sect tag, add one to
the section counter, but store this number in a list. Likewise, store the the section counter, but store this number in a list. Likewise, store the
information describing the section in another list. information describing the section in another list.
When I reach the end of the field block, choose the first item from the When I reach the end of the field block, choose the first item from the
numbered list as the section number. Choose the first item in the description numbered list as the section number. Choose the first item in the description
list as the values and attributes of the section. Enclose the field string list as the values and attributes of the section. Enclose the field string
between the section tags. between the section tags.
Start a new section outside the field-block strings. Use the second number in Start a new section outside the field-block strings. Use the second number in
the list; use the second item in the description list. the list; use the second item in the description list.
CHANGE (2004-04-26) No longer write sections that occurr in field-blocks. CHANGE (2004-04-26) No longer write sections that occurr in field-blocks.
Instead, ingore all section information in a field-block. Instead, ingore all section information in a field-block.
""" """
def __init__(self, def __init__(self,
in_file, in_file,

View File

@ -137,8 +137,9 @@ def _config(): # {{{
c.add_opt('LRF_ebook_viewer_options', default=None, c.add_opt('LRF_ebook_viewer_options', default=None,
help=_('Options for the LRF ebook viewer')) help=_('Options for the LRF ebook viewer'))
c.add_opt('internally_viewed_formats', default=['LRF', 'EPUB', 'LIT', c.add_opt('internally_viewed_formats', default=['LRF', 'EPUB', 'LIT',
'MOBI', 'PRC', 'AZW', 'HTML', 'FB2', 'PDB', 'RB', 'SNB', 'HTMLZ'], 'MOBI', 'PRC', 'POBI', 'AZW', 'AZW3', 'HTML', 'FB2', 'PDB', 'RB',
help=_('Formats that are viewed using the internal viewer')) 'SNB', 'HTMLZ'], help=_(
'Formats that are viewed using the internal viewer'))
c.add_opt('column_map', default=ALL_COLUMNS, c.add_opt('column_map', default=ALL_COLUMNS,
help=_('Columns to be displayed in the book list')) help=_('Columns to be displayed in the book list'))
c.add_opt('autolaunch_server', default=False, help=_('Automatically launch content server on application startup')) c.add_opt('autolaunch_server', default=False, help=_('Automatically launch content server on application startup'))

View File

@ -10,7 +10,7 @@ from functools import partial
from PyQt4.Qt import (QMenu, Qt, QInputDialog, QToolButton, QDialog, from PyQt4.Qt import (QMenu, Qt, QInputDialog, QToolButton, QDialog,
QDialogButtonBox, QGridLayout, QLabel, QLineEdit, QIcon, QSize, QDialogButtonBox, QGridLayout, QLabel, QLineEdit, QIcon, QSize,
QCoreApplication) QCoreApplication, pyqtSignal)
from calibre import isbytestring, sanitize_file_name_unicode from calibre import isbytestring, sanitize_file_name_unicode
from calibre.constants import filesystem_encoding, iswindows from calibre.constants import filesystem_encoding, iswindows
@ -142,6 +142,7 @@ class ChooseLibraryAction(InterfaceAction):
dont_add_to = frozenset(['context-menu-device']) dont_add_to = frozenset(['context-menu-device'])
action_add_menu = True action_add_menu = True
action_menu_clone_qaction = _('Switch/create library...') action_menu_clone_qaction = _('Switch/create library...')
restore_view_state = pyqtSignal(object)
def genesis(self): def genesis(self):
self.base_text = _('%d books') self.base_text = _('%d books')
@ -206,6 +207,17 @@ class ChooseLibraryAction(InterfaceAction):
self.maintenance_menu.addAction(ac) self.maintenance_menu.addAction(ac)
self.choose_menu.addMenu(self.maintenance_menu) self.choose_menu.addMenu(self.maintenance_menu)
self.view_state_map = {}
self.restore_view_state.connect(self._restore_view_state,
type=Qt.QueuedConnection)
@property
def preserve_state_on_switch(self):
ans = getattr(self, '_preserve_state_on_switch', None)
if ans is None:
self._preserve_state_on_switch = ans = \
self.gui.library_view.preserve_state(require_selected_ids=False)
return ans
def pick_random(self, *args): def pick_random(self, *args):
self.gui.iactions['Pick Random Book'].pick_random() self.gui.iactions['Pick Random Book'].pick_random()
@ -221,6 +233,13 @@ class ChooseLibraryAction(InterfaceAction):
def library_changed(self, db): def library_changed(self, db):
self.stats.library_used(db) self.stats.library_used(db)
self.build_menus() self.build_menus()
state = self.view_state_map.get(self.stats.canonicalize_path(
db.library_path), None)
if state is not None:
self.restore_view_state.emit(state)
def _restore_view_state(self, state):
self.preserve_state_on_switch.state = state
def initialization_complete(self): def initialization_complete(self):
self.library_changed(self.gui.library_view.model().db) self.library_changed(self.gui.library_view.model().db)
@ -401,8 +420,11 @@ class ChooseLibraryAction(InterfaceAction):
def switch_requested(self, location): def switch_requested(self, location):
if not self.change_library_allowed(): if not self.change_library_allowed():
return return
db = self.gui.library_view.model().db
current_lib = self.stats.canonicalize_path(db.library_path)
self.view_state_map[current_lib] = self.preserve_state_on_switch.state
loc = location.replace('/', os.sep) loc = location.replace('/', os.sep)
exists = self.gui.library_view.model().db.exists_at(loc) exists = db.exists_at(loc)
if not exists: if not exists:
d = MovedDialog(self.stats, location, self.gui) d = MovedDialog(self.stats, location, self.gui)
ret = d.exec_() ret = d.exec_()

View File

@ -6,6 +6,7 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from functools import partial from functools import partial
from collections import Counter
from PyQt4.Qt import QObject, QTimer from PyQt4.Qt import QObject, QTimer
@ -117,13 +118,14 @@ class DeleteAction(InterfaceAction):
def _get_selected_formats(self, msg, ids): def _get_selected_formats(self, msg, ids):
from calibre.gui2.dialogs.select_formats import SelectFormats from calibre.gui2.dialogs.select_formats import SelectFormats
fmts = set([]) c = Counter()
db = self.gui.library_view.model().db db = self.gui.library_view.model().db
for x in ids: for x in ids:
fmts_ = db.formats(x, index_is_id=True, verify_formats=False) fmts_ = db.formats(x, index_is_id=True, verify_formats=False)
if fmts_: if fmts_:
fmts.update(frozenset([x.lower() for x in fmts_.split(',')])) for x in frozenset([x.lower() for x in fmts_.split(',')]):
d = SelectFormats(list(sorted(fmts)), msg, parent=self.gui) c[x] += 1
d = SelectFormats(c, msg, parent=self.gui)
if d.exec_() != d.Accepted: if d.exec_() != d.Accepted:
return None return None
return d.selected_formats return d.selected_formats

View File

@ -12,11 +12,11 @@ from PyQt4.Qt import (QDialog, QVBoxLayout, QHBoxLayout, QRadioButton, QFrame,
from calibre import as_unicode from calibre import as_unicode
from calibre.constants import isosx from calibre.constants import isosx
from calibre.gui2 import error_dialog, question_dialog, open_local_file from calibre.gui2 import error_dialog, question_dialog, open_local_file, gprefs
from calibre.gui2.actions import InterfaceAction from calibre.gui2.actions import InterfaceAction
from calibre.ptempfile import (PersistentTemporaryDirectory, from calibre.ptempfile import (PersistentTemporaryDirectory,
PersistentTemporaryFile) PersistentTemporaryFile)
from calibre.utils.config import prefs from calibre.utils.config import prefs, tweaks
class TweakBook(QDialog): class TweakBook(QDialog):
@ -32,11 +32,16 @@ class TweakBook(QDialog):
index_is_id=True)) index_is_id=True))
button = self.fmt_choice_buttons[0] button = self.fmt_choice_buttons[0]
button_map = {unicode(x.text()):x for x in self.fmt_choice_buttons}
of = prefs['output_format'].upper() of = prefs['output_format'].upper()
for x in self.fmt_choice_buttons: df = tweaks.get('default_tweak_format', None)
if unicode(x.text()) == of: lf = gprefs.get('last_tweak_format', None)
button = x if df and df.lower() == 'remember' and lf in button_map:
break button = button_map[lf]
elif df and df.upper() in button_map:
button = button_map[df.upper()]
elif of in button_map:
button = button_map[of]
button.setChecked(True) button.setChecked(True)
self.init_state() self.init_state()
@ -148,6 +153,8 @@ class TweakBook(QDialog):
def explode(self): def explode(self):
self.show_msg(_('Exploding, please wait...')) self.show_msg(_('Exploding, please wait...'))
if len(self.fmt_choice_buttons) > 1:
gprefs.set('last_tweak_format', self.current_format.upper())
QTimer.singleShot(5, self.do_explode) QTimer.singleShot(5, self.do_explode)
def ask_question(self, msg): def ask_question(self, msg):

View File

@ -161,8 +161,14 @@ class EditorWidget(QWebView): # {{{
self.page().setContentEditable(True) self.page().setContentEditable(True)
def clear_text(self, *args): def clear_text(self, *args):
us = self.page().undoStack()
us.beginMacro('clear all text')
self.action_select_all.trigger() self.action_select_all.trigger()
self.action_cut.trigger() self.action_remove_format.trigger()
self.exec_command('delete')
us.endMacro()
self.set_font_style()
self.setFocus(Qt.OtherFocusReason)
def link_clicked(self, url): def link_clicked(self, url):
open_url(url) open_url(url)
@ -262,6 +268,10 @@ class EditorWidget(QWebView): # {{{
def fset(self, val): def fset(self, val):
self.setHtml(val) self.setHtml(val)
self.set_font_style()
return property(fget=fget, fset=fset)
def set_font_style(self):
fi = QFontInfo(QApplication.font(self)) fi = QFontInfo(QApplication.font(self))
f = fi.pixelSize() + 1 + int(tweaks['change_book_details_font_size_by']) f = fi.pixelSize() + 1 + int(tweaks['change_book_details_font_size_by'])
fam = unicode(fi.family()).strip().replace('"', '') fam = unicode(fi.family()).strip().replace('"', '')
@ -274,8 +284,6 @@ class EditorWidget(QWebView): # {{{
body.setAttribute('style', style) body.setAttribute('style', style)
self.page().setContentEditable(True) self.page().setContentEditable(True)
return property(fget=fget, fset=fset)
def keyPressEvent(self, ev): def keyPressEvent(self, ev):
if ev.key() in (Qt.Key_Tab, Qt.Key_Escape, Qt.Key_Backtab): if ev.key() in (Qt.Key_Tab, Qt.Key_Escape, Qt.Key_Backtab):
ev.ignore() ev.ignore()
@ -627,4 +635,6 @@ if __name__ == '__main__':
w = Editor() w = Editor()
w.resize(800, 600) w.resize(800, 600)
w.show() w.show()
w.html = '<b>testing</b>'
app.exec_()
#print w.html #print w.html

View File

@ -126,7 +126,8 @@ class BulkConfig(Config):
def setup_output_formats(self, db, preferred_output_format): def setup_output_formats(self, db, preferred_output_format):
if preferred_output_format: if preferred_output_format:
preferred_output_format = preferred_output_format.lower() preferred_output_format = preferred_output_format.lower()
output_formats = sorted(available_output_formats()) output_formats = sorted(available_output_formats(),
key=lambda x:{'EPUB':'!A', 'MOBI':'!B'}.get(x.upper(), x))
output_formats.remove('oeb') output_formats.remove('oeb')
preferred_output_format = preferred_output_format if \ preferred_output_format = preferred_output_format if \
preferred_output_format and preferred_output_format \ preferred_output_format and preferred_output_format \

View File

@ -109,12 +109,18 @@
</item> </item>
<item row="0" column="1"> <item row="0" column="1">
<widget class="QDoubleSpinBox" name="opt_margin_left"> <widget class="QDoubleSpinBox" name="opt_margin_left">
<property name="specialValueText">
<string>No margin</string>
</property>
<property name="suffix"> <property name="suffix">
<string> pt</string> <string> pt</string>
</property> </property>
<property name="decimals"> <property name="decimals">
<number>1</number> <number>1</number>
</property> </property>
<property name="minimum">
<double>-1.000000000000000</double>
</property>
<property name="maximum"> <property name="maximum">
<double>200.000000000000000</double> <double>200.000000000000000</double>
</property> </property>
@ -132,12 +138,18 @@
</item> </item>
<item row="1" column="1"> <item row="1" column="1">
<widget class="QDoubleSpinBox" name="opt_margin_top"> <widget class="QDoubleSpinBox" name="opt_margin_top">
<property name="specialValueText">
<string>No margin</string>
</property>
<property name="suffix"> <property name="suffix">
<string> pt</string> <string> pt</string>
</property> </property>
<property name="decimals"> <property name="decimals">
<number>1</number> <number>1</number>
</property> </property>
<property name="minimum">
<double>-1.000000000000000</double>
</property>
<property name="maximum"> <property name="maximum">
<double>200.000000000000000</double> <double>200.000000000000000</double>
</property> </property>
@ -155,12 +167,18 @@
</item> </item>
<item row="2" column="1"> <item row="2" column="1">
<widget class="QDoubleSpinBox" name="opt_margin_right"> <widget class="QDoubleSpinBox" name="opt_margin_right">
<property name="specialValueText">
<string>No margin</string>
</property>
<property name="suffix"> <property name="suffix">
<string> pt</string> <string> pt</string>
</property> </property>
<property name="decimals"> <property name="decimals">
<number>1</number> <number>1</number>
</property> </property>
<property name="minimum">
<double>-1.000000000000000</double>
</property>
<property name="maximum"> <property name="maximum">
<double>200.000000000000000</double> <double>200.000000000000000</double>
</property> </property>
@ -178,12 +196,18 @@
</item> </item>
<item row="3" column="1"> <item row="3" column="1">
<widget class="QDoubleSpinBox" name="opt_margin_bottom"> <widget class="QDoubleSpinBox" name="opt_margin_bottom">
<property name="specialValueText">
<string>No margin</string>
</property>
<property name="suffix"> <property name="suffix">
<string> pt</string> <string> pt</string>
</property> </property>
<property name="decimals"> <property name="decimals">
<number>1</number> <number>1</number>
</property> </property>
<property name="minimum">
<double>-1.000000000000000</double>
</property>
<property name="maximum"> <property name="maximum">
<double>200.000000000000000</double> <double>200.000000000000000</double>
</property> </property>

View File

@ -242,7 +242,8 @@ class Config(ResizableDialog, Ui_Dialog):
preferred_output_format): preferred_output_format):
if preferred_output_format: if preferred_output_format:
preferred_output_format = preferred_output_format.lower() preferred_output_format = preferred_output_format.lower()
output_formats = sorted(available_output_formats()) output_formats = sorted(available_output_formats(),
key=lambda x:{'EPUB':'!A', 'MOBI':'!B'}.get(x.upper(), x))
output_formats.remove('oeb') output_formats.remove('oeb')
input_format, input_formats = get_input_format_for_book(db, book_id, input_format, input_formats = get_input_format_for_book(db, book_id,
preferred_input_format) preferred_input_format)

View File

@ -349,7 +349,8 @@ class Text(Base):
return d.exec_() return d.exec_()
def edit(self): def edit(self):
if self.getter() != self.initial_val: if (self.getter() != self.initial_val and (self.getter() or
self.initial_val)):
d = self._save_dialog(self.parent, _('Values changed'), d = self._save_dialog(self.parent, _('Values changed'),
_('You have changed the values. In order to use this ' _('You have changed the values. In order to use this '
'editor, you must either discard or apply these ' 'editor, you must either discard or apply these '

View File

@ -182,7 +182,8 @@ class SearchDialog(QDialog, Ui_Dialog):
global box_values global box_values
box_values = copy.deepcopy(self.box_last_values) box_values = copy.deepcopy(self.box_last_values)
if general: if general:
ans.append(unicode(self.general_combo.currentText()) + ':"' + general + '"') ans.append(unicode(self.general_combo.currentText()) + ':"' +
self.mc + general + '"')
if ans: if ans:
return ' and '.join(ans) return ' and '.join(ans)
return '' return ''

Some files were not shown because too many files have changed in this diff Show More