Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

Commit 99c57ac10d: Merge from trunk
@@ -16,7 +16,6 @@ resources/ebook-convert-complete.pickle
 resources/builtin_recipes.xml
 resources/builtin_recipes.zip
 resources/template-functions.json
-resources/display/*.js
 setup/installer/windows/calibre/build.log
 src/calibre/translations/.errors
 src/cssutils/.svn/
@@ -19,6 +19,67 @@
 # new recipes:
 # - title:

+- version: 0.8.51
+  date: 2012-05-11
+
+  new features:
+    - title: "When switching libraries preserve the position and selected books if you switch back to a previously opened library."
+      tickets: [994514]
+
+    - title: "Conversion pipeline: Filter out the useless font-face rules inserted by Microsoft Word for every font on the system"
+
+    - title: "Driver for Motorola XT875 and Pandigital SuperNova"
+      tickets: [996890]
+
+    - title: "Add a colour swatch to the dialog for creating column coloring rules, to ease selection of colors"
+      tickets: [994811]
+
+    - title: "EPUB Output: Consolidate internal CSS generated by calibre into external stylesheets for ease of editing the EPUB"
+
+    - title: "List EPUB and MOBI at the top of the dropdown list of formats to convert to, as they are the most common choices"
+      tickets: [994838]
+
+  bug fixes:
+    - title: "E-book viewer: Improve performance when switching between normal and fullscreen views."
+      tickets: [996102]
+
+    - title: "Edit metadata dialog: When running download metadata do not insert duplicate tags into the list of tags"
+
+    - title: "KF8 Input: Do not error out if the file has a few invalidly encoded bytes."
+      tickets: [997034]
+
+    - title: "Fix download of news in AZW3 format not working"
+      tickets: [996439]
+
+    - title: "Pocketbook driver: Update for new PB 611 firmware."
+      tickets: [903079]
+
+    - title: "ebook-convert: Error out if the user provides extra command line args instead of silently ignoring them"
+      tickets: [994939]
+
+    - title: "EPUB Output: Do not self close any container tags to prevent artifacts when EPUBs are viewed using buggy browser based viewers."
+      tickets: [994861]
+
+    - title: "Fix regression in 0.8.50 that broke the conversion of HTML files that contained non-ascii font-face declarations, typically produced by Microsoft Word"
+
+  improved recipes:
+    - Mainichi news
+    - derStandard
+    - Endgadget Japan
+
+  new recipes:
+    - title: Mainichi English
+      author: Hiroshi Miura
+
+    - title: The Grid TO
+      author: Yusuf W
+
+    - title: National Geographic (Italy)
+      author: faber1971
+
+    - title: Rebelion
+      author: Marc Busque
+
 - version: 0.8.50
   date: 2012-05-04
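The rest of this merge is recipe files, reconstructed below. All of them follow the same BasicNewsRecipe pattern, so a minimal sketch may help orient the reader; the class name and feed URL here are hypothetical, not taken from the commit:

    from calibre.web.feeds.news import BasicNewsRecipe

    class ExampleRecipe(BasicNewsRecipe):   # hypothetical name
        title = u'Example News'             # used as the ebook title
        language = 'en'
        oldest_article = 7                  # skip items older than a week
        max_articles_per_feed = 100
        no_stylesheets = True               # drop the site's CSS
        # each feed is a (section name, RSS URL) pair
        feeds = [(u'All stories', u'http://example.com/rss.xml')]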
recipes/ads_of_the_world.recipe (new file, 26 lines)

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1336986047(BasicNewsRecipe):
    title = u'Ads of the World'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = False
    description = 'The best international advertising campaigns'
    language = 'en'
    __author__ = 'faber1971'

    no_stylesheets = True
    keep_only_tags = [
        dict(name='div', attrs={'id':'primary'})
    ]

    remove_tags = [
        dict(name='ul', attrs={'class':'links inline'})
       ,dict(name='div', attrs={'class':'form-item'})
       ,dict(name='div', attrs={'id':['options', 'comments']})
       ,dict(name='ul', attrs={'id':'nodePager'})
    ]

    reverse_article_order = True
    masthead_url = 'http://bigcatgroup.co.uk/files/2011/01/05-ads-of-the-world.png'
    feeds = [(u'Ads of the world', u'http://feeds.feedburner.com/adsoftheworld-latest')]
recipes/air_force_times.recipe (new file, 43 lines)

from calibre.web.feeds.news import BasicNewsRecipe

class AirForceTimes(BasicNewsRecipe):
    title = 'Air Force Times'
    __author__ = 'jde'
    __date__ = '16 May 2012'
    __version__ = '1.0'
    description = 'News of the U.S. Air Force'
    language = 'en'
    publisher = 'AirForceTimes.com'
    category = 'news, U.S. Air Force'
    tags = 'news, U.S. Air Force'
    cover_url = 'http://www.airforcetimes.com/images/logo_airforcetimes_alert.jpg'
    masthead_url = 'http://www.airforcetimes.com/images/logo_airforcetimes_alert.jpg'
    oldest_article = 7  # days
    max_articles_per_feed = 25
    publication_type = 'newspaper'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = True

    feeds = [
        ('News', 'http://www.airforcetimes.com/rss_news.php'),
        ('Benefits', 'http://www.airforcetimes.com/rss_benefits.php'),
        ('Money', 'http://www.airforcetimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.airforcetimes.com/rss_careers.php'),
        ('Community', 'http://www.airforcetimes.com/rss_community.php'),
        ('Off Duty', 'http://www.airforcetimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.airforcetimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.airforcetimes.com/rss_guard.php'),
    ]
recipes/army_times.recipe (new file, 42 lines)

from calibre.web.feeds.news import BasicNewsRecipe

class ArmyTimes(BasicNewsRecipe):
    title = 'Army Times'
    __author__ = 'jde'
    __date__ = '16 May 2012'
    __version__ = '1.0'
    description = 'News of the U.S. Army'
    language = 'en'
    publisher = 'ArmyTimes.com'
    category = 'news, U.S. Army'
    tags = 'news, U.S. Army'
    cover_url = 'http://www.armytimes.com/images/logo_armytimes_alert.jpg'
    masthead_url = 'http://www.armytimes.com/images/logo_armytimes_alert.jpg'
    oldest_article = 7  # days
    max_articles_per_feed = 25
    publication_type = 'newspaper'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = True

    feeds = [
        ('News', 'http://www.armytimes.com/rss_news.php'),
        ('Benefits', 'http://www.armytimes.com/rss_benefits.php'),
        ('Money', 'http://www.armytimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.armytimes.com/rss_careers.php'),
        ('Community', 'http://www.armytimes.com/rss_community.php'),
        ('Off Duty', 'http://www.armytimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.armytimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.armytimes.com/rss_guard.php'),
    ]
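air_force_times.recipe and army_times.recipe above (and the Marine Corps Times and Military Times recipes later in this commit) are identical except for the title, publisher, and host name. A hypothetical refactoring, not part of the commit and untested against calibre's recipe loader, that factors out the duplication:

    from calibre.web.feeds.news import BasicNewsRecipe

    # Shared settings of the four *Times recipes (hypothetical base class).
    class _TimesBase(BasicNewsRecipe):
        __author__ = 'jde'
        oldest_article = 7  # days
        max_articles_per_feed = 25
        publication_type = 'newspaper'
        no_stylesheets = True
        use_embedded_content = False
        remove_javascript = True
        remove_empty_feeds = True
        auto_cleanup = True

    def make_times_recipe(title, host):
        # Every branch exposes the same rss_<section>.php endpoints.
        sections = [('News', 'news'), ('Benefits', 'benefits'),
                    ('Money', 'money'), ('Careers & Education', 'careers'),
                    ('Community', 'community'), ('Off Duty', 'off_duty'),
                    ('Entertainment', 'entertainment'),
                    ('Guard & Reserve', 'guard')]
        feeds = [(name, 'http://www.%s/rss_%s.php' % (host, slug))
                 for name, slug in sections]
        return type(title.replace(' ', ''), (_TimesBase,),
                    {'title': title, 'feeds': feeds})

    AirForceTimes = make_times_recipe('Air Force Times', 'airforcetimes.com')
    ArmyTimes = make_times_recipe('Army Times', 'armytimes.com')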
Modified: Ars Technica recipe (arstechnica.com)

@@ -1,5 +1,5 @@
 __license__ = 'GPL v3'
-__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
 '''
 arstechnica.com
 '''

@@ -12,22 +12,24 @@ class ArsTechnica(BasicNewsRecipe):
     title = u'Ars Technica'
     language = 'en'
     __author__ = 'Darko Miletic, Sujata Raman, Alexis Rohou'
-    description = 'The art of technology'
-    publisher = 'Ars Technica'
+    description = 'Ars Technica: Serving the technologist for 1.2 decades'
+    publisher = 'Conde Nast Publications'
     category = 'news, IT, technology'
     oldest_article = 5
     max_articles_per_feed = 100
     no_stylesheets = True
     encoding = 'utf-8'
     use_embedded_content = False
-    extra_css = '''
-        body {font-family: Arial,Helvetica,sans-serif}
-        .title{text-align: left}
-        .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none}
-        .news-item-figure-caption-text{font-size:small; font-style:italic}
-        .news-item-figure-caption-byline{font-size:small; font-style:italic; font-weight:bold}
-        '''
-    ignoreEtcArticles = True # Etc feed items can be ignored, as they're not real stories
+    remove_empty_feeds = True
+    publication_type = 'newsportal'
+    extra_css = '''
+        body {font-family: Arial,sans-serif}
+        .heading{font-family: "Times New Roman",serif}
+        .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none}
+        img{display: block}
+        .caption-text{font-size:small; font-style:italic}
+        .caption-byline{font-size:small; font-style:italic; font-weight:bold}
+        '''

     conversion_options = {
         'comments' : description

@@ -36,93 +38,64 @@ class ArsTechnica(BasicNewsRecipe):
        ,'publisher' : publisher
     }

-    #preprocess_regexps = [
-    #    (re.compile(r'<div class="news-item-figure', re.DOTALL|re.IGNORECASE),lambda match: '<div class="news-item-figure"')
-    #   ,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
-    #    ]
-
-    keep_only_tags = [dict(name='div', attrs={'id':['story','etc-story']})]
+    keep_only_tags = [
+        dict(attrs={'class':'standalone'})
+       ,dict(attrs={'id':'article-guts'})
+    ]

     remove_tags = [
-        dict(name=['object','link','embed'])
-       ,dict(name='div', attrs={'class':'read-more-link'})
+        dict(name=['object','link','embed','iframe','meta'])
+       ,dict(attrs={'class':'corner-info'})
     ]
-    #remove_attributes=['width','height']
+    remove_attributes = ['lang']

     feeds = [
         (u'Infinite Loop (Apple content)', u'http://feeds.arstechnica.com/arstechnica/apple/')
        ,(u'Opposable Thumbs (Gaming content)', u'http://feeds.arstechnica.com/arstechnica/gaming/')
        ,(u'Gear and Gadgets', u'http://feeds.arstechnica.com/arstechnica/gadgets/')
-       ,(u'Chipster (Hardware content)', u'http://feeds.arstechnica.com/arstechnica/hardware/')
        ,(u'Uptime (IT content)', u'http://feeds.arstechnica.com/arstechnica/business/')
        ,(u'Open Ended (Open Source content)', u'http://feeds.arstechnica.com/arstechnica/open-source/')
        ,(u'One Microsoft Way', u'http://feeds.arstechnica.com/arstechnica/microsoft/')
-       ,(u'Nobel Intent (Science content)', u'http://feeds.arstechnica.com/arstechnica/science/')
+       ,(u'Scientific method (Science content)', u'http://feeds.arstechnica.com/arstechnica/science/')
        ,(u'Law & Disorder (Tech policy content)', u'http://feeds.arstechnica.com/arstechnica/tech-policy/')
     ]

-    # This deals with multi-page stories
     def append_page(self, soup, appendtag, position):
-        pager = soup.find('div',attrs={'class':'pager'})
+        pager = soup.find(attrs={'class':'numbers'})
         if pager:
-            for atag in pager.findAll('a',href=True):
-                str = self.tag_to_string(atag)
-                if str.startswith('Next'):
-                    nurl = 'http://arstechnica.com' + atag['href']
-                    rawc = self.index_to_soup(nurl,True)
-                    soup2 = BeautifulSoup(rawc, fromEncoding=self.encoding)
-
-                    readmoretag = soup2.find('div', attrs={'class':'read-more-link'})
-                    if readmoretag:
-                        readmoretag.extract()
-                    texttag = soup2.find('div', attrs={'class':'body'})
-                    for it in texttag.findAll(style=True):
-                        del it['style']
-
-                    newpos = len(texttag.contents)
-                    self.append_page(soup2,texttag,newpos)
-                    texttag.extract()
-                    pager.extract()
-                    appendtag.insert(position,texttag)
+            nexttag = pager.find(attrs={'class':'next'})
+            if nexttag:
+                nurl = nexttag.parent['href']
+                rawc = self.index_to_soup(nurl,True)
+                soup2 = BeautifulSoup(rawc, fromEncoding=self.encoding)
+                texttag = soup2.find(attrs={'id':'article-guts'})
+                newpos = len(texttag.contents)
+                self.append_page(soup2,texttag,newpos)
+                texttag.extract()
+                pager.extract()
+                appendtag.insert(position,texttag)

     def preprocess_html(self, soup):
-        # Adds line breaks near the byline (not sure why this is needed)
-        ftag = soup.find('div', attrs={'class':'byline'})
-        if ftag:
-            brtag = Tag(soup,'br')
-            brtag2 = Tag(soup,'br')
-            ftag.insert(4,brtag)
-            ftag.insert(5,brtag2)
-
-        # Remove style items
-        for item in soup.findAll(style=True):
-            del item['style']
-
-        # Remove id
-        for item in soup.findAll(id=True):
-            del item['id']
-
-        # For some reason, links to authors don't have the domainname
-        a_author = soup.find('a',{'href':re.compile("^/author")})
-        if a_author:
-            a_author['href'] = 'http://arstechnica.com'+a_author['href']
-
-        # within div class news-item-figure, we need to grab images
-
-        # Deal with multi-page stories
         self.append_page(soup, soup.body, 3)
+        for item in soup.findAll('a'):
+            limg = item.find('img')
+            if item.string is not None:
+                str = item.string
+                item.replaceWith(str)
+            else:
+                if limg:
+                    item.name = 'div'
+                    item.attrs = []
+                else:
+                    str = self.tag_to_string(item)
+                    item.replaceWith(str)
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+                item['alt'] = 'image'
         return soup

-    def get_article_url(self, article):
-        # If the article title starts with Etc:, don't return it
-        if self.ignoreEtcArticles:
-            article_title = article.get('title',None)
-            if re.match('Etc: ',article_title) is not None:
-                return None
-
-        # The actual article is in a guid tag
-        return article.get('guid', None).rpartition('?')[0]
+    def preprocess_raw_html(self, raw, url):
+        return '<html><head>'+raw[raw.find('</head>'):]
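The new preprocess_raw_html keeps only the closing </head> onward, discarding the site's original head (scripts, styles, meta) before calibre parses the page. A quick illustration of the string surgery on a hypothetical raw page:

    raw = '<html><head><script>track()</script><style>.ad{}</style></head><body>story</body></html>'
    # everything before '</head>' is replaced by a bare '<html><head>' prefix
    cleaned = '<html><head>' + raw[raw.find('</head>'):]
    print(cleaned)  # <html><head></head><body>story</body></html>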
Modified: derStandard recipe

@@ -7,10 +7,11 @@ __copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
 ''' http://www.derstandard.at - Austrian Newspaper '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
+from time import strftime

 class DerStandardRecipe(BasicNewsRecipe):
     title = u'derStandard'
-    __author__ = 'Gerhard Aigner and Sujata Raman and Marcel Jira'
+    __author__ = 'Gerhard Aigner and Sujata Raman and Marcel Jira and Peter Reschenhofer'
     description = u'Nachrichten aus Österreich'
     publisher = 'derStandard.at'
     category = 'news, politics, nachrichten, Austria'

@@ -88,3 +89,41 @@ class DerStandardRecipe(BasicNewsRecipe):
         for t in soup.findAll(['ul', 'li']):
             t.name = 'div'
         return soup
+
+    def get_cover_url(self):
+        highResolution = True
+
+        date = strftime("%Y/%Y%m%d")
+        # it is also possible for the past
+        #date = '2012/20120503'
+
+        urlP1 = 'http://epaper.derstandarddigital.at/'
+        urlP2 = 'data_ep/STAN/' + date
+        urlP3 = '/V.B1/'
+        urlP4 = 'paper.htm'
+        urlHTML = urlP1 + urlP2 + urlP3 + urlP4
+
+        br = self.clone_browser(self.browser)
+        htmlF = br.open_novisit(urlHTML)
+        htmlC = htmlF.read()
+
+        # URL EXAMPLE: data_ep/STAN/2012/20120504/V.B1/pages/A3B6798F-2751-4D8D-A103-C5EF22F7ACBE.htm
+        # consists of part2 + part3 + 'pages/' + code
+        # 'pages/' has length 6, code has length 36
+        index = htmlC.find(urlP2) + len(urlP2 + urlP3) + 6
+        code = htmlC[index:index + 36]
+
+        # URL EXAMPLE HIGH RESOLUTION: http://epaper.derstandarddigital.at/data_ep/STAN/2012/20120504/pagejpg/A3B6798F-2751-4D8D-A103-C5EF22F7ACBE_b.png
+        # URL EXAMPLE LOW RESOLUTION:  http://epaper.derstandarddigital.at/data_ep/STAN/2012/20120504/pagejpg/2AB52F71-11C1-4859-9114-CDCD79BEFDCB.png
+        urlPic = urlP1 + urlP2 + '/pagejpg/' + code
+
+        if highResolution:
+            urlPic = urlPic + '_b'
+
+        urlPic = urlPic + '.png'
+
+        return urlPic
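The index arithmetic in the new get_cover_url can be checked against the example URL quoted in its comments; a worked illustration on a hypothetical fragment of paper.htm:

    urlP2 = 'data_ep/STAN/2012/20120504'
    urlP3 = '/V.B1/'
    # hypothetical fragment of the fetched paper.htm
    htmlC = 'href="data_ep/STAN/2012/20120504/V.B1/pages/A3B6798F-2751-4D8D-A103-C5EF22F7ACBE.htm"'
    index = htmlC.find(urlP2) + len(urlP2 + urlP3) + 6  # the +6 skips 'pages/'
    code = htmlC[index:index + 36]                      # 36-character page id
    print(code)  # A3B6798F-2751-4D8D-A103-C5EF22F7ACBE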
recipes/economico.recipe (new file, 30 lines)

from calibre.web.feeds.news import BasicNewsRecipe

class Economico(BasicNewsRecipe):
    title = u'Economico'
    language = 'pt'
    __author__ = 'Krittika Goyal'
    oldest_article = 1  # days
    max_articles_per_feed = 25
    encoding = 'utf-8'
    use_embedded_content = False

    no_stylesheets = True
    auto_cleanup = True

    feeds = [
        ('Ultima Hora', 'http://economico.sapo.pt/rss/ultimas'),
        ('Em Foco', 'http://economico.sapo.pt/rss/emfoco'),
        ('Mercados', 'http://economico.sapo.pt/rss/mercados'),
        ('Empresas', 'http://economico.sapo.pt/rss/empresas'),
        ('Economia', 'http://economico.sapo.pt/rss/economia'),
        ('Politica', 'http://economico.sapo.pt/rss/politica'),
    ]
Modified: Engadget Japan recipe

@@ -17,7 +17,25 @@ class EndgadgetJapan(BasicNewsRecipe):
     no_stylesheets = True
     language = 'ja'
     encoding = 'utf-8'
-    feeds = [(u'engadget', u'http://japanese.engadget.com/rss.xml')]
+    index = 'http://japanese.engadget.com/'
+    remove_javascript = True
+
+    remove_tags_before = dict(name="h1", attrs={'class':"post_title"})
+    remove_tags_after = dict(name='div', attrs={'class':'post_body'})
+
+    def parse_index(self):
+        feeds = []
+        newsarticles = []
+        soup = self.index_to_soup(self.index)
+        for topstories in soup.findAll('div',attrs={'class':'post_content'}):
+            itt = topstories.find('h4')
+            itema = itt.find('a',href=True)
+            newsarticles.append({
+                'title'       :itema.string
+               ,'date'        :''
+               ,'url'         :itema['href']
+               ,'description' :''
+            })
+        feeds.append(('Latest Posts', newsarticles))
+        return feeds
-    remove_tags_before = dict(name="div", attrs={'id':"content_wrap"})
-    remove_tags_after = dict(name='h3', attrs={'id':'addcomments'})
recipes/folha.recipe (new file, 82 lines)

# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.folha.uol.com.br
'''
import urllib
from calibre.web.feeds.news import BasicNewsRecipe

class Folha_de_s_paulo(BasicNewsRecipe):
    title = u'Folha de São Paulo - portal'
    __author__ = 'Darko Miletic'
    description = 'Um Jornala a servicao do Brasil'
    publisher = 'Folhapress'
    category = 'news, politics, Brasil'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'pt_BR'
    remove_empty_feeds = True
    publication_type = 'newspaper'
    masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif }
        img{margin-bottom: 0.4em; display:block}
    """

    conversion_options = {
        'comment'   : description
      , 'tags'      : category
      , 'publisher' : publisher
      , 'language'  : language
    }

    remove_tags = [dict(name=['meta','link','base','iframe','embed','object'])]
    keep_only_tags = [dict(attrs={'id':'articleNew'})]

    feeds = [
        (u'Poder', u'http://feeds.folha.uol.com.br/poder/rss091.xml')
       ,(u'Mundo', u'http://feeds.folha.uol.com.br/mundo/rss091.xml')
       ,(u'Mercado', u'http://feeds.folha.uol.com.br/mercado/rss091.xml')
       ,(u'Cotidiano', u'http://feeds.folha.uol.com.br/cotidiano/rss091.xml')
       ,(u'Esporte', u'http://feeds.folha.uol.com.br/esporte/rss091.xml')
       ,(u'Ilustrada', u'http://feeds.folha.uol.com.br/ilustrada/rss091.xml')
       ,(u'F5', u'http://feeds.folha.uol.com.br/f5/rss091.xml')
       ,(u'Ciência', u'http://feeds.folha.uol.com.br/ciencia/rss091.xml')
       ,(u'Tec', u'http://feeds.folha.uol.com.br/tec/rss091.xml')
       ,(u'Ambiente', u'http://feeds.folha.uol.com.br/ambiente/rss091.xml')
       ,(u'Bichos', u'http://feeds.folha.uol.com.br/bichos/rss091.xml')
       ,(u'Celebridades', u'http://feeds.folha.uol.com.br/celebridades/rss091.xml')
       ,(u'Comida', u'http://feeds.folha.uol.com.br/comida/rss091.xml')
       ,(u'Equilibrio', u'http://feeds.folha.uol.com.br/equilibrioesaude/rss091.xml')
       ,(u'Folhateen', u'http://feeds.folha.uol.com.br/folhateen/rss091.xml')
       ,(u'Folhinha', u'http://feeds.folha.uol.com.br/folhinha/rss091.xml')
       ,(u'Ilustrissima', u'http://feeds.folha.uol.com.br/ilustrissima/rss091.xml')
       ,(u'Saber', u'http://feeds.folha.uol.com.br/saber/rss091.xml')
       ,(u'Turismo', u'http://feeds.folha.uol.com.br/turismo/rss091.xml')
       ,(u'Panel do Leitor', u'http://feeds.folha.uol.com.br/folha/paineldoleitor/rss091.xml')
       ,(u'Publifolha', u'http://feeds.folha.uol.com.br/folha/publifolha/rss091.xml')
       ,(u'Em cima da hora', u'http://feeds.folha.uol.com.br/emcimadahora/rss091.xml')
    ]

    def get_article_url(self, article):
        url = BasicNewsRecipe.get_article_url(self, article)
        curl = url.partition('/*')[2]
        return curl

    def print_version(self, url):
        return 'http://tools.folha.com.br/print?site=emcimadahora&url=' + urllib.quote_plus(url)

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.folha.uol.com.br/')
        cont = soup.find('div', attrs={'id':'newspaper'})
        if cont:
            ai = cont.find('a', href='http://www1.folha.uol.com.br/fsp/')
            if ai:
                return ai.img['src']
        return None
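Folha feed entries wrap the real article URL behind a redirector of the form <redirector>/*<real url>; get_article_url strips the wrapper and print_version URL-encodes the result. An illustration with a hypothetical feed URL (the encoding behaviour itself is standard urllib):

    import urllib

    feed_url = 'http://feeds.folha.uol.com.br/~r/poder/~3/abc/*http://www1.folha.uol.com.br/poder/123.shtml'
    real_url = feed_url.partition('/*')[2]
    # -> http://www1.folha.uol.com.br/poder/123.shtml
    print('http://tools.folha.com.br/print?site=emcimadahora&url=' + urllib.quote_plus(real_url))
    # -> http://tools.folha.com.br/print?site=emcimadahora&url=http%3A%2F%2Fwww1.folha.uol.com.br%2Fpoder%2F123.shtml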
Modified: FolhaOnline recipe

@@ -8,7 +8,7 @@ from urllib2 import Request, urlopen, URLError
 class FolhaOnline(BasicNewsRecipe):
     THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here
     LANGUAGE = 'pt_br'
-    language = 'pt'
+    language = 'pt_BR'
     LANGHTM = 'pt-br'
     ENCODING = 'cp1252'
     ENCHTM = 'iso-8859-1'
Modified: FSP recipe

@@ -14,7 +14,7 @@ class FSP(BasicNewsRecipe):
     HOMEPAGE = 'http://www1.folha.uol.com.br/fsp/'
     masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'

-    language = 'pt'
+    language = 'pt_BR'
     no_stylesheets = True
     max_articles_per_feed = 40
     remove_javascript = True
Modified: Gość niedzielny recipe (class GN)

@@ -6,21 +6,20 @@ __copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com'

 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ptempfile import PersistentTemporaryFile
+from datetime import date
 import re

 class GN(BasicNewsRecipe):
     EDITION = 0

     __author__ = 'Piotr Kontek'
+    title = u'Gość niedzielny'
     description = 'Weekly magazine'
     encoding = 'utf-8'
     no_stylesheets = True
     language = 'pl'
     remove_javascript = True
     temp_files = []
-    simultaneous_downloads = 1
-    masthead_url = 'http://gosc.pl/files/11/03/12/949089_top.gif'
-    title = u'Gość niedzielny'

     articles_are_obfuscated = True

@@ -56,22 +55,28 @@ class GN(BasicNewsRecipe):
             self.temp_files[-1].close()
         return self.temp_files[-1].name

-    def find_last_issue(self):
-        soup = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny')
-        #szukam zdjęcia i linka do porzedniego pełnego numeru
+    def find_last_issue(self, year):
+        soup = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny/rok/' + str(year))
+
+        #szukam zdjęcia i linka do poprzedniego pełnego numeru
         first = True
         for d in soup.findAll('div', attrs={'class':'l release_preview_l'}):
             img = d.find('img')
             if img != None:
                 a = img.parent
                 self.EDITION = a['href']
+                self.title = img['alt']
                 self.cover_url = 'http://www.gosc.pl' + img['src']
-                if not first:
+                if year != date.today().year or not first:
                     break
                 first = False

     def parse_index(self):
-        self.find_last_issue()
+        year = date.today().year
+        self.find_last_issue(year)
+        ##jeśli to pierwszy numer w roku trzeba pobrać poprzedni rok
+        if self.EDITION == 0:
+            self.find_last_issue(year-1)
         soup = self.index_to_soup('http://www.gosc.pl' + self.EDITION)
         feeds = []
         #wstepniak
recipes/grid_to.recipe (new file, 79 lines)

from calibre.web.feeds.news import BasicNewsRecipe

class TheGridTO(BasicNewsRecipe):
    #: The title to use for the ebook
    title = u'The Grid TO'

    #: A couple of lines that describe the content this recipe downloads.
    #: This will be used primarily in a GUI that presents a list of recipes.
    description = (u'The Grid is a weekly city magazine and daily website providing a fresh, '
                    'accessible voice for Toronto.')

    #: The author of this recipe
    __author__ = u'Yusuf W'

    #: The language that the news is in. Must be an ISO-639 code either
    #: two or three characters long
    language = 'en_CA'

    #: Publication type
    #: Set to newspaper, magazine or blog
    publication_type = 'newspaper'

    #: Convenient flag to disable loading of stylesheets for websites
    #: that have overly complex stylesheets unsuitable for conversion
    #: to ebooks formats
    #: If True stylesheets are not downloaded and processed
    no_stylesheets = True

    #: List of tags to be removed. Specified tags are removed from downloaded HTML.
    remove_tags_before = dict(name='div', id='content')
    remove_tags_after = dict(name='div', id='content')
    remove_tags = [
        dict(name='div', attrs={'class':'right-content pull-right'}),
        dict(name='div', attrs={'class':'right-content'}),
        dict(name='div', attrs={'class':'ftr-line'}),
        dict(name='div', attrs={'class':'pull-right'}),
        dict(name='div', id='comments'),
        dict(name='div', id='tags')
    ]

    #: Keep only the specified tags and their children.
    #keep_only_tags = [dict(name='div', id='content')]

    cover_margins = (0, 0, '#ffffff')

    INDEX = 'http://www.thegridto.com'

    def get_cover_url(self):
        soup = self.index_to_soup(self.INDEX)
        cover_url = soup.find(attrs={'class':'article-block latest-issue'}).find('img')['src']

        return cover_url

    def parse_index(self):

        # Get the latest issue
        soup = self.index_to_soup(self.INDEX)
        a = soup.find('div', attrs={'class': 'full-content stuff-ftr'}).findAll('a')[2]

        # Parse the index of the latest issue
        self.INDEX = self.INDEX + a['href']
        soup = self.index_to_soup(self.INDEX)

        feeds = []
        for section in ['city', 'life', 'culture']:
            section_class = 'left-content article-listing ' + section + ' pull-left'
            div = soup.find(attrs={'class': section_class})

            articles = []
            for tag in div.findAllNext(attrs={'class':'search-block'}):
                a = tag.findAll('a', href=True)[1]

                title = self.tag_to_string(a)
                url = a['href']

                articles.append({'title': title, 'url': url, 'description':'', 'date':''})

            feeds.append((section, articles))
        return feeds
recipes/heavy_metal_it.recipe (new file, 22 lines)

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1336289226(BasicNewsRecipe):
    title = u'Heavy Metal'
    oldest_article = 15
    max_articles_per_feed = 100
    auto_cleanup = False
    masthead_url = 'http://net-static2.tccstatic.com/template/tmw/img/tj.gif'
    feeds = [(u'Heavy Metal', u'http://www.heavy-metal.it/feed/')]
    keep_only_tags = [
        dict(name='div', attrs={'class':'entry'})
    ]
    remove_tags_after = [
        dict(name='div', attrs={'class':'sociable'})
    ]
    description = 'An Heavy metal Italian magazine'
    __author__ = 'faber1971'
    language = 'it'

    __version__ = 'v1.0'
    __date__ = '6, May 2012'
recipes/icons/folha.png (new binary file, 1.6 KiB; not shown)
recipes/icons/strategic_culture.png (new binary file, 648 B; not shown)
Modified: Jiji.com recipe

@@ -20,6 +20,8 @@ class JijiDotCom(BasicNewsRecipe):
     top_url = 'http://www.jiji.com/'

     feeds = [(u'\u30cb\u30e5\u30fc\u30b9', u'http://www.jiji.com/rss/ranking.rdf')]

+    remove_tags_before = dict(id="article-area")
     remove_tags_after = dict(id="ad_google")

     def get_cover_url(self):
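For context on the added line: in BasicNewsRecipe, remove_tags_before strips everything preceding the first tag matching the spec, and remove_tags_after strips everything following its match, so the pair brackets the article body. A sketch of the intended effect on a hypothetical Jiji page:

    # hypothetical page layout and what survives the two specs above:
    #   <div id="header">...</div>          dropped: precedes id="article-area"
    #   <div id="article-area">...</div>    kept: from the first match onward...
    #   <div id="ad_google">...</div>       ...down to this match
    #   <div id="footer">...</div>          dropped: follows id="ad_google"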
24
recipes/juve_la_stampa.recipe
Normal file
24
recipes/juve_la_stampa.recipe
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1336504510(BasicNewsRecipe):
|
||||||
|
title = u'Juve - La Stampa'
|
||||||
|
oldest_article = 1
|
||||||
|
language = 'it'
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
auto_cleanup = True
|
||||||
|
masthead_url = 'http://www3.lastampa.it/fileadmin/media/sport/quijuve/top_quijuve.jpg'
|
||||||
|
feeds = [(u'Qui Juve - La Stampa', u'http://feed43.com/2352784107537677.xml')]
|
||||||
|
remove_tags = [dict(name='div',attrs={'class':['article-toolbar', 'sezione sezione-news', 'intestazione']})]
|
||||||
|
|
||||||
|
extra_css = '''
|
||||||
|
div.dettaglio div.immagine_girata p.news-single-imgcaption {color: #000000; font-family: "Georgia", "Times", serif; font-size: 7px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
|
||||||
|
.sezione {color: #000000; font-family: "Georgia", "Times", serif; font-size: 7px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
|
||||||
|
body {color: #000000; font-family: "Georgia", "Times", serif; font-size: 7px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
|
||||||
|
h3 {color: #000000; font-family: "Georgia", "Times", serif; font-size: 22px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
|
||||||
|
div.dettaglio h2.catenaccio {color: #000000; font-family: "Georgia", "Times", serif; font-size: 18px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
|
||||||
|
'''
|
||||||
|
description = 'News about Juventus from La Stampa'
|
||||||
|
__author__ = 'faber1971'
|
||||||
|
|
||||||
|
__version__ = 'v1.0'
|
||||||
|
__date__ = '8, May 2012'
|
Modified: La Repubblica recipe

@@ -1,7 +1,7 @@
 __license__ = 'GPL v3'
-__author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini'
-__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
-description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version; 14.12.2011 new version'
+__author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini; minor fixes by faber1971'
+__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>, faber1971'
+description = 'Italian daily newspaper - v1.02 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version; 14.12.2011 new version; 11.05.2012 new version'

 '''
 http://www.repubblica.it/

@@ -12,14 +12,14 @@ from calibre.web.feeds.news import BasicNewsRecipe

 class LaRepubblica(BasicNewsRecipe):
     title = 'La Repubblica'
-    __author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic'
+    __author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic, faber1971'
     description = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.'
     masthead_url = 'http://www.repubblica.it/static/images/homepage/2010/la-repubblica-logo-home-payoff.png'
     publisher = 'Gruppo editoriale L\'Espresso'
     category = 'News, politics, culture, economy, general interest'
     language = 'it'
     timefmt = '[%a, %d %b, %Y]'
-    oldest_article = 5
+    oldest_article = 1
     encoding = 'utf8'
     use_embedded_content = False
     no_stylesheets = True

@@ -59,6 +59,7 @@ class LaRepubblica(BasicNewsRecipe):
         dict(attrs={'class':'articolo'}),
         dict(attrs={'class':'body-text'}),
         dict(name='p', attrs={'class':'disclaimer clearfix'}),
+        dict(name='div', attrs={'id':'main'}),
         dict(attrs={'id':'contA'})
     ]

@@ -67,7 +68,7 @@ class LaRepubblica(BasicNewsRecipe):
         dict(name=['object','link','meta','iframe','embed']),
         dict(name='span',attrs={'class':'linkindice'}),
         dict(name='div', attrs={'class':['bottom-mobile','adv adv-middle-inline']}),
-        dict(name='div', attrs={'id':['rssdiv','blocco','fb-like-head']}),
+        dict(name='div', attrs={'id':['rssdiv','blocco','fb-like-head', 'sidebar']}),
         dict(name='div', attrs={'class':['utility','fb-like-button','archive-button']}),
         dict(name='div', attrs={'class':'generalbox'}),
         dict(name='ul', attrs={'id':'hystory'})

@@ -88,11 +89,12 @@ class LaRepubblica(BasicNewsRecipe):
         (u'Sport', u'http://www.repubblica.it/rss/sport/rss2.0.xml'),
         (u'Calcio', u'http://www.repubblica.it/rss/sport/calcio/rss2.0.xml'),
         (u'Motori', u'http://www.repubblica.it/rss/motori/rss2.0.xml'),
-        (u'Edizione Roma', u'http://roma.repubblica.it/rss/rss2.0.xml'),
-        (u'Edizione Torino', u'http://torino.repubblica.it/rss/rss2.0.xml'),
-        (u'Edizione Milano', u'feed://milano.repubblica.it/rss/rss2.0.xml'),
-        (u'Edizione Napoli', u'feed://napoli.repubblica.it/rss/rss2.0.xml'),
-        (u'Edizione Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml')
+        (u'Roma', u'http://roma.repubblica.it/rss/rss2.0.xml'),
+        (u'Torino', u'http://torino.repubblica.it/rss/rss2.0.xml'),
+        (u'Milano', u'feed://milano.repubblica.it/rss/rss2.0.xml'),
+        (u'Napoli', u'feed://napoli.repubblica.it/rss/rss2.0.xml'),
+        (u'Bari', u'http://bari.repubblica.it/rss/rss2.0.xml'),
+        (u'Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml')
     ]

     def preprocess_html(self, soup):
Modified: Mainichi Daily News recipe

@@ -16,12 +16,12 @@ class MainichiDailyNews(BasicNewsRecipe):
     publisher = 'Mainichi Daily News'
     category = 'news, japan'
     language = 'ja'
-    feeds = [(u'daily news', u'http://mainichi.jp/rss/etc/flash.rss')]
+    index = 'http://mainichi.jp/select/'
+    remove_javascript = True
+    masthead_title = u'MAINICHI DAILY NEWS'

     remove_tags_before = {'class':"NewsTitle"}
-    remove_tags = [{'class':"RelatedArticle"}]
-    remove_tags_after = {'class':"Credit"}
+    remove_tags_after = {'class':"NewsBody clr"}

     def parse_feeds(self):

@@ -32,9 +32,30 @@ class MainichiDailyNews(BasicNewsRecipe):
         for a,curarticle in enumerate(curfeed.articles):
             if re.search(r'pheedo.jp', curarticle.url):
                 delList.append(curarticle)
+            if re.search(r'rssad.jp', curarticle.url):
+                delList.append(curarticle)
         if len(delList)>0:
             for d in delList:
                 index = curfeed.articles.index(d)
                 curfeed.articles[index:index+1] = []

         return feeds
+
+    def parse_index(self):
+        feeds = []
+        soup = self.index_to_soup(self.index)
+        topstories = soup.find('ul',attrs={'class':'MaiLink'})
+        if topstories:
+            newsarticles = []
+            for itt in topstories.findAll('li'):
+                itema = itt.find('a',href=True)
+                if itema:
+                    newsarticles.append({
+                        'title'       :itema.string
+                       ,'date'        :''
+                       ,'url'         :itema['href']
+                       ,'description' :''
+                    })
+            feeds.append(('latest', newsarticles))
+        return feeds
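The delList pattern in parse_feeds (collect matching articles, then splice each one out by index) recurs verbatim in the Mainichi recipes below. A more compact equivalent, offered only as a sketch and assuming Feed.articles can be rebound:

    import re

    AD_HOSTS = re.compile(r'pheedo\.jp|rssad\.jp')

    def drop_ad_articles(feeds):
        # keep only articles whose URL is not an ad-redirector link
        for curfeed in feeds:
            curfeed.articles = [a for a in curfeed.articles
                                if not AD_HOSTS.search(a.url)]
        return feeds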
recipes/mainichi_en.recipe (new file, 67 lines)

__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.mainichi.jp
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class MainichiEnglishNews(BasicNewsRecipe):
    title = u'The Mainichi'
    __author__ = 'Hiroshi Miura'
    oldest_article = 2
    max_articles_per_feed = 40
    description = 'Japanese traditional newspaper Mainichi news in English'
    publisher = 'Mainichi News'
    category = 'news, japan'
    language = 'en_JP'
    index = 'http://mainichi.jp/english/english/index.html'
    remove_javascript = True
    masthead_url = 'http://mainichi.jp/english/images/themainichi.png'

    remove_tags_before = {'class':"NewsTitle"}
    remove_tags_after = {'class':"NewsBody clr"}

    def parse_feeds(self):

        feeds = BasicNewsRecipe.parse_feeds(self)

        for curfeed in feeds:
            delList = []
            for a,curarticle in enumerate(curfeed.articles):
                if re.search(r'pheedo.jp', curarticle.url):
                    delList.append(curarticle)
                if re.search(r'rssad.jp', curarticle.url):
                    delList.append(curarticle)
            if len(delList)>0:
                for d in delList:
                    index = curfeed.articles.index(d)
                    curfeed.articles[index:index+1] = []

        return feeds

    def parse_index(self):
        feeds = []
        soup = self.index_to_soup(self.index)
        for section in soup.findAll('section'):
            newsarticles = []
            section_name = 'news'
            hds = section.find('div', attrs={'class':'CategoryHead clr'})
            if hds:
                section_item = hds.find('h1')
                if section_item:
                    section_name = section_item.find('a').string
            items = section.find('ul', attrs={'class':'MaiLink'})
            for item in items.findAll('li'):
                if item:
                    itema = item.find('a')
                    newsarticles.append({
                        'title'       :itema.string
                       ,'date'        :''
                       ,'url'         :itema['href']
                       ,'description' :''
                    })
            feeds.append((section_name, newsarticles))
        return feeds
Deleted file (class MainichiDailyITNews, 34 lines removed)

@@ -1,34 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-import re
-
-class MainichiDailyITNews(BasicNewsRecipe):
-    title = u'\u6bce\u65e5\u65b0\u805e(IT&\u5bb6\u96fb)'
-    __author__ = 'Hiroshi Miura'
-    oldest_article = 2
-    max_articles_per_feed = 100
-    description = 'Japanese traditional newspaper Mainichi Daily News - IT and electronics'
-    publisher = 'Mainichi Daily News'
-    category = 'news, Japan, IT, Electronics'
-    language = 'ja'
-
-    feeds = [(u'IT News', u'http://mainichi.pheedo.jp/f/mainichijp_electronics')]
-
-    remove_tags_before = {'class':"NewsTitle"}
-    remove_tags = [{'class':"RelatedArticle"}]
-    remove_tags_after = {'class':"Credit"}
-
-    def parse_feeds(self):
-
-        feeds = BasicNewsRecipe.parse_feeds(self)
-
-        for curfeed in feeds:
-            delList = []
-            for a,curarticle in enumerate(curfeed.articles):
-                if re.search(r'pheedo.jp', curarticle.url):
-                    delList.append(curarticle)
-            if len(delList)>0:
-                for d in delList:
-                    index = curfeed.articles.index(d)
-                    curfeed.articles[index:index+1] = []
-
-        return feeds
recipes/mainichi_science_news.recipe (new file, 59 lines)

__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.mainichi.jp
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class MainichiDailyScienceNews(BasicNewsRecipe):
    title = u'\u6bce\u65e5\u65b0\u805e(Science)'
    __author__ = 'Hiroshi Miura'
    oldest_article = 2
    max_articles_per_feed = 20
    description = 'Japanese traditional newspaper Mainichi Daily News - science'
    publisher = 'Mainichi Daily News'
    category = 'news, japan'
    language = 'ja'
    index = 'http://mainichi.jp/select/science'
    remove_javascript = True
    masthead_title = u'MAINICHI DAILY NEWS'

    remove_tags_before = {'class':"NewsTitle"}
    remove_tags_after = {'class':"NewsBody clr"}

    def parse_feeds(self):

        feeds = BasicNewsRecipe.parse_feeds(self)

        for curfeed in feeds:
            delList = []
            for a,curarticle in enumerate(curfeed.articles):
                if re.search(r'rssad.jp', curarticle.url):
                    delList.append(curarticle)
            if len(delList)>0:
                for d in delList:
                    index = curfeed.articles.index(d)
                    curfeed.articles[index:index+1] = []

        return feeds

    def parse_index(self):
        feeds = []
        soup = self.index_to_soup(self.index)
        topstories = soup.find('ul',attrs={'class':'MaiLink'})
        if topstories:
            newsarticles = []
            for itt in topstories.findAll('li'):
                itema = itt.find('a',href=True)
                if itema:
                    newsarticles.append({
                        'title'       :itema.string
                       ,'date'        :''
                       ,'url'         :itema['href']
                       ,'description' :''
                    })
            feeds.append(('Science', newsarticles))
        return feeds
recipes/marine_corps_times.recipe (new file, 42 lines)

from calibre.web.feeds.news import BasicNewsRecipe

class MarineCorpsTimes(BasicNewsRecipe):
    title = 'Marine Corps Times'
    __author__ = 'jde'
    __date__ = '16 May 2012'
    __version__ = '1.0'
    description = 'News of the U.S. Marine Corps'
    language = 'en'
    publisher = 'MarineCorpsTimes.com'
    category = 'news, U.S. Marine Corps'
    tags = 'news, U.S. Marine Corps'
    cover_url = 'http://www.marinecorpstimes.com/images/logo_marinetimes-alert.jpg'
    masthead_url = 'http://www.marinecorpstimes.com/images/logo_marinetimes-alert.jpg'
    oldest_article = 7  # days
    max_articles_per_feed = 25
    publication_type = 'newspaper'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = True

    feeds = [
        ('News', 'http://www.MarineCorpstimes.com/rss_news.php'),
        ('Benefits', 'http://www.MarineCorpstimes.com/rss_benefits.php'),
        ('Money', 'http://www.MarineCorpstimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.MarineCorpstimes.com/rss_careers.php'),
        ('Community', 'http://www.MarineCorpstimes.com/rss_community.php'),
        ('Off Duty', 'http://www.MarineCorpstimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.MarineCorpstimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.MarineCorpstimes.com/rss_guard.php'),
    ]
recipes/military_times.recipe (new file, 41 lines)

from calibre.web.feeds.news import BasicNewsRecipe

class MilitaryTimes(BasicNewsRecipe):
    title = 'Military Times'
    __author__ = 'jde'
    __date__ = '16 May 2012'
    __version__ = '1.0'
    description = 'News of the U.S. Military'
    language = 'en'
    publisher = 'MilitaryTimes.com'
    category = 'news, U.S. Military'
    tags = 'news, U.S. Military'
    cover_url = 'http://www.militarytimes.com/images/logo_militarytimes_landing-s.gif'
    masthead_url = 'http://www.militarytimes.com/images/logo_militarytimes_landing-s.gif'
    oldest_article = 7  # days
    max_articles_per_feed = 25
    publication_type = 'newspaper'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = True

    feeds = [
        ('News', 'http://www.militarytimes.com/rss_news.php'),
        ('Benefits', 'http://www.militarytimes.com/rss_benefits.php'),
        ('Money', 'http://www.militarytimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.militarytimes.com/rss_careers.php'),
        ('Community', 'http://www.militarytimes.com/rss_community.php'),
        ('Off Duty', 'http://www.militarytimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.militarytimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.militarytimes.com/rss_guard.php'),
    ]
@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-

 __license__ = 'GPL v3'

@ -7,77 +6,21 @@ __license__ = 'GPL v3'
 www.canada.com
 '''

-import re
-from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
+from calibre.web.feeds.recipes import BasicNewsRecipe

 class CanWestPaper(BasicNewsRecipe):

-    # un-comment the following four lines for the Victoria Times Colonist
-    ## title = u'Victoria Times Colonist'
-    ## url_prefix = 'http://www.timescolonist.com'
-    ## description = u'News from Victoria, BC'
-    ## fp_tag = 'CAN_TC'
-
-    # un-comment the following four lines for the Vancouver Province
-    ## title = u'Vancouver Province'
-    ## url_prefix = 'http://www.theprovince.com'
-    ## description = u'News from Vancouver, BC'
-    ## fp_tag = 'CAN_VP'
-
-    # un-comment the following four lines for the Vancouver Sun
-    ## title = u'Vancouver Sun'
-    ## url_prefix = 'http://www.vancouversun.com'
-    ## description = u'News from Vancouver, BC'
-    ## fp_tag = 'CAN_VS'
-
-    # un-comment the following four lines for the Edmonton Journal
-    ## title = u'Edmonton Journal'
-    ## url_prefix = 'http://www.edmontonjournal.com'
-    ## description = u'News from Edmonton, AB'
-    ## fp_tag = 'CAN_EJ'
-
-    # un-comment the following four lines for the Calgary Herald
-    ## title = u'Calgary Herald'
-    ## url_prefix = 'http://www.calgaryherald.com'
-    ## description = u'News from Calgary, AB'
-    ## fp_tag = 'CAN_CH'
-
-    # un-comment the following four lines for the Regina Leader-Post
-    ## title = u'Regina Leader-Post'
-    ## url_prefix = 'http://www.leaderpost.com'
-    ## description = u'News from Regina, SK'
-    ## fp_tag = ''
-
-    # un-comment the following four lines for the Saskatoon Star-Phoenix
-    ## title = u'Saskatoon Star-Phoenix'
-    ## url_prefix = 'http://www.thestarphoenix.com'
-    ## description = u'News from Saskatoon, SK'
-    ## fp_tag = ''
-
-    # un-comment the following four lines for the Windsor Star
-    ## title = u'Windsor Star'
-    ## url_prefix = 'http://www.windsorstar.com'
-    ## description = u'News from Windsor, ON'
-    ## fp_tag = 'CAN_'
-
-    # un-comment the following four lines for the Ottawa Citizen
-    ## title = u'Ottawa Citizen'
-    ## url_prefix = 'http://www.ottawacitizen.com'
-    ## description = u'News from Ottawa, ON'
-    ## fp_tag = 'CAN_OC'
-
-    # un-comment the following four lines for the Montreal Gazette
+    # un-comment the following three lines for the Montreal Gazette
     title = u'Montreal Gazette'
-    url_prefix = 'http://www.montrealgazette.com'
     description = u'News from Montreal, QC'
-    fp_tag = 'CAN_MG'

     language = 'en_CA'
     __author__ = 'Nick Redding'
     no_stylesheets = True
+    auto_cleanup = True
+    auto_cleanup_keep = '//*[@id="imageBox"]'
     timefmt = ' [%b %d]'
     extra_css = '''
        .timestamp { font-size:xx-small; display: block; }
@ -87,135 +30,19 @@ class CanWestPaper(BasicNewsRecipe):
        .byline { font-size:xx-small; }
        #photocaption { font-size: small; font-style: italic }
        #photocredit { font-size: xx-small; }'''
-    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
-    remove_tags = [{'class':'comments'},
-        dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
-        dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
-        dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
-        dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
-        dict(name='div', attrs={'class':'rule_grey_solid'}),
-        dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
-
-    def get_cover_url(self):
-        from datetime import timedelta, date
-        if self.fp_tag=='':
-            return None
-        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
-        br = BasicNewsRecipe.get_browser()
-        daysback=1
-        try:
-            br.open(cover)
-        except:
-            while daysback<7:
-                cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg'
-                br = BasicNewsRecipe.get_browser()
-                try:
-                    br.open(cover)
-                except:
-                    daysback = daysback+1
-                    continue
-                break
-        if daysback==7:
-            self.log("\nCover unavailable")
-            cover = None
-        return cover
-
-    def fixChars(self,string):
-        # Replace lsquo (\x91)
-        fixed = re.sub("\x91","‘",string)
-        # Replace rsquo (\x92)
-        fixed = re.sub("\x92","’",fixed)
-        # Replace ldquo (\x93)
-        fixed = re.sub("\x93","“",fixed)
-        # Replace rdquo (\x94)
-        fixed = re.sub("\x94","”",fixed)
-        # Replace ndash (\x96)
-        fixed = re.sub("\x96","–",fixed)
-        # Replace mdash (\x97)
-        fixed = re.sub("\x97","—",fixed)
-        fixed = re.sub("&#x2019;","’",fixed)
-        return fixed
-
-    def massageNCXText(self, description):
-        # Kindle TOC descriptions won't render certain characters
-        if description:
-            massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
-            # Replace '&amp;' with '&'
-            massaged = re.sub("&amp;","&", massaged)
-            return self.fixChars(massaged)
-        else:
-            return description
-
-    def populate_article_metadata(self, article, soup, first):
-        if first:
-            picdiv = soup.find('body').find('img')
-            if picdiv is not None:
-                self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src']))
-        xtitle = article.text_summary.strip()
-        if len(xtitle) == 0:
-            desc = soup.find('meta',attrs={'property':'og:description'})
-            if desc is not None:
-                article.summary = article.text_summary = desc['content']
-
-    def strip_anchors(self,soup):
-        paras = soup.findAll(True)
-        for para in paras:
-            aTags = para.findAll('a')
-            for a in aTags:
-                if a.img is None:
-                    a.replaceWith(a.renderContents().decode('cp1252','replace'))
-        return soup
-
-    def preprocess_html(self, soup):
-        return self.strip_anchors(soup)
-
-    def parse_index(self):
-        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
-
-        articles = {}
-        key = 'News'
-        ans = ['News']
-
-        # Find each instance of class="sectiontitle", class="featurecontent"
-        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
-            #self.log(" div class = %s" % divtag['class'])
-            if divtag['class'].startswith('section_title'):
-                # div contains section title
-                if not divtag.h3:
-                    continue
-                key = self.tag_to_string(divtag.h3,False)
-                ans.append(key)
-                self.log("Section name %s" % key)
-                continue
-            # div contains article data
-            h1tag = divtag.find('h1')
-            if not h1tag:
-                continue
-            atag = h1tag.find('a',href=True)
-            if not atag:
-                continue
-            url = self.url_prefix+'/news/todays-paper/'+atag['href']
-            #self.log("Section %s" % key)
-            #self.log("url %s" % url)
-            title = self.tag_to_string(atag,False)
-            #self.log("title %s" % title)
-            pubdate = ''
-            description = ''
-            ptag = divtag.find('p');
-            if ptag:
-                description = self.tag_to_string(ptag,False)
-                #self.log("description %s" % description)
-            author = ''
-            autag = divtag.find('h4')
-            if autag:
-                author = self.tag_to_string(autag,False)
-                #self.log("author %s" % author)
-            if not articles.has_key(key):
-                articles[key] = []
-            articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
-
-        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
-        return ans
+    feeds = [
+        ('News', 'http://rss.canada.com/get/?F297'),
+        ('Sports', 'http://rss.canada.com/get/?F299'),
+        ('Entertainment', 'http://rss.canada.com/get/?F7366'),
+        ('Business', 'http://rss.canada.com/get/?F6939'),
+    ]
22 recipes/nachdenkseiten.recipe Normal file
@ -0,0 +1,22 @@
from calibre.web.feeds.news import BasicNewsRecipe


class Nachdenkseiten(BasicNewsRecipe):
    title = u'Nachdenkseiten'
    __author__ = 'jrda'
    publisher = 'www.nachdenkseiten.de Albrecht Mueller und Dr. Wolfgang Lieb'
    description = 'NachDenkSeiten - Die kritische Website'
    category = 'news'
    oldest_article = 7
    use_embedded_content = False
    language = 'de'
    timefmt = ''
    max_articles_per_feed = 6
    no_stylesheets = True
    encoding = 'utf-8'
    remove_javascript = True
    keep_only_tags = [
        {'id': 'content'}]

    feeds = [
        ('News', 'http://www.nachdenkseiten.de/?feed=rss2'),
    ]
16 recipes/national_geographic_it.recipe Normal file
@ -0,0 +1,16 @@
__version__ = 'v1.0'
__date__ = '5, May 2012'

from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1336226255(BasicNewsRecipe):
    title = u'National Geographic'
    __author__ = 'faber1971'
    description = 'Science magazine'
    language = 'it'

    oldest_article = 15
    max_articles_per_feed = 100
    auto_cleanup = True
    remove_tags = [dict(name='div', attrs={'class':'banner-abbonamenti'})]
    feeds = [(u'National Geographic', u'http://www.nationalgeographic.it/rss/all/rss2.0.xml')]
42 recipes/navy_times.recipe Normal file
@ -0,0 +1,42 @@
from calibre.web.feeds.news import BasicNewsRecipe


class NavyTimes(BasicNewsRecipe):
    title = 'Navy Times'
    __author__ = 'jde'
    __date__ = '16 May 2012'
    __version__ = '1.0'
    description = 'News of the U.S. Navy'
    language = 'en'
    publisher = 'NavyTimes.com'
    category = 'news, U.S. Navy'
    tags = 'news, U.S. Navy'
    cover_url = 'http://www.navytimes.com/images/logo_navytimes_alert.jpg'
    masthead_url = 'http://www.navytimes.com/images/logo_navytimes_alert.jpg'
    oldest_article = 7  # days
    max_articles_per_feed = 25
    publication_type = 'newspaper'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = True

    feeds = [
        ('News', 'http://www.navytimes.com/rss_news.php'),
        ('Benefits', 'http://www.navytimes.com/rss_benefits.php'),
        ('Money', 'http://www.navytimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.navytimes.com/rss_careers.php'),
        ('Community', 'http://www.navytimes.com/rss_community.php'),
        ('Off Duty', 'http://www.navytimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.navytimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.navytimes.com/rss_guard.php'),
    ]
20 recipes/news_busters.recipe Normal file
@ -0,0 +1,20 @@
from calibre.web.feeds.news import BasicNewsRecipe


class NewsBusters(BasicNewsRecipe):
    title = u'News Busters'
    description = 'Exposing and Combating Liberal Media Bias'
    __author__ = 'jde'
    oldest_article = 1  # day
    max_articles_per_feed = 100
    cover_url = "http://newsbusters.org/sites/all/themes/genesis_nb/images/nb-mrc.png"
    language = 'en'
    encoding = 'utf8'
    needs_subscription = False
    remove_javascript = True
    recursions = 0
    use_embedded_content = False
    no_stylesheets = True
    auto_cleanup = True

    feeds = [(u'Blog', u'http://www.newsbusters.org/rss.xml')]
@ -9,10 +9,10 @@ import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class Pescanik(BasicNewsRecipe):
-    title = 'Peščanik'
+    title = u'Peščanik'
     __author__ = 'Darko Miletic'
-    description = 'Peščanik je udruženje građana osnovano 2006. godine. Glavni proizvod Peščanika je radio emisija koja je emitovana na Radiju B92 od 02.02.2000. do 16.06.2011, a od septembra 2011. se emituje na osam radio stanica u Srbiji, Crnoj Gori i BiH'
+    description = u'Peščanik je udruženje građana osnovano 2006. godine. Glavni proizvod Peščanika je radio emisija koja je emitovana na Radiju B92 od 02.02.2000. do 16.06.2011, a od septembra 2011. se emituje na osam radio stanica u Srbiji, Crnoj Gori i BiH'
-    publisher = 'Peščanik'
+    publisher = u'Peščanik'
     category = 'news, politics, Serbia'
     oldest_article = 10
     max_articles_per_feed = 100
@ -45,4 +45,4 @@ class Pescanik(BasicNewsRecipe):
     ]

     def print_version(self, url):
         return url + 'print/'
@ -1,5 +1,5 @@
 """
-Pocket Calibre Recipe v1.0
+Pocket Calibre Recipe v1.2
 """
 __license__ = 'GPL v3'
 __copyright__ = '''
@ -73,6 +73,9 @@ class Pocket(BasicNewsRecipe):
             articles = []
             soup = self.index_to_soup(feedurl)
             ritem = soup.find('ul', attrs={'id':'list'})
+            if ritem is None:
+                self.log.exception("Page %s skipped: invalid HTML" % (feedtitle if feedtitle else feedurl))
+                continue
             for item in reversed(ritem.findAll('li')):
                 if articlesToGrab < 1:
                     break
@ -94,7 +97,12 @@ class Pocket(BasicNewsRecipe):
                     self.readList.append(readLink)
             totalfeeds.append((feedtitle, articles))
         if len(self.readList) < self.minimum_articles:
-            raise Exception("Not enough articles in RIL! Change minimum_articles or add more.")
+            self.mark_as_read_after_dl = False
+            if hasattr(self, 'abort_recipe_processing'):
+                self.abort_recipe_processing("Only %d articles retrieved, minimum_articles not reached" % len(self.readList))
+            else:
+                self.log.exception("Only %d articles retrieved, minimum_articles not reached" % len(self.readList))
+                return []
         return totalfeeds

     def mark_as_read(self, markList):
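The Pocket hunk above replaces a hard failure with a graceful abort: when fewer than minimum_articles items are found, the recipe now calls abort_recipe_processing() if the running calibre provides it, and only logs otherwise. A minimal sketch of that feature-detection pattern, standalone and with a hypothetical Recipe class standing in for BasicNewsRecipe:

    class Recipe(object):
        minimum_articles = 5

        def ensure_enough(self, found):
            # Mirror of the fallback in the hunk above (names are illustrative)
            if found >= self.minimum_articles:
                return
            msg = "Only %d articles retrieved, minimum_articles not reached" % found
            if hasattr(self, 'abort_recipe_processing'):
                # newer calibre: cancel the fetch cleanly
                self.abort_recipe_processing(msg)
            else:
                # older calibre: report and let parse_index() return no feeds
                print(msg)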
22 recipes/revista_summa.recipe Normal file
@ -0,0 +1,22 @@
__license__ = 'GPL v3'
__author__ = 'Vakya'
__version__ = 'v1.0'
__date__ = '14, May 2012'

from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1336226255(BasicNewsRecipe):

    title = u'Revista Summa'
    publisher = u'Summa'
    __author__ = 'Vakya'
    description = 'Informacion regional sobre economia y negocios'
    language = 'es'

    oldest_article = 15
    max_articles_per_feed = 100
    auto_cleanup = True
    remove_tags_before = dict(name='h1')
    remove_tags_after = dict(name='label')
    feeds = [(u'Revista Summa', u'http://www.revistasumma.com/rss/rss-v2.0.rss')]
@ -1,3 +1,4 @@
+
 __license__ = 'GPL v3'
 __copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
@ -15,6 +16,8 @@ class Spiegel_int(BasicNewsRecipe):
     language = 'en_DE'
     no_stylesheets = True
     use_embedded_content = False
+    auto_cleanup = True
+    auto_cleanup_keep = '//*[@id="spArticleTopAsset"]'
     encoding = 'cp1252'
     publisher = 'SPIEGEL ONLINE GmbH'
     category = 'news, politics, Germany'
@ -43,25 +46,25 @@ class Spiegel_int(BasicNewsRecipe):
        .spPhotoGallery{font-size:x-small; color:#990000 ;}
        '''
-    keep_only_tags = [dict(attrs={'id':'spArticleContent'})]
+    #keep_only_tags = [dict(attrs={'id':'spArticleContent'})]
-    remove_tags_after = dict(attrs={'id':'spArticleBody'})
+    #remove_tags_after = dict(attrs={'id':'spArticleBody'})
-    remove_tags = [dict(name=['meta','base','iframe','embed','object'])]
+    #remove_tags = [dict(name=['meta','base','iframe','embed','object'])]
-    remove_attributes = ['clear']
+    #remove_attributes = ['clear']
     feeds = [(u'Spiegel Online', u'http://www.spiegel.de/international/index.rss')]

-    def print_version(self, url):
+    #def print_version(self, url):
-        main, sep, rest = url.rpartition(',')
+    #    main, sep, rest = url.rpartition(',')
-        rmain, rsep, rrest = main.rpartition(',')
+    #    rmain, rsep, rrest = main.rpartition(',')
-        return rmain + ',druck-' + rrest + ',' + rest
+    #    return rmain + ',druck-' + rrest + ',' + rest

-    def preprocess_html(self, soup):
+    #def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
+    #    for item in soup.findAll(style=True):
-            del item['style']
+    #        del item['style']
-        for item in soup.findAll('a'):
+    #    for item in soup.findAll('a'):
-            if item.string is not None:
+    #        if item.string is not None:
-                str = item.string
+    #            str = item.string
-                item.replaceWith(str)
+    #            item.replaceWith(str)
-            else:
+    #        else:
-                str = self.tag_to_string(item)
+    #            str = self.tag_to_string(item)
-                item.replaceWith(str)
+    #            item.replaceWith(str)
-        return soup
+    #    return soup
39 recipes/stars_and_stripes.recipe Normal file
@ -0,0 +1,39 @@
''' Stars and Stripes
'''

import re
from calibre.web.feeds.recipes import BasicNewsRecipe


class AdvancedUserRecipe1308791026(BasicNewsRecipe):
    title = u'Stars and Stripes'
    oldest_article = 3
    max_articles_per_feed = 100
    __author__ = 'adoucette'
    description = 'The U.S. militarys independent news source, featuring exclusive reports from Iraq, Afghanistan, Europe and the Far East.'
    no_stylesheets = True
    #delay = 1
    use_embedded_content = False
    encoding = 'utf8'
    publisher = 'stripes.com'
    category = 'news, US, world'
    language = 'en_US'
    publication_type = 'newsportal'
    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
    conversion_options = {
        'comments' : description
        ,'tags' : category
        ,'language' : language
        ,'publisher' : publisher
        ,'linearize_tables': True
    }
    keep_only_tags = [dict(name='div', attrs={'class':['element article']})]
    remove_tags_after = [dict(name='ul', attrs={'class':'inline-bookmarks'})]
    feeds = [
        (u'News', u'http://feeds.stripes.com/starsandstripes/news'),
        (u'Sports', u'http://feeds.stripes.com/starsandstripes/sports'),
        (u'Military Life', u'http://feeds.stripes.com/starsandstripes/militarylife'),
        (u'Opinion', u'http://feeds.stripes.com/starsandstripes/opinion'),
        (u'Travel', u'http://feeds.stripes.com/starsandstripes/travel')
    ]
92 recipes/strategic_culture.recipe Normal file
@ -0,0 +1,92 @@
__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'

'''
www.strategic-culture.org
'''

import time
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe


class StrategicCulture(BasicNewsRecipe):
    title = 'Strategic Culture Foundation'
    __author__ = 'Darko Miletic'
    description = 'Online Journal'
    publisher = 'Strategic Culture Foundation'
    category = 'news, politics'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    language = 'en'
    publication_type = 'newsportal'
    masthead_url = 'http://www.strategic-culture.org/img/logo.jpg'
    extra_css = '''
        body{font-family: Arial, sans-serif}
        h1{font-family: "Times New Roman",Times,serif}
        img{margin-bottom: 0.8em}
    '''

    conversion_options = {
        'comment'   : description
      , 'tags'      : category
      , 'publisher' : publisher
      , 'language'  : language
    }

    keep_only_tags = [
        dict(name=['h1','p'])
       ,dict(name='div', attrs={'id':'cke_pastebin'})
    ]

    remove_tags = [dict(name=['object','link','base','meta','iframe'])]

    feeds = [
        (u'News'             , u'http://www.strategic-culture.org/blocks/news.html')
       ,(u'Politics'         , u'http://www.strategic-culture.org/rubrics/politics.html')
       ,(u'Economics'        , u'http://www.strategic-culture.org/rubrics/economics.html')
       ,(u'History & Culture', u'http://www.strategic-culture.org/rubrics/history-and-culture.html')
       ,(u'Columnists'       , u'http://www.strategic-culture.org/rubrics/columnists.html')
    ]

    def print_version(self, url):
        return url.replace('-culture.org/news/', '-culture.org/pview/')

    def parse_index(self):
        totalfeeds = []
        lfeeds = self.get_feeds()
        for feedobj in lfeeds:
            feedtitle, feedurl = feedobj
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            if feedurl.endswith('news.html'):
                clname = 'sini14'
            else:
                clname = 'h22'
            checker = []
            for item in soup.findAll('a', attrs={'class':clname}):
                atag = item
                url = atag['href']
                title = self.tag_to_string(atag)
                description = ''
                daypart = url.rpartition('/')[0]
                mpart,sep,day = daypart.rpartition('/')
                ypart,sep,month = mpart.rpartition('/')
                year = ypart.rpartition('/')[2]
                date = strftime("%a, %d %b %Y %H:%M:%S +0000", time.strptime(day + "/" + month + "/" + year, "%d/%m/%Y"))
                if url not in checker:
                    checker.append(url)
                    articles.append({
                        'title'       : title
                       ,'date'        : date
                       ,'url'         : url
                       ,'description' : description
                    })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds
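For orientation, parse_index() above derives each article's date from its URL path rather than from the page itself. Worked through on a hypothetical URL of the shape the rpartition() chain implies:

    url = 'http://www.strategic-culture.org/news/2012/05/11/some-article.html'
    daypart = url.rpartition('/')[0]           # '.../news/2012/05/11'
    mpart, sep, day = daypart.rpartition('/')  # day = '11'
    ypart, sep, month = mpart.rpartition('/')  # month = '05'
    year = ypart.rpartition('/')[2]            # year = '2012'
    # time.strptime/strftime then rebuild this as an RFC 2822 style date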
BIN resources/compiled_coffeescript.zip Normal file
Binary file not shown.
@ -506,3 +506,17 @@ change_book_details_font_size_by = 0
 # No compile: compile_gpm_templates = False
 compile_gpm_templates = True
+
+#: What format to default to when using the Tweak feature
+# The Tweak feature of calibre allows direct editing of a book format.
+# If multiple formats are available, calibre will offer you a choice
+# of formats, defaulting to your preferred output format if it is available.
+# Set this tweak to a specific value of 'EPUB' or 'AZW3' to always default
+# to that format rather than your output format preference.
+# Set to a value of 'remember' to use whichever format you chose last time you
+# used the Tweak feature.
+# Examples:
+#   default_tweak_format = None (Use output format)
+#   default_tweak_format = 'EPUB'
+#   default_tweak_format = 'remember'
+default_tweak_format = None
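For context, and not part of the diff: values from this tweaks file are read elsewhere in calibre through the global tweaks mapping, so a consumer of the new setting would look roughly like the following sketch (assuming the usual calibre.utils.config import path):

    from calibre.utils.config import tweaks

    fmt = tweaks.get('default_tweak_format', None)
    # fmt is None (follow the output format preference), 'remember',
    # or an explicit format name such as 'EPUB' or 'AZW3'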
@ -20,7 +20,11 @@ vipy.session.initialize(project_name='calibre', src_dir=src_dir,
         project_dir=project_dir, base_dir=project_dir)

 def recipe_title_callback(raw):
-    return eval(raw.decode('utf-8')).replace(' ', '_')
+    try:
+        return eval(raw.decode('utf-8')).replace(u' ', u'_')
+    except:
+        print ('Failed to decode recipe title: %r'%raw)
+        raise

 vipy.session.add_content_browser('<leader>r', 'Recipe',
         vipy.session.glob_based_iterator(os.path.join(project_dir, 'recipes', '*.recipe')),
@ -22,7 +22,8 @@ Do not modify it unless you know what you are doing.
 import sys, os

 path = os.environ.get('CALIBRE_PYTHON_PATH', {path!r})
-sys.path.insert(0, path)
+if path not in sys.path:
+    sys.path.insert(0, path)

 sys.resources_location = os.environ.get('CALIBRE_RESOURCES_PATH', {resources!r})
 sys.extensions_location = os.environ.get('CALIBRE_EXTENSIONS_PATH', {extensions!r})
@ -41,8 +41,8 @@ binary_includes = [
     '/usr/lib/libgthread-2.0.so.0',
     '/usr/lib/libpng14.so.14',
     '/usr/lib/libexslt.so.0',
-    MAGICK_PREFIX+'/lib/libMagickWand.so.4',
-    MAGICK_PREFIX+'/lib/libMagickCore.so.4',
+    MAGICK_PREFIX+'/lib/libMagickWand.so.5',
+    MAGICK_PREFIX+'/lib/libMagickCore.so.5',
     '/usr/lib/libgcrypt.so.11',
     '/usr/lib/libgpg-error.so.0',
     '/usr/lib/libphonon.so.4',
@ -429,7 +429,7 @@ class Py2App(object):
     def add_imagemagick(self):
         info('\nAdding ImageMagick')
         for x in ('Wand', 'Core'):
-            self.install_dylib(os.path.join(SW, 'lib', 'libMagick%s.4.dylib'%x))
+            self.install_dylib(os.path.join(SW, 'lib', 'libMagick%s.5.dylib'%x))
         idir = glob.glob(os.path.join(SW, 'lib', 'ImageMagick-*'))[-1]
         dest = os.path.join(self.frameworks_dir, 'ImageMagick')
         if os.path.exists(dest):
@ -18,7 +18,7 @@ QT_DIR = 'Q:\\Qt\\4.8.1'
 QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
 LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
 SW = r'C:\cygwin\home\kovid\sw'
-IMAGEMAGICK = os.path.join(SW, 'build', 'ImageMagick-6.6.6',
+IMAGEMAGICK = os.path.join(SW, 'build', 'ImageMagick-6.7.6',
         'VisualMagick', 'bin')
 CRT = r'C:\Microsoft.VC90.CRT'
@ -336,6 +336,8 @@ Index: src/PdfFiltersPrivate.cpp
 ImageMagick
 --------------

+Get the source from: http://www.imagemagick.org/download/windows/ImageMagick-windows.zip
+
 Edit VisualMagick/configure/configure.cpp to set

 int projectType = MULTITHREADEDDLL;
@ -349,7 +351,10 @@ Edit magick/magick-config.h
 Undefine ProvideDllMain and MAGICKCORE_X11_DELEGATE

 Now open VisualMagick/VisualDynamicMT.sln set to Release
-Remove the CORE_xlib and UTIL_Imdisplay project CORE_Magick++
+Remove the CORE_xlib, UTIL_Imdisplay and CORE_Magick++ projects.
+
+F7 for build project, you will get one error due to the removal of xlib, ignore
+it.

 calibre
 ---------
@ -12,14 +12,14 @@ msgstr ""
 "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
 "devel@lists.alioth.debian.org>\n"
 "POT-Creation-Date: 2011-11-25 14:01+0000\n"
-"PO-Revision-Date: 2012-04-28 10:42+0000\n"
+"PO-Revision-Date: 2012-05-03 16:09+0000\n"
-"Last-Translator: Ferran Rius <frius64@hotmail.com>\n"
+"Last-Translator: Dídac Rios <didac@niorcs.com>\n"
 "Language-Team: Catalan <linux@softcatala.org>\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2012-04-29 04:45+0000\n"
+"X-Launchpad-Export-Date: 2012-05-04 04:47+0000\n"
-"X-Generator: Launchpad (build 15149)\n"
+"X-Generator: Launchpad (build 15195)\n"
 "Language: ca\n"

 #. name for aaa
@ -9936,11 +9936,11 @@ msgstr "Ibani"

 #. name for ica
 msgid "Ede Ica"
-msgstr ""
+msgstr "Ede Ica"

 #. name for ich
 msgid "Etkywan"
-msgstr ""
+msgstr "Etkywan"

 #. name for icl
 msgid "Icelandic Sign Language"
@ -9952,7 +9952,7 @@ msgstr "Anglès crioll; Islander"

 #. name for ida
 msgid "Idakho-Isukha-Tiriki"
-msgstr ""
+msgstr "Idakho-Isukha-Tiriki"

 #. name for idb
 msgid "Indo-Portuguese"
@ -9960,15 +9960,15 @@ msgstr "Indo-portuguès"

 #. name for idc
 msgid "Idon"
-msgstr ""
+msgstr "Idon"

 #. name for idd
 msgid "Ede Idaca"
-msgstr ""
+msgstr "Ede Idaca"

 #. name for ide
 msgid "Idere"
-msgstr ""
+msgstr "Idere"

 #. name for idi
 msgid "Idi"
@ -9976,43 +9976,43 @@ msgstr ""

 #. name for ido
 msgid "Ido"
-msgstr ""
+msgstr "ido"

 #. name for idr
 msgid "Indri"
-msgstr ""
+msgstr "Indri"

 #. name for ids
 msgid "Idesa"
-msgstr ""
+msgstr "Idesa"

 #. name for idt
 msgid "Idaté"
-msgstr ""
+msgstr "Idaté"

 #. name for idu
 msgid "Idoma"
-msgstr ""
+msgstr "Idoma"

 #. name for ifa
 msgid "Ifugao; Amganad"
-msgstr ""
+msgstr "Ifugao; Amganad"

 #. name for ifb
 msgid "Ifugao; Batad"
-msgstr ""
+msgstr "Ifugao; Batad"

 #. name for ife
 msgid "Ifè"
-msgstr ""
+msgstr "Ifè"

 #. name for iff
 msgid "Ifo"
-msgstr ""
+msgstr "Ifo"

 #. name for ifk
 msgid "Ifugao; Tuwali"
-msgstr ""
+msgstr "Ifugao; Tuwali"

 #. name for ifm
 msgid "Teke-Fuumu"
@ -10020,15 +10020,15 @@ msgstr "Teke; Fuumu"

 #. name for ifu
 msgid "Ifugao; Mayoyao"
-msgstr ""
+msgstr "Ifugao; Mayoyao"

 #. name for ify
 msgid "Kallahan; Keley-I"
-msgstr ""
+msgstr "Kallahan; Keley-I"

 #. name for igb
 msgid "Ebira"
-msgstr ""
+msgstr "Ebira"

 #. name for ige
 msgid "Igede"
@ -8,14 +8,14 @@ msgstr ""
 "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
 "devel@lists.alioth.debian.org>\n"
 "POT-Creation-Date: 2011-11-25 14:01+0000\n"
-"PO-Revision-Date: 2012-03-25 12:19+0000\n"
+"PO-Revision-Date: 2012-05-03 14:49+0000\n"
-"Last-Translator: Radan Putnik <srastral@gmail.com>\n"
+"Last-Translator: Иван Старчевић <ivanstar61@gmail.com>\n"
 "Language-Team: Serbian <gnu@prevod.org>\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Launchpad-Export-Date: 2012-03-26 04:37+0000\n"
+"X-Launchpad-Export-Date: 2012-05-04 04:47+0000\n"
-"X-Generator: Launchpad (build 15008)\n"
+"X-Generator: Launchpad (build 15195)\n"
 "Language: sr\n"

 #. name for aaa
@ -6152,7 +6152,7 @@ msgstr ""

 #. name for deu
 msgid "German"
-msgstr "немачки"
+msgstr "Немачки"

 #. name for dev
 msgid "Domung"
@ -8416,7 +8416,7 @@ msgstr "ирски"

 #. name for glg
 msgid "Galician"
-msgstr ""
+msgstr "Галицијски"

 #. name for glh
 msgid "Pashayi; Northwest"
@ -8472,11 +8472,11 @@ msgstr ""

 #. name for gmh
 msgid "German; Middle High (ca. 1050-1500)"
-msgstr ""
+msgstr "Немачки; средње високи (ca. 1050-1500)"

 #. name for gml
 msgid "German; Middle Low"
-msgstr ""
+msgstr "Немачки; средње низак"

 #. name for gmm
 msgid "Gbaya-Mbodomo"
@ -8792,7 +8792,7 @@ msgstr ""

 #. name for gsg
 msgid "German Sign Language"
-msgstr ""
+msgstr "Немачки језик"

 #. name for gsl
 msgid "Gusilay"
@ -8820,7 +8820,7 @@ msgstr ""

 #. name for gsw
 msgid "German; Swiss"
-msgstr ""
+msgstr "Немачки ; Швајцарска"

 #. name for gta
 msgid "Guató"
@ -17954,7 +17954,7 @@ msgstr ""

 #. name for nds
 msgid "German; Low"
-msgstr ""
+msgstr "Немачки; низак"

 #. name for ndt
 msgid "Ndunga"
@ -18778,7 +18778,7 @@ msgstr ""

 #. name for nno
 msgid "Norwegian Nynorsk"
-msgstr "норвешки модерни"
+msgstr "Норвешки модерни"

 #. name for nnp
 msgid "Naga; Wancho"
@ -18830,7 +18830,7 @@ msgstr ""

 #. name for nob
 msgid "Norwegian Bokmål"
-msgstr ""
+msgstr "Норвешки (књижевни)"

 #. name for noc
 msgid "Nuk"
@ -18886,7 +18886,7 @@ msgstr ""

 #. name for nor
 msgid "Norwegian"
-msgstr "норвешки"
+msgstr "Норвешки"

 #. name for nos
 msgid "Nisu; Eastern"
@ -19066,7 +19066,7 @@ msgstr ""

 #. name for nsl
 msgid "Norwegian Sign Language"
-msgstr ""
+msgstr "Норвешки језик"

 #. name for nsm
 msgid "Naga; Sumi"
@ -20406,7 +20406,7 @@ msgstr ""

 #. name for pdc
 msgid "German; Pennsylvania"
-msgstr ""
+msgstr "Немачки ; Пенсилванија"

 #. name for pdi
 msgid "Pa Di"
@ -22086,7 +22086,7 @@ msgstr ""

 #. name for rmg
 msgid "Norwegian; Traveller"
-msgstr ""
+msgstr "Норвешки; путнички"

 #. name for rmh
 msgid "Murkim"
@ -22871,7 +22871,7 @@ msgstr ""

 #. name for sgg
 msgid "Swiss-German Sign Language"
-msgstr ""
+msgstr "Швајцарско-Немачки језик"

 #. name for sgh
 msgid "Shughni"
@ -26,7 +26,7 @@ def get_opts_from_parser(parser):
 class Coffee(Command): # {{{

     description = 'Compile coffeescript files into javascript'
-    COFFEE_DIRS = {'ebooks/oeb/display': 'display'}
+    COFFEE_DIRS = ('ebooks/oeb/display',)

     def add_options(self, parser):
         parser.add_option('--watch', '-w', action='store_true', default=False,
@ -47,49 +47,69 @@ class Coffee(Command): # {{{
         except KeyboardInterrupt:
             pass

-    def show_js(self, jsfile):
+    def show_js(self, raw):
         from pygments.lexers import JavascriptLexer
         from pygments.formatters import TerminalFormatter
         from pygments import highlight
-        with open(jsfile, 'rb') as f:
-            raw = f.read()
         print highlight(raw, JavascriptLexer(), TerminalFormatter())

     def do_coffee_compile(self, opts, timestamp=False, ignore_errors=False):
-        for toplevel, dest in self.COFFEE_DIRS.iteritems():
-            dest = self.j(self.RESOURCES, dest)
-            for x in glob.glob(self.j(self.SRC, __appname__, toplevel, '*.coffee')):
-                js = self.j(dest, os.path.basename(x.rpartition('.')[0]+'.js'))
-                if self.newer(js, x):
-                    print ('\t%sCompiling %s'%(time.strftime('[%H:%M:%S] ') if
-                        timestamp else '', os.path.basename(x)))
-                    try:
-                        cs = subprocess.check_output(self.compiler +
-                                [x]).decode('utf-8')
-                    except Exception as e:
-                        print ('\n\tCompilation of %s failed'%os.path.basename(x))
-                        print (e)
-                        if ignore_errors:
-                            with open(js, 'wb') as f:
-                                f.write('# Compilation from coffeescript failed')
-                        else:
-                            raise SystemExit(1)
-                    else:
-                        with open(js, 'wb') as f:
-                            f.write(cs.encode('utf-8'))
-                        if opts.show_js:
-                            self.show_js(js)
-                            print ('#'*80)
-                            print ('#'*80)
+        src_files = {}
+        for src in self.COFFEE_DIRS:
+            for f in glob.glob(self.j(self.SRC, __appname__, src,
+                '*.coffee')):
+                bn = os.path.basename(f).rpartition('.')[0]
+                arcname = src.replace('/', '.') + '.' + bn + '.js'
+                src_files[arcname] = (f, os.stat(f).st_mtime)
+
+        existing = {}
+        dest = self.j(self.RESOURCES, 'compiled_coffeescript.zip')
+        if os.path.exists(dest):
+            with zipfile.ZipFile(dest, 'r') as zf:
+                for info in zf.infolist():
+                    mtime = time.mktime(info.date_time + (0, 0, -1))
+                    arcname = info.filename
+                    if (arcname in src_files and src_files[arcname][1] <
+                            mtime):
+                        existing[arcname] = (zf.read(info), info)
+
+        todo = set(src_files) - set(existing)
+        updated = {}
+        for arcname in todo:
+            name = arcname.rpartition('.')[0]
+            print ('\t%sCompiling %s'%(time.strftime('[%H:%M:%S] ') if
+                timestamp else '', name))
+            src = src_files[arcname][0]
+            try:
+                js = subprocess.check_output(self.compiler +
+                        [src]).decode('utf-8')
+            except Exception as e:
+                print ('\n\tCompilation of %s failed'%name)
+                print (e)
+                if ignore_errors:
+                    js = u'# Compilation from coffeescript failed'
+                else:
+                    raise SystemExit(1)
+            else:
+                if opts.show_js:
+                    self.show_js(js)
+                    print ('#'*80)
+                    print ('#'*80)
+            zi = zipfile.ZipInfo()
+            zi.filename = arcname
+            zi.date_time = time.localtime()[:6]
+            updated[arcname] = (js.encode('utf-8'), zi)
+        if updated:
+            with zipfile.ZipFile(dest, 'w', zipfile.ZIP_STORED) as zf:
+                for raw, zi in updated.itervalues():
+                    zf.writestr(zi, raw)
+                for raw, zi in existing.itervalues():
+                    zf.writestr(zi, raw)

     def clean(self):
-        for toplevel, dest in self.COFFEE_DIRS.iteritems():
-            dest = self.j(self.RESOURCES, dest)
-            for x in glob.glob(self.j(self.SRC, __appname__, toplevel, '*.coffee')):
-                x = x.rpartition('.')[0] + '.js'
-                x = self.j(dest, os.path.basename(x))
-                if os.path.exists(x):
-                    os.remove(x)
+        x = self.j(self.RESOURCES, 'compiled_coffeescript.zip')
+        if os.path.exists(x):
+            os.remove(x)
 # }}}

 class Kakasi(Command): # {{{
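One subtle line in the new do_coffee_compile() is the freshness test time.mktime(info.date_time + (0, 0, -1)): zipfile.ZipInfo.date_time is a 6-tuple, while time.mktime() expects nine struct_time fields, so the padding supplies dummy weekday/yearday values and -1 for "DST unknown". Illustrated standalone:

    import time

    date_time = (2012, 5, 11, 10, 30, 0)  # what zipfile.ZipInfo.date_time holds
    mtime = time.mktime(date_time + (0, 0, -1))  # pad to 9 fields; -1 lets mktime infer DST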
@ -4,7 +4,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__ = u'calibre'
-numeric_version = (0, 8, 50)
+numeric_version = (0, 8, 51)
 __version__ = u'.'.join(map(unicode, numeric_version))
 __author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
|
@ -302,7 +302,9 @@ class OutputFormatPlugin(Plugin):
|
|||||||
|
|
||||||
:param item: The item (HTML file) being processed
|
:param item: The item (HTML file) being processed
|
||||||
:param stylizer: A Stylizer object containing the flattened styles for
|
:param stylizer: A Stylizer object containing the flattened styles for
|
||||||
item. You can get the style for any element by stylizer.style(element).
|
item. You can get the style for any element by
|
||||||
|
stylizer.style(element).
|
||||||
|
|
||||||
'''
|
'''
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -57,6 +57,7 @@ class ANDROID(USBMS):
             0x4316 : [0x216],
             0x42d6 : [0x216],
             0x42d7 : [0x216],
+            0x42f7 : [0x216],
         },
         # Freescale
         0x15a2 : {
@ -193,7 +194,7 @@ class ANDROID(USBMS):
             'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855',
             'XT910', 'BOOK_A10', 'USB_2.0_DRIVER', 'I9100T', 'P999DW',
             'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD', 'USB_2.0_DRIVER',
-            'GT-S5830L_CARD', 'UNIVERSE']
+            'GT-S5830L_CARD', 'UNIVERSE', 'XT875']
     WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
             'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
             'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
@ -201,7 +202,8 @@ class ANDROID(USBMS):
             'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853',
             'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD',
             'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC',
-            'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER']
+            'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER', 'XT875',
+            'UMS_COMPOSITE']

     OSX_MAIN_MEM = 'Android Device Main Memory'
@ -92,6 +92,10 @@ class POCKETBOOK360(EB600):
     name = 'PocketBook 360 Device Interface'

     gui_name = 'PocketBook 360'
+    VENDOR_ID = [0x1f85, 0x525]
+    PRODUCT_ID = [0x1688, 0xa4a5]
+    BCD = [0x110]
+
     FORMATS = ['epub', 'fb2', 'prc', 'mobi', 'pdf', 'djvu', 'rtf', 'chm', 'txt']
@ -1,4 +1,25 @@
-from __future__ import with_statement
-__license__ = 'GPL 3'
-__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
+
+
+class ConversionUserFeedBack(Exception):
+
+    def __init__(self, title, msg, level='info', det_msg=''):
+        ''' Show a simple message to the user
+
+        :param title: The title (very short description)
+        :param msg: The message to show the user
+        :param level: Must be one of 'info', 'warn' or 'error'
+        :param det_msg: Optional detailed message to show the user
+        '''
+        import json
+        Exception.__init__(self, json.dumps({'msg':msg, 'level':level,
+            'det_msg':det_msg, 'title':title}))
+        self.title, self.msg, self.det_msg = title, msg, det_msg
+        self.level = level
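The new exception packs its payload into the exception message as JSON, presumably so it survives intact when re-raised across a worker-process boundary, while the plain attributes serve in-process handlers such as the ebook-convert change further down. A hedged usage sketch; the surrounding plugin code is hypothetical:

    from calibre.ebooks.conversion import ConversionUserFeedBack

    # somewhere inside a conversion pipeline step:
    raise ConversionUserFeedBack(
        'Download failed',                     # title: very short description
        'Not enough articles were retrieved',  # msg shown to the user
        level='warn',                          # one of 'info', 'warn', 'error'
        det_msg='Only 2 of 5 requested articles could be fetched')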
@ -15,6 +15,7 @@ from calibre.utils.logging import Log
 from calibre.constants import preferred_encoding
 from calibre.customize.conversion import OptionRecommendation
 from calibre import patheq
+from calibre.ebooks.conversion import ConversionUserFeedBack

 USAGE = '%prog ' + _('''\
 input_file output_file [options]
@ -304,7 +305,10 @@ def read_sr_patterns(path, log=None):
 def main(args=sys.argv):
     log = Log()
     parser, plumber = create_option_parser(args, log)
-    opts = parser.parse_args(args)[0]
+    opts, leftover_args = parser.parse_args(args)
+    if len(leftover_args) > 3:
+        log.error('Extra arguments not understood:', u', '.join(leftover_args[3:]))
+        return 1
     for x in ('read_metadata_from_opf', 'cover'):
         if getattr(opts, x, None) is not None:
             setattr(opts, x, abspath(getattr(opts, x)))
@ -317,7 +321,16 @@ def main(args=sys.argv):
             if n.dest]
     plumber.merge_ui_recommendations(recommendations)

-    plumber.run()
+    try:
+        plumber.run()
+    except ConversionUserFeedBack as e:
+        ll = {'info': log.info, 'warn': log.warn,
+                'error':log.error}.get(e.level, log.info)
+        ll(e.title)
+        if e.det_msg:
+            log.debug(e.det_msg)
+        ll(e.msg)
+        raise SystemExit(1)

     log(_('Output saved to'), ' ', plumber.output)
@ -207,7 +207,7 @@ class EPUBInput(InputFormatPlugin):
             if rc:
                 cover_toc_item = None
                 for item in oeb.toc.iterdescendants():
-                    if item.href == rc:
+                    if item.href and item.href.partition('#')[0] == rc:
                         cover_toc_item = item
                         break
                 spine = {x.href for x in oeb.spine}
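The new comparison tolerates TOC hrefs that carry a fragment identifier, which the old equality test missed. A one-line illustration with made-up values:

    href = 'titlepage.xhtml#cover'
    rc = 'titlepage.xhtml'
    assert href.partition('#')[0] == rc  # the old test href == rc would be False
    # str.partition never raises; an href without '#' compares unchanged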
@ -393,8 +393,14 @@ class EPUBOutput(OutputFormatPlugin):
         for tag in XPath('//h:body/descendant::h:script')(root):
             tag.getparent().remove(tag)

+        formchildren = XPath('./h:input|./h:button|./h:textarea|'
+                './h:label|./h:fieldset|./h:legend')
         for tag in XPath('//h:form')(root):
-            tag.getparent().remove(tag)
+            if formchildren(tag):
+                tag.getparent().remove(tag)
+            else:
+                # Not a real form
+                tag.tag = XHTML('div')

         for tag in XPath('//h:center')(root):
             tag.tag = XHTML('div')
@ -12,7 +12,7 @@ class MOBIInput(InputFormatPlugin):
     name = 'MOBI Input'
     author = 'Kovid Goyal'
     description = 'Convert MOBI files (.mobi, .prc, .azw) to HTML'
-    file_types = set(['mobi', 'prc', 'azw', 'azw3'])
+    file_types = set(['mobi', 'prc', 'azw', 'azw3', 'pobi'])

     def convert(self, stream, options, file_ext, log,
         accelerators):
@ -343,21 +343,25 @@ OptionRecommendation(name='remove_fake_margins',
|
|||||||
OptionRecommendation(name='margin_top',
|
OptionRecommendation(name='margin_top',
|
||||||
recommended_value=5.0, level=OptionRecommendation.LOW,
|
recommended_value=5.0, level=OptionRecommendation.LOW,
|
||||||
help=_('Set the top margin in pts. Default is %default. '
|
help=_('Set the top margin in pts. Default is %default. '
|
||||||
|
'Setting this to less than zero will cause no margin to be set. '
|
||||||
'Note: 72 pts equals 1 inch')),
|
'Note: 72 pts equals 1 inch')),
|
||||||
|
|
||||||
OptionRecommendation(name='margin_bottom',
|
OptionRecommendation(name='margin_bottom',
|
||||||
recommended_value=5.0, level=OptionRecommendation.LOW,
|
recommended_value=5.0, level=OptionRecommendation.LOW,
|
||||||
help=_('Set the bottom margin in pts. Default is %default. '
|
help=_('Set the bottom margin in pts. Default is %default. '
|
||||||
|
'Setting this to less than zero will cause no margin to be set. '
|
||||||
'Note: 72 pts equals 1 inch')),
|
'Note: 72 pts equals 1 inch')),
|
||||||
|
|
||||||
OptionRecommendation(name='margin_left',
|
OptionRecommendation(name='margin_left',
|
||||||
recommended_value=5.0, level=OptionRecommendation.LOW,
|
recommended_value=5.0, level=OptionRecommendation.LOW,
|
||||||
help=_('Set the left margin in pts. Default is %default. '
|
help=_('Set the left margin in pts. Default is %default. '
|
||||||
|
'Setting this to less than zero will cause no margin to be set. '
|
||||||
'Note: 72 pts equals 1 inch')),
|
'Note: 72 pts equals 1 inch')),
|
||||||
|
|
||||||
OptionRecommendation(name='margin_right',
|
OptionRecommendation(name='margin_right',
|
||||||
recommended_value=5.0, level=OptionRecommendation.LOW,
|
recommended_value=5.0, level=OptionRecommendation.LOW,
|
||||||
help=_('Set the right margin in pts. Default is %default. '
|
help=_('Set the right margin in pts. Default is %default. '
|
||||||
|
'Setting this to less than zero will cause no margin to be set. '
|
||||||
'Note: 72 pts equals 1 inch')),
|
'Note: 72 pts equals 1 inch')),
|
||||||
|
|
||||||
OptionRecommendation(name='change_justification',
|
OptionRecommendation(name='change_justification',
|
||||||
@@ -885,7 +889,10 @@ OptionRecommendation(name='search_replace',
         self.log.debug('Resolved conversion options')
         try:
             self.log.debug('calibre version:', __version__)
-            self.log.debug(pprint.pformat(self.opts.__dict__))
+            odict = dict(self.opts.__dict__)
+            for x in ('username', 'password'):
+                odict.pop(x, None)
+            self.log.debug(pprint.pformat(odict))
         except:
             self.log.exception('Failed to get resolved conversion options')
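The hunk above keeps user credentials out of the conversion debug log. A minimal standalone sketch of the same scrub-before-logging pattern; the option dict below is fabricated for illustration and is not calibre's real option set:

    import pprint

    def scrubbed(options, secret_keys=('username', 'password')):
        # Copy first so the live options object is left untouched
        safe = dict(options)
        for key in secret_keys:
            safe.pop(key, None)  # pop with a default never raises
        return pprint.pformat(safe)

    print(scrubbed({'username': 'me', 'password': 's3cret', 'margin_top': 5.0}))
    # {'margin_top': 5.0}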
@@ -5,7 +5,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import re, error as re_error
+import re
 from math import ceil
 from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
 from calibre.utils.logging import default_log
@@ -184,7 +184,7 @@ class HeuristicProcessor(object):
             except OverflowError:
                 # match.group(0) was too large to be compiled into a regex
                 continue
-            except re_error:
+            except re.error:
                 # the match was not a valid regular expression
                 continue
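For context on the fix above: text captured from a document is not guaranteed to be a well-formed pattern when handed back to the regex engine, so the compile step needs the re.error guard. A small illustration in plain Python, not calibre code:

    import re

    for candidate in ('valid.*pattern', 'broken[', '(unclosed'):
        try:
            re.compile(candidate)
            print('compiled: ' + candidate)
        except re.error as err:
            # Arbitrary matched text is often not a valid regex
            print('invalid regex %r: %s' % (candidate, err))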
@@ -113,6 +113,11 @@ class HTMLFile(object):
                 raise IOError(msg)
             raise IgnoreFile(msg, err.errno)

+        if not src:
+            if level == 0:
+                raise ValueError('The file %s is empty'%self.path)
+            self.is_binary = True
+
         if not self.is_binary:
             if not encoding:
                 encoding = detect_xml_encoding(src[:4096], verbose=verbose)[1]
@@ -18,7 +18,7 @@ from calibre.ebooks.metadata import check_isbn
 from calibre.ebooks.metadata.sources.base import (Source, Option, fixcase,
         fixauthors)
 from calibre.ebooks.metadata.book.base import Metadata
-from calibre.utils.date import parse_date
+from calibre.utils.date import parse_only_date
 from calibre.utils.localization import canonicalize_lang

 class Worker(Thread): # Get details {{{
@@ -471,7 +471,7 @@ class Worker(Thread): # Get details {{{
             ans = x.tail
             date = ans.rpartition('(')[-1].replace(')', '').strip()
             date = self.delocalize_datestr(date)
-            return parse_date(date, assume_utc=True)
+            return parse_only_date(date, assume_utc=True)

     def parse_language(self, pd):
         for x in reversed(pd.xpath(self.language_xpath)):
@@ -306,10 +306,15 @@ class MOBIHeader(object): # {{{
             self.extra_data_flags = 0
         if self.has_extra_data_flags:
             self.unknown4 = self.raw[184:192]
-            self.fdst_idx, self.fdst_count = struct.unpack_from(b'>LL',
-                    self.raw, 192)
-            if self.fdst_count <= 1:
-                self.fdst_idx = NULL_INDEX
+            if self.file_version < 8:
+                self.first_text_record, self.last_text_record = \
+                        struct.unpack_from(b'>HH', self.raw, 192)
+                self.fdst_count = struct.unpack_from(b'>L', self.raw, 196)
+            else:
+                self.fdst_idx, self.fdst_count = struct.unpack_from(b'>LL',
+                        self.raw, 192)
+                if self.fdst_count <= 1:
+                    self.fdst_idx = NULL_INDEX
         (self.fcis_number, self.fcis_count, self.flis_number,
                 self.flis_count) = struct.unpack(b'>IIII',
                 self.raw[200:216])
@@ -409,7 +414,11 @@ class MOBIHeader(object): # {{{
         a('DRM Flags: %r'%self.drm_flags)
         if self.has_extra_data_flags:
             a('Unknown4: %r'%self.unknown4)
-            r('FDST Index', 'fdst_idx')
+            if hasattr(self, 'first_text_record'):
+                a('First content record: %d'%self.first_text_record)
+                a('Last content record: %d'%self.last_text_record)
+            else:
+                r('FDST Index', 'fdst_idx')
             a('FDST Count: %d'% self.fdst_count)
             r('FCIS number', 'fcis_number')
             a('FCIS count: %d'% self.fcis_count)
@@ -111,7 +111,11 @@ def update_flow_links(mobi8_reader, resource_map, log):
             continue

         if not isinstance(flow, unicode):
-            flow = flow.decode(mr.header.codec)
+            try:
+                flow = flow.decode(mr.header.codec)
+            except UnicodeDecodeError:
+                log.error('Flow part has invalid %s encoded bytes'%mr.header.codec)
+                flow = flow.decode(mr.header.codec, 'replace')

         # links to raster image files from image tags
         # image_pattern
@@ -207,9 +207,9 @@ class Mobi8Reader(object):
                 fname = 'svgimg' + nstr + '.svg'
             else:
                 # search for CDATA and if exists inline it
-                if flowpart.find('[CDATA[') >= 0:
+                if flowpart.find(b'[CDATA[') >= 0:
                     typ = 'css'
-                    flowpart = '<style type="text/css">\n' + flowpart + '\n</style>\n'
+                    flowpart = b'<style type="text/css">\n' + flowpart + b'\n</style>\n'
                     format = 'inline'
                     dir = None
                     fname = None
@@ -382,6 +382,7 @@ class MobiWriter(object):
         first_image_record = len(self.records)
         self.resources.serialize(self.records, used_images)
         resource_record_count = len(self.records) - old
+        last_content_record = len(self.records) - 1

         # FCIS/FLIS (Seems to serve no purpose)
         flis_number = len(self.records)
@@ -406,7 +407,7 @@ class MobiWriter(object):
         # header
         header_fields['first_resource_record'] = first_image_record
         header_fields['exth_flags'] = 0b100001010000 # Kinglegen uses this
-        header_fields['fdst_record'] = NULL_INDEX
+        header_fields['fdst_record'] = pack(b'>HH', 1, last_content_record)
         header_fields['fdst_count'] = 1 # Why not 0? Kindlegen uses 1
         header_fields['flis_record'] = flis_number
         header_fields['fcis_record'] = fcis_number
@@ -314,9 +314,9 @@ class KF8Writer(object):
             return

         # Flatten the ToC into a depth first list
-        fl = toc.iter() if is_periodical else toc.iterdescendants()
+        fl = toc.iterdescendants()
         for i, item in enumerate(fl):
-            entry = {'id': id(item), 'index': i, 'href':item.href,
+            entry = {'id': id(item), 'index': i, 'href':item.href or '',
                 'label':(item.title or _('Unknown')),
                 'children':[]}
             entry['depth'] = getattr(item, 'ncx_hlvl', 0)
@@ -138,6 +138,8 @@ class MOBIHeader(Header): # {{{
     unknown2 = zeroes(8)

     # 192: FDST
+    # In MOBI 6 the fdst record is instead two two byte fields storing the
+    # index of the first and last content records
     fdst_record = DYN
     fdst_count = DYN
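Taken together, the MOBI header hunks above say that bytes 192-199 of the header are overloaded: MOBI 6 files store two 16-bit content-record indices followed by a 32-bit count, while KF8 files store a 32-bit FDST index and a 32-bit count. A hedged sketch of that dual layout; only the offsets and struct formats come from the diff, while the NULL_INDEX value and the fabricated byte string are assumptions:

    import struct

    NULL_INDEX = 0xffffffff  # assumed all-ones sentinel

    def parse_fdst(raw, file_version):
        if file_version < 8:  # MOBI 6 layout
            first_rec, last_rec = struct.unpack_from(b'>HH', raw, 192)
            fdst_count = struct.unpack_from(b'>L', raw, 196)[0]
            return {'first': first_rec, 'last': last_rec, 'count': fdst_count}
        fdst_idx, fdst_count = struct.unpack_from(b'>LL', raw, 192)  # KF8 layout
        if fdst_count <= 1:
            fdst_idx = NULL_INDEX
        return {'fdst_idx': fdst_idx, 'count': fdst_count}

    fake = b'\x00' * 192 + struct.pack(b'>HHL', 1, 12, 1)  # fabricated header tail
    print(parse_fdst(fake, 6))
    print(parse_fdst(fake, 8))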
@@ -966,7 +966,7 @@ class Manifest(object):
                 data = data.cssText
                 if isinstance(data, unicode):
                     data = data.encode('utf-8')
-                return data
+                return data + b'\n'
             return str(data)

         def __unicode__(self):
@@ -389,8 +389,17 @@ class CanonicalFragmentIdentifier
     # Drill down into iframes, etc.
     while true
         target = cdoc.elementFromPoint x, y
-        if not target or target.localName == 'html'
-            log("No element at (#{ x }, #{ y })")
+        if not target or target.localName in ['html', 'body']
+            # We ignore both html and body even though body could
+            # have text nodes under it as performance is very poor if body
+            # has large margins/padding (for e.g. in fullscreen mode)
+            # A possible solution for this is to wrap all text node
+            # children of body in <span> but that is seriously ugly and
+            # might have side effects. Lets do this only if there are lots of
+            # books in the wild that actually have text children of body,
+            # and even in this case it might be better to change the input
+            # plugin to prevent this from happening.
+            # log("No element at (#{ x }, #{ y })")
             return null

         name = target.localName
src/calibre/ebooks/oeb/display/indexing.coffee (new file, 76 lines)
@@ -0,0 +1,76 @@
+#!/usr/bin/env coffee
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+###
+ Copyright 2012, Kovid Goyal <kovid@kovidgoyal.net>
+ Released under the GPLv3 License
+###
+
+body_height = () ->
+    db = document.body
+    dde = document.documentElement
+    if db? and dde?
+        return Math.max(db.scrollHeight, dde.scrollHeight, db.offsetHeight,
+            dde.offsetHeight, db.clientHeight, dde.clientHeight)
+    return 0
+
+abstop = (elem) ->
+    ans = elem.offsetTop
+    while elem.offsetParent
+        elem = elem.offsetParent
+        ans += elem.offsetTop
+    return ans
+
+class BookIndexing
+    ###
+    This class is a namespace to expose indexing functions via the
+    window.book_indexing object. The most important functions are:
+
+    anchor_positions(): Get the absolute (document co-ordinate system) position
+    for elements with the specified id/name attributes.
+
+    ###
+
+    constructor: () ->
+        this.cache = {}
+        this.body_height_at_last_check = null
+
+    cache_valid: (anchors) ->
+        for a in anchors
+            if not Object.prototype.hasOwnProperty.call(this.cache, a)
+                return false
+        for p of this.cache
+            if Object.prototype.hasOwnProperty.call(this.cache, p) and p not in anchors
+                return false
+        return true
+
+    anchor_positions: (anchors, use_cache=false) ->
+        if use_cache and body_height() == this.body_height_at_last_check and this.cache_valid(anchors)
+            return this.cache
+
+        ans = {}
+        for anchor in anchors
+            elem = document.getElementById(anchor)
+            if elem == null
+                # Look for an <a name="anchor"> element
+                try
+                    result = document.evaluate(
+                        ".//*[local-name() = 'a' and @name='#{ anchor }']",
+                        document.body, null,
+                        XPathResult.FIRST_ORDERED_NODE_TYPE, null)
+                    elem = result.singleNodeValue
+                catch error
+                    # The anchor had a ' or other invalid char
+                    elem = null
+            if elem == null
+                pos = body_height() + 10000
+            else
+                pos = abstop(elem)
+            ans[anchor] = pos
+        this.cache = ans
+        this.body_height_at_last_check = body_height()
+        return ans
+
+if window?
+    window.book_indexing = new BookIndexing()
@@ -1,383 +0,0 @@
-from __future__ import with_statement
-
-__license__ = 'GPL v3'
-__copyright__ = '2008 Kovid Goyal <kovid at kovidgoyal.net>'
-
-'''
-Iterate over the HTML files in an ebook. Useful for writing viewers.
-'''
-
-import re, os, math
-from cStringIO import StringIO
-
-from PyQt4.Qt import QFontDatabase
-
-from calibre.customize.ui import available_input_formats
-from calibre.ebooks.metadata.opf2 import OPF
-from calibre.ptempfile import TemporaryDirectory
-from calibre.ebooks.chardet import xml_to_unicode
-from calibre.utils.zipfile import safe_replace
-from calibre.utils.config import DynamicConfig
-from calibre.utils.logging import Log
-from calibre import (guess_type, prints, prepare_string_for_xml,
-        xml_replace_entities)
-from calibre.ebooks.oeb.transforms.cover import CoverManager
-from calibre.constants import filesystem_encoding
-
-TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(\
-        '__ar__', 'none').replace('__viewbox__', '0 0 600 800'
-        ).replace('__width__', '600').replace('__height__', '800')
-BM_FIELD_SEP = u'*|!|?|*'
-BM_LEGACY_ESC = u'esc-text-%&*#%(){}ads19-end-esc'
-
-def character_count(html):
-    '''
-    Return the number of "significant" text characters in a HTML string.
-    '''
-    count = 0
-    strip_space = re.compile(r'\s+')
-    for match in re.finditer(r'>[^<]+<', html):
-        count += len(strip_space.sub(' ', match.group()))-2
-    return count
-
-class UnsupportedFormatError(Exception):
-
-    def __init__(self, fmt):
-        Exception.__init__(self, _('%s format books are not supported')%fmt.upper())
-
-class SpineItem(unicode):
-
-    def __new__(cls, path, mime_type=None):
-        ppath = path.partition('#')[0]
-        if not os.path.exists(path) and os.path.exists(ppath):
-            path = ppath
-        obj = super(SpineItem, cls).__new__(cls, path)
-        raw = open(path, 'rb').read()
-        raw, obj.encoding = xml_to_unicode(raw)
-        obj.character_count = character_count(raw)
-        obj.start_page = -1
-        obj.pages = -1
-        obj.max_page = -1
-        if mime_type is None:
-            mime_type = guess_type(obj)[0]
-        obj.mime_type = mime_type
-        return obj
-
-class FakeOpts(object):
-    verbose = 0
-    breadth_first = False
-    max_levels = 5
-    input_encoding = None
-
-def is_supported(path):
-    ext = os.path.splitext(path)[1].replace('.', '').lower()
-    ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
-    return ext in available_input_formats()
-
-
-def write_oebbook(oeb, path):
-    from calibre.ebooks.oeb.writer import OEBWriter
-    from calibre import walk
-    w = OEBWriter()
-    w(oeb, path)
-    for f in walk(path):
-        if f.endswith('.opf'):
-            return f
-
-class EbookIterator(object):
-
-    CHARACTERS_PER_PAGE = 1000
-
-    def __init__(self, pathtoebook, log=None):
-        self.log = log
-        if log is None:
-            self.log = Log()
-        pathtoebook = pathtoebook.strip()
-        self.pathtoebook = os.path.abspath(pathtoebook)
-        self.config = DynamicConfig(name='iterator')
-        ext = os.path.splitext(pathtoebook)[1].replace('.', '').lower()
-        ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
-        self.ebook_ext = ext.replace('original_', '')
-
-    def search(self, text, index, backwards=False):
-        text = prepare_string_for_xml(text.lower())
-        pmap = [(i, path) for i, path in enumerate(self.spine)]
-        if backwards:
-            pmap.reverse()
-        for i, path in pmap:
-            if (backwards and i < index) or (not backwards and i > index):
-                with open(path, 'rb') as f:
-                    raw = f.read().decode(path.encoding)
-                try:
-                    raw = xml_replace_entities(raw)
-                except:
-                    pass
-                if text in raw.lower():
-                    return i
-
-    def find_missing_css_files(self):
-        for x in os.walk(os.path.dirname(self.pathtoopf)):
-            for f in x[-1]:
-                if f.endswith('.css'):
-                    yield os.path.join(x[0], f)
-
-    def find_declared_css_files(self):
-        for item in self.opf.manifest:
-            if item.mime_type and 'css' in item.mime_type.lower():
-                yield item.path
-
-    def find_embedded_fonts(self):
-        '''
-        This will become unnecessary once Qt WebKit supports the @font-face rule.
-        '''
-        css_files = set(self.find_declared_css_files())
-        if not css_files:
-            css_files = set(self.find_missing_css_files())
-        bad_map = {}
-        font_family_pat = re.compile(r'font-family\s*:\s*([^;]+)')
-        for csspath in css_files:
-            try:
-                css = open(csspath, 'rb').read().decode('utf-8', 'replace')
-            except:
-                continue
-            for match in re.compile(r'@font-face\s*{([^}]+)}').finditer(css):
-                block = match.group(1)
-                family = font_family_pat.search(block)
-                url = re.compile(r'url\s*\([\'"]*(.+?)[\'"]*\)', re.DOTALL).search(block)
-                if url:
-                    path = url.group(1).split('/')
-                    path = os.path.join(os.path.dirname(csspath), *path)
-                    if not os.access(path, os.R_OK):
-                        continue
-                    id = QFontDatabase.addApplicationFont(path)
-                    if id != -1:
-                        families = [unicode(f) for f in QFontDatabase.applicationFontFamilies(id)]
-                        if family:
-                            family = family.group(1)
-                            specified_families = [x.strip().replace('"',
-                                '').replace("'", '') for x in family.split(',')]
-                            aliasing_ok = False
-                            for f in specified_families:
-                                bad_map[f] = families[0]
-                                if not aliasing_ok and f in families:
-                                    aliasing_ok = True
-
-                            if not aliasing_ok:
-                                prints('WARNING: Family aliasing not fully supported.')
-                                prints('\tDeclared family: %r not in actual families: %r'
-                                        % (family, families))
-                            else:
-                                prints('Loaded embedded font:', repr(family))
-        if bad_map:
-            def prepend_embedded_font(match):
-                for bad, good in bad_map.items():
-                    if bad in match.group(1):
-                        prints('Substituting font family: %s -> %s'%(bad, good))
-                        return match.group().replace(bad, '"%s"'%good)
-
-            from calibre.ebooks.chardet import force_encoding
-            for csspath in css_files:
-                with open(csspath, 'r+b') as f:
-                    css = f.read()
-                    enc = force_encoding(css, False)
-                    css = css.decode(enc, 'replace')
-                    ncss = font_family_pat.sub(prepend_embedded_font, css)
-                    if ncss != css:
-                        f.seek(0)
-                        f.truncate()
-                        f.write(ncss.encode(enc))
-
-    def __enter__(self, processed=False, only_input_plugin=False):
-        self.delete_on_exit = []
-        self._tdir = TemporaryDirectory('_ebook_iter')
-        self.base = self._tdir.__enter__()
-        if not isinstance(self.base, unicode):
-            self.base = self.base.decode(filesystem_encoding)
-        from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
-        plumber = Plumber(self.pathtoebook, self.base, self.log)
-        plumber.setup_options()
-        if self.pathtoebook.lower().endswith('.opf'):
-            plumber.opts.dont_package = True
-        if hasattr(plumber.opts, 'no_process'):
-            plumber.opts.no_process = True
-
-        plumber.input_plugin.for_viewer = True
-        with plumber.input_plugin:
-            self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
-                plumber.opts, plumber.input_fmt, self.log,
-                {}, self.base)
-
-        if not only_input_plugin:
-            if processed or plumber.input_fmt.lower() in ('pdb', 'pdf', 'rb') and \
-                    not hasattr(self.pathtoopf, 'manifest'):
-                if hasattr(self.pathtoopf, 'manifest'):
-                    self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
-                self.pathtoopf = create_oebbook(self.log, self.pathtoopf,
-                        plumber.opts)
-
-        if hasattr(self.pathtoopf, 'manifest'):
-            self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
-
-        self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper()
-        if getattr(plumber.input_plugin, 'is_kf8', False):
-            self.book_format = 'KF8'
-
-        self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
-        if self.opf is None:
-            self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
-        self.language = self.opf.language
-        if self.language:
-            self.language = self.language.lower()
-        ordered = [i for i in self.opf.spine if i.is_linear] + \
-                  [i for i in self.opf.spine if not i.is_linear]
-        self.spine = []
-        for i in ordered:
-            spath = i.path
-            mt = None
-            if i.idref is not None:
-                mt = self.opf.manifest.type_for_id(i.idref)
-            if mt is None:
-                mt = guess_type(spath)[0]
-            try:
-                self.spine.append(SpineItem(spath, mime_type=mt))
-            except:
-                self.log.warn('Missing spine item:', repr(spath))
-
-        cover = self.opf.cover
-        if self.ebook_ext in ('lit', 'mobi', 'prc', 'opf', 'fb2') and cover:
-            cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
-            rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
-            chtml = (TITLEPAGE%prepare_string_for_xml(rcpath, True)).encode('utf-8')
-            open(cfile, 'wb').write(chtml)
-            self.spine[0:0] = [SpineItem(cfile,
-                mime_type='application/xhtml+xml')]
-            self.delete_on_exit.append(cfile)
-
-        if self.opf.path_to_html_toc is not None and \
-           self.opf.path_to_html_toc not in self.spine:
-            try:
-                self.spine.append(SpineItem(self.opf.path_to_html_toc))
-            except:
-                import traceback
-                traceback.print_exc()
-
-
-        sizes = [i.character_count for i in self.spine]
-        self.pages = [math.ceil(i/float(self.CHARACTERS_PER_PAGE)) for i in sizes]
-        for p, s in zip(self.pages, self.spine):
-            s.pages = p
-        start = 1
-
-        for s in self.spine:
-            s.start_page = start
-            start += s.pages
-            s.max_page = s.start_page + s.pages - 1
-        self.toc = self.opf.toc
-
-        self.read_bookmarks()
-
-        return self
-
-    def parse_bookmarks(self, raw):
-        for line in raw.splitlines():
-            bm = None
-            if line.count('^') > 0:
-                tokens = line.rpartition('^')
-                title, ref = tokens[0], tokens[2]
-                try:
-                    spine, _, pos = ref.partition('#')
-                    spine = int(spine.strip())
-                except:
-                    continue
-                bm = {'type':'legacy', 'title':title, 'spine':spine, 'pos':pos}
-            elif BM_FIELD_SEP in line:
-                try:
-                    title, spine, pos = line.strip().split(BM_FIELD_SEP)
-                    spine = int(spine)
-                except:
-                    continue
-                # Unescape from serialization
-                pos = pos.replace(BM_LEGACY_ESC, u'^')
-                # Check for pos being a scroll fraction
-                try:
-                    pos = float(pos)
-                except:
-                    pass
-                bm = {'type':'cfi', 'title':title, 'pos':pos, 'spine':spine}
-
-            if bm:
-                self.bookmarks.append(bm)
-
-    def serialize_bookmarks(self, bookmarks):
-        dat = []
-        for bm in bookmarks:
-            if bm['type'] == 'legacy':
-                rec = u'%s^%d#%s'%(bm['title'], bm['spine'], bm['pos'])
-            else:
-                pos = bm['pos']
-                if isinstance(pos, (int, float)):
-                    pos = unicode(pos)
-                else:
-                    pos = pos.replace(u'^', BM_LEGACY_ESC)
-                rec = BM_FIELD_SEP.join([bm['title'], unicode(bm['spine']), pos])
-            dat.append(rec)
-        return (u'\n'.join(dat) +u'\n')
-
-    def read_bookmarks(self):
-        self.bookmarks = []
-        bmfile = os.path.join(self.base, 'META-INF', 'calibre_bookmarks.txt')
-        raw = ''
-        if os.path.exists(bmfile):
-            with open(bmfile, 'rb') as f:
-                raw = f.read()
-        else:
-            saved = self.config['bookmarks_'+self.pathtoebook]
-            if saved:
-                raw = saved
-        if not isinstance(raw, unicode):
-            raw = raw.decode('utf-8')
-        self.parse_bookmarks(raw)
-
-    def save_bookmarks(self, bookmarks=None):
-        if bookmarks is None:
-            bookmarks = self.bookmarks
-        dat = self.serialize_bookmarks(bookmarks)
-        if os.path.splitext(self.pathtoebook)[1].lower() == '.epub' and \
-            os.access(self.pathtoebook, os.R_OK):
-            try:
-                zf = open(self.pathtoebook, 'r+b')
-            except IOError:
-                return
-            safe_replace(zf, 'META-INF/calibre_bookmarks.txt',
-                    StringIO(dat.encode('utf-8')),
-                    add_missing=True)
-        else:
-            self.config['bookmarks_'+self.pathtoebook] = dat
-
-    def add_bookmark(self, bm):
-        self.bookmarks = [x for x in self.bookmarks if x['title'] !=
-                bm['title']]
-        self.bookmarks.append(bm)
-        self.save_bookmarks()
-
-    def set_bookmarks(self, bookmarks):
-        self.bookmarks = bookmarks
-
-    def __exit__(self, *args):
-        self._tdir.__exit__(*args)
-        for x in self.delete_on_exit:
-            if os.path.exists(x):
-                os.remove(x)
-
-def get_preprocess_html(path_to_ebook, output):
-    from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
-    iterator = EbookIterator(path_to_ebook)
-    iterator.__enter__(only_input_plugin=True)
-    preprocessor = HTMLPreProcessor(None, False)
-    with open(output, 'wb') as out:
-        for path in iterator.spine:
-            with open(path, 'rb') as f:
-                html = f.read().decode('utf-8', 'replace')
-            html = preprocessor(html, get_preprocess_html=True)
-            out.write(html.encode('utf-8'))
-            out.write(b'\n\n' + b'-'*80 + b'\n\n')
src/calibre/ebooks/oeb/iterator/__init__.py (new file, 42 lines)
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import os, re
+
+from calibre.customize.ui import available_input_formats
+
+def is_supported(path):
+    ext = os.path.splitext(path)[1].replace('.', '').lower()
+    ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
+    return ext in available_input_formats()
+
+class UnsupportedFormatError(Exception):
+
+    def __init__(self, fmt):
+        Exception.__init__(self, _('%s format books are not supported')%fmt.upper())
+
+def EbookIterator(*args, **kwargs):
+    'For backwards compatibility'
+    from calibre.ebooks.oeb.iterator.book import EbookIterator
+    return EbookIterator(*args, **kwargs)
+
+def get_preprocess_html(path_to_ebook, output):
+    from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
+    iterator = EbookIterator(path_to_ebook)
+    iterator.__enter__(only_input_plugin=True, run_char_count=False,
+            read_anchor_map=False)
+    preprocessor = HTMLPreProcessor(None, False)
+    with open(output, 'wb') as out:
+        for path in iterator.spine:
+            with open(path, 'rb') as f:
+                html = f.read().decode('utf-8', 'replace')
+            html = preprocessor(html, get_preprocess_html=True)
+            out.write(html.encode('utf-8'))
+            out.write(b'\n\n' + b'-'*80 + b'\n\n')
src/calibre/ebooks/oeb/iterator/book.py (new file, 187 lines)
@@ -0,0 +1,187 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+'''
+Iterate over the HTML files in an ebook. Useful for writing viewers.
+'''
+
+import re, os, math
+from functools import partial
+
+from calibre.ebooks.metadata.opf2 import OPF
+from calibre.ptempfile import TemporaryDirectory
+from calibre.utils.config import DynamicConfig
+from calibre.utils.logging import default_log
+from calibre import (guess_type, prepare_string_for_xml,
+        xml_replace_entities)
+from calibre.ebooks.oeb.transforms.cover import CoverManager
+
+from calibre.ebooks.oeb.iterator.spine import (SpineItem, create_indexing_data)
+from calibre.ebooks.oeb.iterator.bookmarks import BookmarksMixin
+
+TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(\
+        '__ar__', 'none').replace('__viewbox__', '0 0 600 800'
+        ).replace('__width__', '600').replace('__height__', '800')
+
+class FakeOpts(object):
+    verbose = 0
+    breadth_first = False
+    max_levels = 5
+    input_encoding = None
+
+
+def write_oebbook(oeb, path):
+    from calibre.ebooks.oeb.writer import OEBWriter
+    from calibre import walk
+    w = OEBWriter()
+    w(oeb, path)
+    for f in walk(path):
+        if f.endswith('.opf'):
+            return f
+
+class EbookIterator(BookmarksMixin):
+
+    CHARACTERS_PER_PAGE = 1000
+
+    def __init__(self, pathtoebook, log=None):
+        self.log = log or default_log
+        pathtoebook = pathtoebook.strip()
+        self.pathtoebook = os.path.abspath(pathtoebook)
+        self.config = DynamicConfig(name='iterator')
+        ext = os.path.splitext(pathtoebook)[1].replace('.', '').lower()
+        ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
+        self.ebook_ext = ext.replace('original_', '')
+
+    def search(self, text, index, backwards=False):
+        text = prepare_string_for_xml(text.lower())
+        pmap = [(i, path) for i, path in enumerate(self.spine)]
+        if backwards:
+            pmap.reverse()
+        for i, path in pmap:
+            if (backwards and i < index) or (not backwards and i > index):
+                with open(path, 'rb') as f:
+                    raw = f.read().decode(path.encoding)
+                try:
+                    raw = xml_replace_entities(raw)
+                except:
+                    pass
+                if text in raw.lower():
+                    return i
+
+    def __enter__(self, processed=False, only_input_plugin=False,
+            run_char_count=True, read_anchor_map=True):
+        ''' Convert an ebook file into an exploded OEB book suitable for
+        display in viewers/preprocessing etc. '''
+
+        from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
+
+        self.delete_on_exit = []
+        self._tdir = TemporaryDirectory('_ebook_iter')
+        self.base = self._tdir.__enter__()
+        plumber = Plumber(self.pathtoebook, self.base, self.log)
+        plumber.setup_options()
+        if self.pathtoebook.lower().endswith('.opf'):
+            plumber.opts.dont_package = True
+        if hasattr(plumber.opts, 'no_process'):
+            plumber.opts.no_process = True
+
+        plumber.input_plugin.for_viewer = True
+        with plumber.input_plugin, open(plumber.input, 'rb') as inf:
+            self.pathtoopf = plumber.input_plugin(inf,
+                plumber.opts, plumber.input_fmt, self.log,
+                {}, self.base)
+
+        if not only_input_plugin:
+            # Run the HTML preprocess/parsing from the conversion pipeline as
+            # well
+            if (processed or plumber.input_fmt.lower() in {'pdb', 'pdf', 'rb'}
+                    and not hasattr(self.pathtoopf, 'manifest')):
+                if hasattr(self.pathtoopf, 'manifest'):
+                    self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
+                self.pathtoopf = create_oebbook(self.log, self.pathtoopf,
+                        plumber.opts)
+
+        if hasattr(self.pathtoopf, 'manifest'):
+            self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
+
+        self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper()
+        if getattr(plumber.input_plugin, 'is_kf8', False):
+            self.book_format = 'KF8'
+
+        self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
+        if self.opf is None:
+            self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
+        self.language = self.opf.language
+        if self.language:
+            self.language = self.language.lower()
+        ordered = [i for i in self.opf.spine if i.is_linear] + \
+                  [i for i in self.opf.spine if not i.is_linear]
+        self.spine = []
+        Spiny = partial(SpineItem, read_anchor_map=read_anchor_map,
+                run_char_count=run_char_count)
+        for i in ordered:
+            spath = i.path
+            mt = None
+            if i.idref is not None:
+                mt = self.opf.manifest.type_for_id(i.idref)
+            if mt is None:
+                mt = guess_type(spath)[0]
+            try:
+                self.spine.append(Spiny(spath, mime_type=mt))
+            except:
+                self.log.warn('Missing spine item:', repr(spath))
+
+        cover = self.opf.cover
+        if cover and self.ebook_ext in {'lit', 'mobi', 'prc', 'opf', 'fb2',
+                'azw', 'azw3'}:
+            cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
+            rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
+            chtml = (TITLEPAGE%prepare_string_for_xml(rcpath, True)).encode('utf-8')
+            with open(cfile, 'wb') as f:
+                f.write(chtml)
+            self.spine[0:0] = [Spiny(cfile,
+                mime_type='application/xhtml+xml')]
+            self.delete_on_exit.append(cfile)
+
+        if self.opf.path_to_html_toc is not None and \
+           self.opf.path_to_html_toc not in self.spine:
+            try:
+                self.spine.append(Spiny(self.opf.path_to_html_toc))
+            except:
+                import traceback
+                traceback.print_exc()
+
+        sizes = [i.character_count for i in self.spine]
+        self.pages = [math.ceil(i/float(self.CHARACTERS_PER_PAGE)) for i in sizes]
+        for p, s in zip(self.pages, self.spine):
+            s.pages = p
+        start = 1
+
+        for s in self.spine:
+            s.start_page = start
+            start += s.pages
+            s.max_page = s.start_page + s.pages - 1
+        self.toc = self.opf.toc
+        if read_anchor_map:
+            create_indexing_data(self.spine, self.toc)
+
+        self.read_bookmarks()
+
+        return self
+
+    def __exit__(self, *args):
+        self._tdir.__exit__(*args)
+        for x in self.delete_on_exit:
+            try:
+                os.remove(x)
+            except:
+                pass
src/calibre/ebooks/oeb/iterator/bookmarks.py (new file, 105 lines)
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import os
+from io import BytesIO
+
+from calibre.utils.zipfile import safe_replace
+
+BM_FIELD_SEP = u'*|!|?|*'
+BM_LEGACY_ESC = u'esc-text-%&*#%(){}ads19-end-esc'
+
+class BookmarksMixin(object):
+
+    def parse_bookmarks(self, raw):
+        for line in raw.splitlines():
+            bm = None
+            if line.count('^') > 0:
+                tokens = line.rpartition('^')
+                title, ref = tokens[0], tokens[2]
+                try:
+                    spine, _, pos = ref.partition('#')
+                    spine = int(spine.strip())
+                except:
+                    continue
+                bm = {'type':'legacy', 'title':title, 'spine':spine, 'pos':pos}
+            elif BM_FIELD_SEP in line:
+                try:
+                    title, spine, pos = line.strip().split(BM_FIELD_SEP)
+                    spine = int(spine)
+                except:
+                    continue
+                # Unescape from serialization
+                pos = pos.replace(BM_LEGACY_ESC, u'^')
+                # Check for pos being a scroll fraction
+                try:
+                    pos = float(pos)
+                except:
+                    pass
+                bm = {'type':'cfi', 'title':title, 'pos':pos, 'spine':spine}
+
+            if bm:
+                self.bookmarks.append(bm)
+
+    def serialize_bookmarks(self, bookmarks):
+        dat = []
+        for bm in bookmarks:
+            if bm['type'] == 'legacy':
+                rec = u'%s^%d#%s'%(bm['title'], bm['spine'], bm['pos'])
+            else:
+                pos = bm['pos']
+                if isinstance(pos, (int, float)):
+                    pos = unicode(pos)
+                else:
+                    pos = pos.replace(u'^', BM_LEGACY_ESC)
+                rec = BM_FIELD_SEP.join([bm['title'], unicode(bm['spine']), pos])
+            dat.append(rec)
+        return (u'\n'.join(dat) +u'\n')
+
+    def read_bookmarks(self):
+        self.bookmarks = []
+        bmfile = os.path.join(self.base, 'META-INF', 'calibre_bookmarks.txt')
+        raw = ''
+        if os.path.exists(bmfile):
+            with open(bmfile, 'rb') as f:
+                raw = f.read()
+        else:
+            saved = self.config['bookmarks_'+self.pathtoebook]
+            if saved:
+                raw = saved
+        if not isinstance(raw, unicode):
+            raw = raw.decode('utf-8')
+        self.parse_bookmarks(raw)
+
+    def save_bookmarks(self, bookmarks=None):
+        if bookmarks is None:
+            bookmarks = self.bookmarks
+        dat = self.serialize_bookmarks(bookmarks)
+        if os.path.splitext(self.pathtoebook)[1].lower() == '.epub' and \
+            os.access(self.pathtoebook, os.R_OK):
+            try:
+                zf = open(self.pathtoebook, 'r+b')
+            except IOError:
+                return
+            safe_replace(zf, 'META-INF/calibre_bookmarks.txt',
+                    BytesIO(dat.encode('utf-8')),
+                    add_missing=True)
+        else:
+            self.config['bookmarks_'+self.pathtoebook] = dat
+
+    def add_bookmark(self, bm):
+        self.bookmarks = [x for x in self.bookmarks if x['title'] !=
+                bm['title']]
+        self.bookmarks.append(bm)
+        self.save_bookmarks()
+
+    def set_bookmarks(self, bookmarks):
+        self.bookmarks = bookmarks
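The two constants above define a one-bookmark-per-line text format: fields joined by BM_FIELD_SEP, with any '^' in the position escaped so the line cannot be mistaken for the legacy 'title^spine#pos' form. A self-contained round-trip sketch mirroring (not importing) the mixin's logic, with made-up values, written Python 2 style to match the file:

    BM_FIELD_SEP = u'*|!|?|*'
    BM_LEGACY_ESC = u'esc-text-%&*#%(){}ads19-end-esc'

    def serialize(title, spine, pos):
        # Escape '^' so old parsers cannot misread the record
        return BM_FIELD_SEP.join([title, unicode(spine),
                pos.replace(u'^', BM_LEGACY_ESC)])

    def parse(line):
        title, spine, pos = line.strip().split(BM_FIELD_SEP)
        return title, int(spine), pos.replace(BM_LEGACY_ESC, u'^')

    line = serialize(u'Chapter 3', 2, u'/2/4^^1:0')
    print(parse(line) == (u'Chapter 3', 2, u'/2/4^^1:0'))  # True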
src/calibre/ebooks/oeb/iterator/spine.py (new file, 120 lines)
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+        print_function)
+from future_builtins import map
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import re, os
+from functools import partial
+from operator import attrgetter
+from collections import namedtuple
+
+from calibre import guess_type
+from calibre.ebooks.chardet import xml_to_unicode
+
+def character_count(html):
+    ''' Return the number of "significant" text characters in a HTML string. '''
+    count = 0
+    strip_space = re.compile(r'\s+')
+    for match in re.finditer(r'>[^<]+<', html):
+        count += len(strip_space.sub(' ', match.group()))-2
+    return count
+
+def anchor_map(html):
+    ''' Return map of all anchor names to their offsets in the html '''
+    ans = {}
+    for match in re.finditer(
+        r'''(?:id|name)\s*=\s*['"]([^'"]+)['"]''', html):
+        anchor = match.group(0)
+        ans[anchor] = ans.get(anchor, match.start())
+    return ans
+
+class SpineItem(unicode):
+
+    def __new__(cls, path, mime_type=None, read_anchor_map=True,
+            run_char_count=True):
+        ppath = path.partition('#')[0]
+        if not os.path.exists(path) and os.path.exists(ppath):
+            path = ppath
+        obj = super(SpineItem, cls).__new__(cls, path)
+        with open(path, 'rb') as f:
+            raw = f.read()
+        raw, obj.encoding = xml_to_unicode(raw)
+        obj.character_count = character_count(raw) if run_char_count else 10000
+        obj.anchor_map = anchor_map(raw) if read_anchor_map else {}
+        obj.start_page = -1
+        obj.pages = -1
+        obj.max_page = -1
+        obj.index_entries = []
+        if mime_type is None:
+            mime_type = guess_type(obj)[0]
+        obj.mime_type = mime_type
+        return obj
+
+class IndexEntry(object):
+
+    def __init__(self, spine, toc_entry, num):
+        self.num = num
+        self.text = toc_entry.text or _('Unknown')
+        self.key = toc_entry.abspath
+        self.anchor = self.start_anchor = toc_entry.fragment or None
+        try:
+            self.spine_pos = spine.index(self.key)
+        except ValueError:
+            self.spine_pos = -1
+        self.anchor_pos = 0
+        if self.spine_pos > -1:
+            self.anchor_pos = spine[self.spine_pos].anchor_map.get(self.anchor,
+                    0)
+
+        self.depth = 0
+        p = toc_entry.parent
+        while p is not None:
+            self.depth += 1
+            p = p.parent
+
+        self.sort_key = (self.spine_pos, self.anchor_pos)
+        self.spine_count = len(spine)
+
+    def find_end(self, all_entries):
+        potential_enders = [i for i in all_entries if
+                i.depth <= self.depth and
+                (
+                    (i.spine_pos == self.spine_pos and i.anchor_pos >
+                        self.anchor_pos)
+                    or
+                    i.spine_pos > self.spine_pos
+                )]
+        if potential_enders:
+            # potential_enders is sorted by (spine_pos, anchor_pos)
+            end = potential_enders[0]
+            self.end_spine_pos = end.spine_pos
+            self.end_anchor = end.anchor
+        else:
+            self.end_spine_pos = self.spine_count - 1
+            self.end_anchor = None
+
+def create_indexing_data(spine, toc):
+    if not toc: return
+    f = partial(IndexEntry, spine)
+    index_entries = list(map(f,
+        (t for t in toc.flat() if t is not toc),
+        (i-1 for i, t in enumerate(toc.flat()) if t is not toc)
+        ))
+    index_entries.sort(key=attrgetter('sort_key'))
+    [ i.find_end(index_entries) for i in index_entries ]
+
+    ie = namedtuple('IndexEntry', 'entry start_anchor end_anchor')
+
+    for spine_pos, spine_item in enumerate(spine):
+        for i in index_entries:
+            if i.end_spine_pos < spine_pos or i.spine_pos > spine_pos:
+                continue # Does not touch this file
+            start = i.anchor if i.spine_pos == spine_pos else None
+            end = i.end_anchor if i.spine_pos == spine_pos else None
+            spine_item.index_entries.append(ie(i, start, end))
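A quick demonstration of the anchor_map() scan defined above on a toy document. The sketch prints the captured group(1), the bare anchor name, to show what the regex extracts; note the file itself stores match.group(0), the whole attribute text:

    import re

    html = u'<h1 id="c1">One</h1><a name="fn1"></a>'
    ans = {}
    for match in re.finditer(r'''(?:id|name)\s*=\s*['"]([^'"]+)['"]''', html):
        # Keep only the offset of the first occurrence of each anchor
        ans[match.group(1)] = ans.get(match.group(1), match.start())
    print(ans)  # {u'c1': 4, u'fn1': 23}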
@@ -361,9 +361,11 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
     # Remove any encoding-specifying <meta/> elements
     for meta in META_XP(data):
         meta.getparent().remove(meta)
-    etree.SubElement(head, XHTML('meta'),
-            attrib={'http-equiv': 'Content-Type',
-                'content': '%s; charset=utf-8' % XHTML_NS})
+    meta = etree.SubElement(head, XHTML('meta'),
+            attrib={'http-equiv': 'Content-Type'})
+    meta.set('content', 'text/html; charset=utf-8') # Ensure content is second
+                                                    # attribute

     # Ensure has a <body/>
     if not xpath(data, '/h:html/h:body'):
         body = xpath(data, '//h:body')
@@ -347,7 +347,11 @@ class Stylizer(object):
                 style = self.flatten_style(rule.style)
                 self.page_rule.update(style)
             elif isinstance(rule, CSSFontFaceRule):
-                self.font_face_rules.append(rule)
+                if rule.style.length > 1:
+                    # Ignore the meaningless font face rules generated by the
+                    # benighted MS Word that contain only a font-family declaration
+                    # and nothing else
+                    self.font_face_rules.append(rule)
         return results

     def flatten_style(self, cssstyle):
@@ -157,10 +157,12 @@ class CSSFlattener(object):
         bs = body.get('style', '').split(';')
         bs.append('margin-top: 0pt')
         bs.append('margin-bottom: 0pt')
-        bs.append('margin-left : %fpt'%\
-                float(self.context.margin_left))
-        bs.append('margin-right : %fpt'%\
-                float(self.context.margin_right))
+        if float(self.context.margin_left) >= 0:
+            bs.append('margin-left : %gpt'%\
+                    float(self.context.margin_left))
+        if float(self.context.margin_right) >= 0:
+            bs.append('margin-right : %gpt'%\
+                    float(self.context.margin_right))
         bs.extend(['padding-left: 0pt', 'padding-right: 0pt'])
         if self.page_break_on_body:
             bs.extend(['page-break-before: always'])
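Besides skipping negative margins, the hunk above quietly switches the format from %f to %g, which drops trailing zeros in the emitted CSS. A quick comparison, plain Python rather than calibre code:

    for margin in (5.0, 2.5, 0.0):
        print('%fpt vs %gpt' % (margin, margin))
    # 5.000000pt vs 5pt
    # 2.500000pt vs 2.5pt
    # 0.000000pt vs 0pt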
@@ -393,10 +395,11 @@ class CSSFlattener(object):
             l = etree.SubElement(head, XHTML('link'),
                 rel='stylesheet', type=CSS_MIME, href=href)
             l.tail='\n'
-        href = item.relhref(global_href)
-        l = etree.SubElement(head, XHTML('link'),
-            rel='stylesheet', type=CSS_MIME, href=href)
-        l.tail = '\n'
+        if global_href:
+            href = item.relhref(global_href)
+            l = etree.SubElement(head, XHTML('link'),
+                rel='stylesheet', type=CSS_MIME, href=href)
+            l.tail = '\n'

     def replace_css(self, css):
         manifest = self.oeb.manifest
@@ -413,14 +416,16 @@ class CSSFlattener(object):
         global_css = defaultdict(list)
         for item in self.oeb.spine:
             stylizer = self.stylizers[item]
-            stylizer.page_rule['margin-top'] = '%gpt'%\
-                    float(self.context.margin_top)
-            stylizer.page_rule['margin-bottom'] = '%gpt'%\
-                    float(self.context.margin_bottom)
+            if float(self.context.margin_top) >= 0:
+                stylizer.page_rule['margin-top'] = '%gpt'%\
+                        float(self.context.margin_top)
+            if float(self.context.margin_bottom) >= 0:
+                stylizer.page_rule['margin-bottom'] = '%gpt'%\
+                        float(self.context.margin_bottom)
             items = stylizer.page_rule.items()
             items.sort()
             css = ';\n'.join("%s: %s" % (key, val) for key, val in items)
-            css = '@page {\n%s\n}\n'%css
+            css = ('@page {\n%s\n}\n'%css) if items else ''
             rules = [r.cssText for r in stylizer.font_face_rules]
             raw = '\n\n'.join(rules)
             css += '\n\n' + raw
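Following the string building in the hunk above, the default 5pt margins would produce the @page block below. This sketch mirrors the diff's logic on a made-up items dict:

    items = sorted({'margin-top': '5pt', 'margin-bottom': '5pt'}.items())
    css = ';\n'.join('%s: %s' % (key, val) for key, val in items)
    print(('@page {\n%s\n}\n' % css) if items else '')
    # @page {
    # margin-bottom: 5pt;
    # margin-top: 5pt
    # }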
@@ -429,9 +434,11 @@ class CSSFlattener(object):
         gc_map = {}
         manifest = self.oeb.manifest
         for css in global_css:
-            id_, href = manifest.generate('page_css', 'page_styles.css')
-            manifest.add(id_, href, CSS_MIME, data=cssutils.parseString(css,
-                validate=False))
+            href = None
+            if css.strip():
+                id_, href = manifest.generate('page_css', 'page_styles.css')
+                manifest.add(id_, href, CSS_MIME, data=cssutils.parseString(css,
+                    validate=False))
             gc_map[css] = href

         ans = {}
@@ -6,7 +6,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import os
+import os, re
 from calibre.utils.date import isoformat, now
 from calibre import guess_type
@@ -141,7 +141,7 @@ class MergeMetadata(object):
             item = self.oeb.manifest.hrefs[old_cover.href]
             if not cdata:
                 return item.id
-            self.oeb.manifest.remove(item)
+            self.remove_old_cover(item)
         elif not cdata:
             id = self.oeb.manifest.generate(id='cover')
             self.oeb.manifest.add(id, old_cover.href, 'image/jpeg')
@@ -152,3 +152,41 @@ class MergeMetadata(object):
         self.oeb.guide.add('cover', 'Cover', href)
         return id

+    def remove_old_cover(self, cover_item):
+        from calibre.ebooks.oeb.base import XPath
+        from lxml import etree
+
+        self.oeb.manifest.remove(cover_item)
+
+        # Remove any references to the cover in the HTML
+        affected_items = set()
+        for item in self.oeb.spine:
+            try:
+                images = XPath('//h:img[@src]')(item.data)
+            except:
+                images = []
+            removed = False
+            for img in images:
+                href = item.abshref(img.get('src'))
+                if href == cover_item.href:
+                    img.getparent().remove(img)
+                    removed = True
+            if removed:
+                affected_items.add(item)
+
+        # Check if the resulting HTML has no content, if so remove it
+        for item in affected_items:
+            body = XPath('//h:body')(item.data)
+            if body:
+                text = etree.tostring(body[0], method='text', encoding=unicode)
+            else:
+                text = ''
+            text = re.sub(r'\s+', '', text)
+            if not text and not XPath('//h:img|//svg:svg')(item.data):
+                self.log('Removing %s as it is a wrapper around'
+                        ' the cover image'%item.href)
+                self.oeb.spine.remove(item)
+                self.oeb.manifest.remove(item)
@@ -372,8 +372,8 @@ class ParseRtf:
             old_rtf = old_rtf_obj.check_if_old_rtf()
             if old_rtf:
                 if self.__run_level > 5:
-                    msg = 'Older RTF\n'
-                    msg += 'self.__run_level is "%s"\n' % self.__run_level
+                    msg = 'Older RTF\n' \
+                        'self.__run_level is "%s"\n' % self.__run_level
                     raise RtfInvalidCodeException, msg
                 if self.__run_level > 1:
                     sys.stderr.write('File could be older RTF...\n')
@@ -381,7 +381,7 @@ class ParseRtf:
                 if self.__run_level > 1:
                     sys.stderr.write(
                         'File also has newer RTF.\n'
-                        'Will do the best to convert.\n'
+                        'Will do the best to convert...\n'
                     )
             add_brackets_obj = add_brackets.AddBrackets(
                     in_file = self.__temp_file,
@@ -20,6 +20,9 @@ class AddBrackets:
     """
     Add brackets for old RTF.
     Logic:
+    When control words without their own brackets are encountered
+    and in the list of allowed words, this will add brackets
+    to facilitate the treatment of the file
     """
     def __init__(self, in_file,
             bug_handler,
@@ -41,53 +44,56 @@ class AddBrackets:
         self.__copy = copy
         self.__write_to = better_mktemp()
         self.__run_level = run_level
 
-    def __initiate_values(self):
-        """
-        """
         self.__state_dict = {
             'before_body'           : self.__before_body_func,
             'in_body'               : self.__in_body_func,
             'after_control_word'    : self.__after_control_word_func,
             'in_ignore'             : self.__ignore_func,
         }
+        self.__accept = [
+            'cw<ci<bold______',
+            'cw<ci<annotation',
+            'cw<ci<blue______',
+            # 'cw<ci<bold______',
+            'cw<ci<caps______',
+            'cw<ci<char-style',
+            'cw<ci<dbl-strike',
+            'cw<ci<emboss____',
+            'cw<ci<engrave___',
+            'cw<ci<font-color',
+            'cw<ci<font-down_',
+            'cw<ci<font-size_',
+            'cw<ci<font-style',
+            'cw<ci<font-up___',
+            'cw<ci<footnot-mk',
+            'cw<ci<green_____',
+            'cw<ci<hidden____',
+            'cw<ci<italics___',
+            'cw<ci<outline___',
+            'cw<ci<red_______',
+            'cw<ci<shadow____',
+            'cw<ci<small-caps',
+            'cw<ci<strike-thr',
+            'cw<ci<subscript_',
+            'cw<ci<superscrip',
+            'cw<ci<underlined',
+            # 'cw<ul<underlined',
+        ]
+
+    def __initiate_values(self):
+        """
+        Init temp values
+        """
         self.__state = 'before_body'
         self.__inline = {}
         self.__temp_group = []
-        self.__open_bracket = 0
-        self.__found_brackets = 0
-        self.__accept = [
-            'cw<ci<bold______',
-            'cw<ci<annotation',
-            'cw<ci<blue______',
-            'cw<ci<bold______',
-            'cw<ci<caps______',
-            'cw<ci<char-style',
-            'cw<ci<dbl-strike',
-            'cw<ci<emboss____',
-            'cw<ci<engrave___',
-            'cw<ci<font-color',
-            'cw<ci<font-down_',
-            'cw<ci<font-size_',
-            'cw<ci<font-style',
-            'cw<ci<font-up___',
-            'cw<ci<footnot-mk',
-            'cw<ci<green_____',
-            'cw<ci<hidden____',
-            'cw<ci<italics___',
-            'cw<ci<outline___',
-            'cw<ci<red_______',
-            'cw<ci<shadow____',
-            'cw<ci<small-caps',
-            'cw<ci<strike-thr',
-            'cw<ci<subscript_',
-            'cw<ci<superscrip',
-            'cw<ci<underlined',
-            # 'cw<ul<underlined',
-        ]
+        self.__open_bracket = False
+        self.__found_brackets = False
 
     def __before_body_func(self, line):
         """
+        If we are before the body, not interested in changing anything
         """
         if self.__token_info == 'mi<mk<body-open_':
             self.__state = 'in_body'
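Throughout these rtf2xml passes each token occupies one line: the first 16 characters identify the token and the payload starts at offset 20, which is why the code keeps slicing line[:16] and line[20:-1]. A hypothetical token line, sliced the same way:

    line = 'cw<ci<bold______<nu<true\n'   # hypothetical tokenized RTF line
    token_info = line[:16]                # 'cw<ci<bold______'
    attribute = line[20:-1]               # 'true' (drops the trailing newline)
    assert (token_info, attribute) == ('cw<ci<bold______', 'true')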
@@ -95,6 +101,14 @@ class AddBrackets:
 
     def __in_body_func(self, line):
         """
+        Select what action to take in body:
+            1-At the end of the file close the bracket if a bracket was opened
+            This happens if there is a change
+            2-If an open bracket is found the code inside is ignored
+            (written without modifications)
+            3-If an accepted control word is found put the line
+            in a buffer then change state to after cw
+            4-Else simply write the line
         """
         if line == 'cb<nu<clos-brack<0001\n' and self.__open_bracket:
             self.__write_obj.write(
@@ -102,7 +116,7 @@ class AddBrackets:
             )
             self.__write_obj.write(line)
         elif self.__token_info == 'ob<nu<open-brack':
-            self.__found_brackets = 1
+            self.__found_brackets = True
             self.__state = 'in_ignore'
             self.__ignore_count = self.__ob_count
             self.__write_obj.write(line)
@@ -114,6 +128,10 @@ class AddBrackets:
 
     def __after_control_word_func(self, line):
         """
+        After a cw either add next allowed cw to temporary list or
+        change group and write it.
+        If the token leading to an exit is an open bracket go to
+        ignore otherwise go to in body
         """
         if self.__token_info in self.__accept:
             self.__temp_group.append(line)
@@ -129,82 +147,84 @@ class AddBrackets:
 
     def __write_group(self):
         """
+        Write a temporary group after accepted control words end
+        But this is mostly useless in my opinion as there is no list of rejected cw
+        This may be a way to implement future old rtf processing for cw
+        Utility: open a group to just put brackets but why be so complicated?
+        Scheme: open brackets, write cw then go to body and back with cw after
         """
         if self.__open_bracket:
             self.__write_obj.write(
                 'cb<nu<clos-brack<0003\n'
             )
-            self.__open_bracket = 0
-        inline_string = ''
-        the_keys = self.__inline.keys()
-        for the_key in the_keys:
-            value = self.__inline[the_key]
-            if value != 'false':
-                inline_string += '%s<nu<%s\n' % (the_key, value)
+            self.__open_bracket = False
+        inline_string = ''.join(['%s<nu<%s\n' % (k, v) \
+                            for k, v in self.__inline.iteritems() \
+                            if v != 'false'])
         if inline_string:
-            self.__write_obj.write('ob<nu<open-brack<0003\n')
-            self.__write_obj.write(inline_string)
-            self.__open_bracket = 1
+            self.__write_obj.write('ob<nu<open-brack<0003\n'
+                                   '%s' % inline_string)
+            self.__open_bracket = True
         self.__temp_group = []
 
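The new inline_string expression is the old accumulation loop folded into a join; on any given dict the two produce the same string. A sketch of the equivalence (using .items() so the sketch also runs on Python 3, whereas the module itself uses .iteritems()):

    inline = {'cw<ci<bold______': 'true',
              'cw<ci<italics___': 'false'}   # hypothetical inline state

    s1 = ''
    for key in inline.keys():
        if inline[key] != 'false':
            s1 += '%s<nu<%s\n' % (key, inline[key])

    s2 = ''.join(['%s<nu<%s\n' % (k, v)
                  for k, v in inline.items()
                  if v != 'false'])
    assert s1 == s2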
def __change_permanent_group(self):
|
def __change_permanent_group(self):
|
||||||
"""
|
"""
|
||||||
use temp group to change permanent group
|
Use temp group to change permanent group
|
||||||
|
If the control word is not accepted remove it
|
||||||
|
What is the interest as it is build to accept only accepted cw
|
||||||
|
in __after_control_word_func?
|
||||||
"""
|
"""
|
||||||
for line in self.__temp_group:
|
self.__inline = {line[:16] : line[20:-1]\
|
||||||
token_info = line[:16]
|
for line in self.__temp_group\
|
||||||
if token_info in self.__accept:
|
# Is this really necessary?
|
||||||
att = line[20:-1]
|
if line[:16] in self.__accept}
|
||||||
self.__inline[token_info] = att
|
|
||||||
|
|
||||||
def __ignore_func(self, line):
|
def __ignore_func(self, line):
|
||||||
"""
|
"""
|
||||||
Don't add any brackets while inside of brackets RTF has already
|
Just copy data inside of RTF brackets already here.
|
||||||
added.
|
|
||||||
"""
|
"""
|
||||||
self.__write_obj.write(line)
|
self.__write_obj.write(line)
|
||||||
if self.__token_info == 'cb<nu<clos-brack'and\
|
if self.__token_info == 'cb<nu<clos-brack'\
|
||||||
self.__cb_count == self.__ignore_count:
|
and self.__cb_count == self.__ignore_count:
|
||||||
self.__state = 'in_body'
|
self.__state = 'in_body'
|
||||||
|
|
||||||
def __check_brackets(self, in_file):
|
def __check_brackets(self, in_file):
|
||||||
self.__check_brack_obj = check_brackets.CheckBrackets\
|
"""
|
||||||
|
Return True if brackets match
|
||||||
|
"""
|
||||||
|
check_brack_obj = check_brackets.CheckBrackets\
|
||||||
(file = in_file)
|
(file = in_file)
|
||||||
good_br = self.__check_brack_obj.check_brackets()[0]
|
return check_brack_obj.check_brackets()[0]
|
||||||
if not good_br:
|
|
||||||
return 1
|
|
||||||
|
|
||||||
def add_brackets(self):
|
def add_brackets(self):
|
||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
self.__initiate_values()
|
self.__initiate_values()
|
||||||
read_obj = open(self.__file, 'r')
|
with open(self.__file, 'r') as read_obj:
|
||||||
self.__write_obj = open(self.__write_to, 'w')
|
with open(self.__write_to, 'w') as self.__write_obj:
|
||||||
line_to_read = 1
|
for line in read_obj:
|
||||||
while line_to_read:
|
self.__token_info = line[:16]
|
||||||
line_to_read = read_obj.readline()
|
if self.__token_info == 'ob<nu<open-brack':
|
||||||
line = line_to_read
|
self.__ob_count = line[-5:-1]
|
||||||
self.__token_info = line[:16]
|
if self.__token_info == 'cb<nu<clos-brack':
|
||||||
if self.__token_info == 'ob<nu<open-brack':
|
self.__cb_count = line[-5:-1]
|
||||||
self.__ob_count = line[-5:-1]
|
action = self.__state_dict.get(self.__state)
|
||||||
if self.__token_info == 'cb<nu<clos-brack':
|
if action is None:
|
||||||
self.__cb_count = line[-5:-1]
|
sys.stderr.write(
|
||||||
action = self.__state_dict.get(self.__state)
|
'No matching state in module add_brackets.py\n'
|
||||||
if action == None:
|
'%s\n' % self.__state)
|
||||||
sys.stderr.write('No matching state in module add_brackets.py\n')
|
action(line)
|
||||||
sys.stderr.write(self.__state + '\n')
|
#Check bad brackets
|
||||||
action(line)
|
if self.__check_brackets(self.__write_to):
|
||||||
read_obj.close()
|
|
||||||
self.__write_obj.close()
|
|
||||||
bad_brackets = self.__check_brackets(self.__write_to)
|
|
||||||
if not bad_brackets:
|
|
||||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||||
if self.__copy:
|
if self.__copy:
|
||||||
copy_obj.copy_file(self.__write_to, "add_brackets.data")
|
copy_obj.copy_file(self.__write_to, "add_brackets.data")
|
||||||
copy_obj.rename(self.__write_to, self.__file)
|
copy_obj.rename(self.__write_to, self.__file)
|
||||||
else:
|
else:
|
||||||
if self.__run_level > 0:
|
if self.__run_level > 0:
|
||||||
sys.stderr.write(
|
sys.stderr.write(
|
||||||
'Sorry, but this files has a mix of old and new RTF.\n'
|
'Sorry, but this files has a mix of old and new RTF.\n'
|
||||||
'Some characteristics cannot be converted.\n')
|
'Some characteristics cannot be converted.\n')
|
||||||
os.remove(self.__write_to)
|
os.remove(self.__write_to)
|
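The pattern repeated across this commit is worth isolating: the readline() sentinel loop (line = 1; while line: ...) is replaced by direct iteration over the file object, and explicit close() calls by context managers. A minimal sketch of the shape every rewritten method now follows (the handle callback is an assumption of this sketch):

    def process(in_path, out_path, handle):
        # handle(line, token_info) returns the text to write for this token
        with open(in_path, 'r') as read_obj:
            with open(out_path, 'w') as write_obj:
                for line in read_obj:
                    write_obj.write(handle(line, line[:16]))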
(File diff suppressed because it is too large)
@@ -1,4 +1,5 @@
 import os, sys
+from codecs import EncodedFile
 
 from calibre.ebooks.rtf2xml import copy, check_encoding
 from calibre.ptempfile import better_mktemp
@@ -41,6 +42,7 @@ class ConvertToTags:
         self.__run_level = run_level
         self.__write_to = better_mktemp()
         self.__convert_utf = False
+        self.__bad_encoding = False
 
     def __initiate_values(self):
         """
@@ -213,13 +215,14 @@ class ConvertToTags:
 
         if not check_encoding_obj.check_encoding(self.__file, verbose=False):
             self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
-        elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
+        elif not check_encoding_obj.check_encoding(self.__file, self.__encoding, verbose=False):
             self.__write_obj.write('<?xml version="1.0" encoding="UTF-8" ?>')
             self.__convert_utf = True
         else:
             self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
             sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
                     ' hope for the best')
+            self.__bad_encoding = True
         self.__new_line = 0
         self.__write_new_line()
         if self.__no_dtd:
@@ -247,7 +250,7 @@ class ConvertToTags:
         the appropriate function.
         The functions that are called:
             a text function for text
-            an open funciton for open tags
+            an open function for open tags
             an open with attribute function for tags with attributes
             an empty with attribute function for tags that are empty but have
             attribtes.
@@ -263,20 +266,19 @@ class ConvertToTags:
             action = self.__state_dict.get(self.__token_info)
             if action is not None:
                 action(line)
-        self.__write_obj.close()
-        #convert all encodings to UTF8 to avoid unsupported encodings in lxml
-        if self.__convert_utf:
+        #convert all encodings to UTF8 or ASCII to avoid unsupported encodings in lxml
+        if self.__convert_utf or self.__bad_encoding:
             copy_obj = copy.Copy(bug_handler = self.__bug_handler)
             copy_obj.rename(self.__write_to, self.__file)
+            file_encoding = "utf-8"
+            if self.__bad_encoding:
+                file_encoding = "us-ascii"
             with open(self.__file, 'r') as read_obj:
                 with open(self.__write_to, 'w') as write_obj:
-                    file = read_obj.read()
-                    try:
-                        file = file.decode(self.__encoding)
-                        write_obj.write(file.encode('utf-8'))
-                    except:
-                        sys.stderr.write('Conversion to UTF-8 is not possible,'
-                            ' encoding should be very carefully checked')
+                    write_objenc = EncodedFile(write_obj, self.__encoding,
+                                        file_encoding, 'replace')
+                    for line in read_obj:
+                        write_objenc.write(line)
         copy_obj = copy.Copy(bug_handler = self.__bug_handler)
         if self.__copy:
             copy_obj.copy_file(self.__write_to, "convert_to_tags.data")
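codecs.EncodedFile, newly imported at the top of convert_to_tags.py, wraps a file object so that data written in one encoding is transcoded to another, with 'replace' substituting anything unrepresentable. A standalone sketch of that recoding step, under the assumption of byte-mode files and illustrative names:

    from codecs import EncodedFile

    def recode(src, dst, data_encoding, file_encoding='utf-8'):
        with open(src, 'rb') as read_obj:
            with open(dst, 'wb') as write_obj:
                # Bytes written here are decoded as data_encoding and stored
                # as file_encoding; undecodable bytes become replacements.
                recoder = EncodedFile(write_obj, data_encoding,
                                      file_encoding, 'replace')
                for line in read_obj:
                    recoder.write(line)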
@@ -11,6 +11,7 @@
 #                                                                       #
 #########################################################################
 import sys, os
+
 from calibre.ebooks.rtf2xml import copy
 from calibre.ptempfile import better_mktemp
 
@@ -31,29 +32,29 @@ class Header:
         self.__bug_handler = bug_handler
         self.__copy = copy
         self.__write_to = better_mktemp()
-        self.__found_a_header = 0
+        self.__found_a_header = False
 
     def __in_header_func(self, line):
         """
        Handle all tokens that are part of header
         """
         if self.__cb_count == self.__header_bracket_count:
-            self.__in_header = 0
+            self.__in_header = False
             self.__write_obj.write(line)
             self.__write_to_head_obj.write(
-                'mi<mk<head___clo\n')
-            self.__write_to_head_obj.write(
-                'mi<tg<close_____<header-or-footer\n')
-            self.__write_to_head_obj.write(
+                'mi<mk<head___clo\n' \
+                'mi<tg<close_____<header-or-footer\n' \
                 'mi<mk<header-clo\n')
         else:
             self.__write_to_head_obj.write(line)
 
     def __found_header(self, line):
         """
         Found a header
         """
         # but this could be header or footer
-        self.__found_a_header = 1
-        self.__in_header = 1
+        self.__found_a_header = True
+        self.__in_header = True
         self.__header_count += 1
         # temporarily set this to zero so I can enter loop
         self.__cb_count = 0
|
|||||||
'mi<tg<open-att__<header-or-footer<type>%s\n' % (type)
|
'mi<tg<open-att__<header-or-footer<type>%s\n' % (type)
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
sys.stderr.write('module is header\n')
|
sys.stderr.write(
|
||||||
sys.stderr.write('method is __found_header\n')
|
'module is header\n' \
|
||||||
sys.stderr.write('no dict entry\n')
|
'method is __found_header\n' \
|
||||||
sys.stderr.write('line is %s' % line)
|
'no dict entry\n' \
|
||||||
|
'line is %s' % line)
|
||||||
self.__write_to_head_obj.write(
|
self.__write_to_head_obj.write(
|
||||||
'mi<tg<open-att__<header-or-footer<type>none\n'
|
'mi<tg<open-att__<header-or-footer<type>none\n'
|
||||||
)
|
)
|
||||||
|
|
||||||
def __default_sep(self, line):
|
def __default_sep(self, line):
|
||||||
"""Handle all tokens that are not header tokens"""
|
"""
|
||||||
|
Handle all tokens that are not header tokens
|
||||||
|
"""
|
||||||
if self.__token_info[3:5] == 'hf':
|
if self.__token_info[3:5] == 'hf':
|
||||||
self.__found_header(line)
|
self.__found_header(line)
|
||||||
self.__write_obj.write(line)
|
self.__write_obj.write(line)
|
||||||
|
|
||||||
def __initiate_sep_values(self):
|
def __initiate_sep_values(self):
|
||||||
"""
|
"""
|
||||||
initiate counters for separate_footnotes method.
|
initiate counters for separate_footnotes method.
|
||||||
@@ -89,7 +95,7 @@ class Header:
         self.__ob_count = 0
         self.__cb_count = 0
         self.__header_bracket_count = 0
-        self.__in_header = 0
+        self.__in_header = False
         self.__header_count = 0
         self.__head_dict = {
             'head-left_'    : ('header-left'),
@@ -101,6 +107,7 @@ class Header:
             'header____'    : ('header'),
             'footer____'    : ('footer'),
         }
+
     def separate_headers(self):
         """
         Separate all the footnotes in an RTF file and put them at the bottom,
@@ -110,53 +117,47 @@ class Header:
         bottom of the main file.
         """
         self.__initiate_sep_values()
-        read_obj = open(self.__file)
-        self.__write_obj = open(self.__write_to, 'w')
         self.__header_holder = better_mktemp()
-        self.__write_to_head_obj = open(self.__header_holder, 'w')
-        line_to_read = 1
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            self.__token_info = line[:16]
-            # keep track of opening and closing brackets
-            if self.__token_info == 'ob<nu<open-brack':
-                self.__ob_count = line[-5:-1]
-            if self.__token_info == 'cb<nu<clos-brack':
-                self.__cb_count = line[-5:-1]
-            # In the middle of footnote text
-            if self.__in_header:
-                self.__in_header_func(line)
-            # not in the middle of footnote text
-            else:
-                self.__default_sep(line)
-        self.__write_obj.close()
-        read_obj.close()
-        self.__write_to_head_obj.close()
-        read_obj = open(self.__header_holder, 'r')
-        write_obj = open(self.__write_to, 'a')
-        write_obj.write(
-            'mi<mk<header-beg\n')
-        line = 1
-        while line:
-            line = read_obj.readline()
-            write_obj.write(line)
-        write_obj.write(
-            'mi<mk<header-end\n')
-        read_obj.close()
-        write_obj.close()
+        with open(self.__file) as read_obj:
+            with open(self.__write_to, 'w') as self.__write_obj:
+                with open(self.__header_holder, 'w') as self.__write_to_head_obj:
+                    for line in read_obj:
+                        self.__token_info = line[:16]
+                        # keep track of opening and closing brackets
+                        if self.__token_info == 'ob<nu<open-brack':
+                            self.__ob_count = line[-5:-1]
+                        if self.__token_info == 'cb<nu<clos-brack':
+                            self.__cb_count = line[-5:-1]
+                        # In the middle of footnote text
+                        if self.__in_header:
+                            self.__in_header_func(line)
+                        # not in the middle of footnote text
+                        else:
+                            self.__default_sep(line)
+        with open(self.__header_holder, 'r') as read_obj:
+            with open(self.__write_to, 'a') as write_obj:
+                write_obj.write(
+                    'mi<mk<header-beg\n')
+                for line in read_obj:
+                    write_obj.write(line)
+                write_obj.write(
+                    'mi<mk<header-end\n')
         os.remove(self.__header_holder)
 
         copy_obj = copy.Copy(bug_handler = self.__bug_handler)
         if self.__copy:
-            copy_obj.copy_file(self.__write_to, "header_separate.info")
+            copy_obj.copy_file(self.__write_to, "header_separate.data")
         copy_obj.rename(self.__write_to, self.__file)
         os.remove(self.__write_to)
 
     def update_info(self, file, copy):
         """
         Unused method
         """
         self.__file = file
         self.__copy = copy
 
     def __get_head_body_func(self, line):
         """
         Process lines in main body and look for beginning of headers.
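The effect of the rewritten separate_headers() is a simple marker protocol: header and footer tokens are diverted to a side file while the body streams through, and the side file is then appended between header-beg and header-end markers. The append step, restated as a standalone sketch:

    def append_headers(main_path, header_path):
        with open(header_path, 'r') as read_obj:
            with open(main_path, 'a') as write_obj:
                write_obj.write('mi<mk<header-beg\n')
                for line in read_obj:
                    write_obj.write(line)
                write_obj.write('mi<mk<header-end\n')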
@@ -166,6 +167,7 @@ class Header:
             self.__state = 'head'
         else:
             self.__write_obj.write(line)
+
     def __get_head_head_func(self, line):
         """
         Copy headers and footers from bottom of file to a separate, temporary file.
@@ -174,6 +176,7 @@ class Header:
             self.__state = 'body'
         else:
             self.__write_to_head_obj.write(line)
+
     def __get_headers(self):
         """
         Private method to remove footnotes from main file. Read one line from
@@ -182,21 +185,16 @@ class Header:
         These two functions do the work of separating the footnotes form the
         body.
         """
-        read_obj = open(self.__file)
-        self.__write_obj = open(self.__write_to, 'w')
-        # self.__write_to = "footnote_info.data"
-        self.__write_to_head_obj = open(self.__header_holder, 'w')
-        line = 1
-        while line:
-            line = read_obj.readline()
-            self.__token_info = line[:16]
-            if self.__state == 'body':
-                self.__get_head_body_func(line)
-            elif self.__state == 'head':
-                self.__get_head_head_func(line)
-        read_obj.close()
-        self.__write_obj.close()
-        self.__write_to_head_obj.close()
+        with open(self.__file) as read_obj:
+            with open(self.__write_to, 'w') as self.__write_obj:
+                with open(self.__header_holder, 'w') as self.__write_to_head_obj:
+                    for line in read_obj:
+                        self.__token_info = line[:16]
+                        if self.__state == 'body':
+                            self.__get_head_body_func(line)
+                        elif self.__state == 'head':
+                            self.__get_head_head_func(line)
 
     def __get_head_from_temp(self, num):
         """
         Private method for joining headers and footers to body. This method
@@ -205,18 +203,17 @@ class Header:
         returns them as a string.
         """
         look_for = 'mi<mk<header-ope<' + num + '\n'
-        found_head = 0
+        found_head = False
         string_to_return = ''
-        line = 1
-        while line:
-            line = self.__read_from_head_obj.readline()
+        for line in self.__read_from_head_obj:
             if found_head:
                 if line == 'mi<mk<header-clo\n':
                     return string_to_return
-                string_to_return = string_to_return + line
+                string_to_return += line
             else:
                 if line == look_for:
-                    found_head = 1
+                    found_head = True
 
     def __join_from_temp(self):
         """
         Private method for rejoining footnotes to body. Read from the
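__get_head_from_temp() is the lookup half of that protocol: scan the header file for the numbered open marker, then collect every line until the close marker. Restated as a standalone function over any iterable of token lines (the fallback return for a missing close marker is an addition of this sketch):

    def get_header(lines, num):
        look_for = 'mi<mk<header-ope<' + num + '\n'
        found_head = False
        collected = ''
        for line in lines:
            if found_head:
                if line == 'mi<mk<header-clo\n':
                    return collected
                collected += line
            elif line == look_for:
                found_head = True
        return collected  # close marker missing: return what was gathered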
@@ -227,15 +224,13 @@ class Header:
         If no footnote marker is found, simply print out the token (line).
         """
         self.__read_from_head_obj = open(self.__header_holder, 'r')
-        read_obj = open(self.__write_to, 'r')
         self.__write_obj = open(self.__write_to2, 'w')
-        line = 1
-        while line:
-            line = read_obj.readline()
-            if line[:16] == 'mi<mk<header-ind':
-                line = self.__get_head_from_temp(line[17:-1])
-            self.__write_obj.write(line)
-        read_obj.close()
+        with open(self.__write_to, 'r') as read_obj:
+            for line in read_obj:
+                if line[:16] == 'mi<mk<header-ind':
+                    line = self.__get_head_from_temp(line[17:-1])
+                self.__write_obj.write(line)
 
     def join_headers(self):
         """
         Join the footnotes from the bottom of the file and put them in their
@@ -181,7 +181,7 @@ class Hex2Utf8:
         self.__dingbats_dict.update(dingbats_base_dict)
         self.__dingbats_dict.update(ms_dingbats_dict)
         # load dictionary for caps, and make a string for the replacement
-        self.__caps_uni_dict = char_map_obj.get_char_map(map='caps_uni')
+        self.__caps_uni_dict = char_map_obj.get_char_map(map = 'caps_uni')
         # # print self.__caps_uni_dict
         # don't think I'll need this
         ##keys = self.__caps_uni_dict.keys()
@@ -11,14 +11,18 @@
 #                                                                       #
 #########################################################################
 import sys
-"""
-"""
+
 class OldRtf:
     """
     Check to see if the RTF is an older version
     Logic:
+        If allowable control word/properties happen in text without being enclosed
+        in brackets the file will be considered old rtf
     """
-    def __init__(self, in_file, bug_handler, run_level ):
+    def __init__(self, in_file,
+            bug_handler,
+            run_level,
+        ):
         """
         Required:
             'file'--file to parse
@@ -32,46 +36,46 @@ class OldRtf:
         """
         self.__file = in_file
         self.__bug_handler = bug_handler
-        self.__initiate_values()
-        self.__ob_group = 0
-    def __initiate_values(self):
-        self.__previous_token = ''
-        self.__new_found = 0
+        self.__run_level = run_level
         self.__allowable = [
             'annotation',
             'blue______',
             'bold______',
             'caps______',
             'char-style',
             'dbl-strike',
             'emboss____',
             'engrave___',
             'font-color',
             'font-down_',
             'font-size_',
             'font-style',
             'font-up___',
             'footnot-mk',
             'green_____',
             'hidden____',
             'italics___',
             'outline___',
             'red_______',
             'shadow____',
             'small-caps',
             'strike-thr',
             'subscript_',
             'superscrip',
             'underlined',
         ]
-        self.__state = 'before_body'
         self.__action_dict = {
             'before_body'   : self.__before_body_func,
             'in_body'       : self.__check_tokens_func,
             'after_pard'    : self.__after_pard_func,
         }
-        self.__is_old = 0
+
+    def __initiate_values(self):
+        self.__previous_token = ''
+        self.__state = 'before_body'
         self.__found_new = 0
+        self.__ob_group = 0
 
     def __check_tokens_func(self, line):
         if self.__inline_info in self.__allowable:
             if self.__ob_group == self.__base_ob_count:
@@ -80,48 +84,56 @@ class OldRtf:
                 self.__found_new += 1
         elif self.__token_info == 'cw<pf<par-def___':
             self.__state = 'after_pard'
 
     def __before_body_func(self, line):
         if self.__token_info == 'mi<mk<body-open_':
             self.__state = 'in_body'
             self.__base_ob_count = self.__ob_group
 
     def __after_pard_func(self, line):
         if line[0:2] != 'cw':
             self.__state = 'in_body'
 
     def check_if_old_rtf(self):
         """
         Requires:
             nothing
         Returns:
-            1 if file is older RTf
-            0 if file is newer RTF
+            True if file is older RTf
+            False if file is newer RTF
         """
+        self.__initiate_values()
-        read_obj = open(self.__file, 'r')
-        line = 1
         line_num = 0
-        while line:
-            line = read_obj.readline()
-            line_num += 1
-            self.__token_info = line[:16]
-            if self.__token_info == 'mi<mk<body-close':
-                return 0
-            self.__ob_group = 0
-            if self.__token_info == 'ob<nu<open-brack':
-                self.__ob_group += 1
-                self.__ob_count = line[-5:-1]
-            if self.__token_info == 'cb<nu<clos-brack':
-                self.__ob_group -= 1
-                self.__cb_count = line[-5:-1]
-            self.__inline_info = line[6:16]
-            if self.__state == 'after_body':
-                return 0
-            action = self.__action_dict.get(self.__state)
-            if not action:
-                sys.stderr.write('No action for state!\n')
-            result = action(line)
-            if result == 'new_rtf':
-                return 0
-            elif result == 'old_rtf':
-                return 1
-            self.__previous_token = line[6:16]
-        return 0
+        with open(self.__file, 'r') as read_obj:
+            for line in read_obj:
+                line_num += 1
+                self.__token_info = line[:16]
+                if self.__token_info == 'mi<mk<body-close':
+                    return False
+                if self.__token_info == 'ob<nu<open-brack':
+                    self.__ob_group += 1
+                    self.__ob_count = line[-5:-1]
+                if self.__token_info == 'cb<nu<clos-brack':
+                    self.__ob_group -= 1
+                    self.__cb_count = line[-5:-1]
+                self.__inline_info = line[6:16]
+                if self.__state == 'after_body':
+                    return False
+                action = self.__action_dict.get(self.__state)
+                if action is None:
+                    try:
+                        sys.stderr.write('No action for this state!\n')
+                    except:
+                        pass
+                result = action(line)
+                if result == 'new_rtf':
+                    return False
+                elif result == 'old_rtf':
+                    if self.__run_level > 3:
+                        sys.stderr.write(
+                            'Old rtf construction %s (bracket %s, line %s)\n'
+                            % (self.__inline_info, str(self.__ob_group), line_num)
+                        )
+                    return True
+                self.__previous_token = line[6:16]
+        return False
|
|||||||
# #
|
# #
|
||||||
# #
|
# #
|
||||||
#########################################################################
|
#########################################################################
|
||||||
import sys, os, codecs
|
import sys, os
|
||||||
|
# , codecs
|
||||||
|
|
||||||
class Output:
|
class Output:
|
||||||
"""
|
"""
|
||||||
Output file
|
Output file
|
||||||
@@ -19,7 +21,8 @@ class Output:
             file,
             orig_file,
             output_dir = None,
-            out_file = None
+            out_file = None,
+            no_ask = True
         ):
         """
         Required:
@@ -33,8 +36,9 @@ class Output:
         self.__file = file
         self.__orig_file = orig_file
         self.__output_dir = output_dir
-        self.__no_ask = 1
+        self.__no_ask = no_ask
         self.__out_file = out_file
 
     def output(self):
         """
         Required:
@@ -45,13 +49,14 @@ class Output:
         output the line to the screen if no output file given. Otherwise, output to
         the file.
         """
-        # self.__output_xml(self.__file, self.__out_file)
         if self.__output_dir:
             self.__output_to_dir_func()
         elif self.__out_file:
-            self.__output_xml(self.__file, self.__out_file)
+            self.__output_to_file_func()
+            # self.__output_xml(self.__file, self.__out_file)
         else:
             self.__output_to_standard_func()
 
     def __output_to_dir_func(self):
         """
         Requires:
@@ -64,32 +69,25 @@ class Output:
         """
         base_name = os.path.basename(self.__orig_file)
         base_name, ext = os.path.splitext(base_name)
-        output_file = '%s.xml' % base_name
-        output_file = os.path.join(self.__output_dir, output_file)
+        output_file = os.path.join(self.__output_dir, '%s.xml' % base_name)
         # change if user wants to output to a specific file
         if self.__out_file:
             output_file = os.path.join(self.__output_dir, self.__out_file)
         user_response = 'o'
-        if os.path.isfile(output_file):
-            if self.__no_ask:
-                user_response = 'o'
-            else:
-                msg = 'Do you want to over-write %s?\n' % output_file
-                msg += 'Type "o" to over-write.\n'
-                msg += 'Type any other key to print to standard output.\n'
-                sys.stderr.write(msg)
-                user_response = raw_input()
+        if os.path.isfile(output_file) and not self.__no_ask:
+            msg = 'Do you want to overwrite %s?\n' % output_file
+            msg += ('Type "o" to overwrite.\n'
+                'Type any other key to print to standard output.\n')
+            sys.stderr.write(msg)
+            user_response = raw_input()
         if user_response == 'o':
-            read_obj = open(self.__file, 'r')
-            write_obj = open(output_file, 'w')
-            line = 1
-            while line:
-                line = read_obj.readline()
-                write_obj.write(line)
-            read_obj.close()
-            write_obj.close()
+            with open(self.__file, 'r') as read_obj:
+                with open(output_file, 'w') as write_obj:
+                    for line in read_obj:
+                        write_obj.write(line)
         else:
             self.__output_to_standard_func()
 
     def __output_to_file_func(self):
         """
         Required:
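A hypothetical call sketch for the reworked class: batch callers keep the default no_ask=True and overwrite silently, while an interactive tool can pass no_ask=False to get the stderr prompt shown above (argument names are those of the constructor; the file paths are illustrative):

    out = Output('tokens.data', 'book.rtf',
                 output_dir='/tmp', out_file='book.xml',
                 no_ask=True)   # False would prompt before overwriting
    out.output()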
@@ -99,14 +97,11 @@ class Output:
         Logic:
             read one line at a time. Output to standard
         """
-        read_obj = open(self.__file, 'r')
-        write_obj = open(self.__out_file, 'w')
-        line = 1
-        while line:
-            line = read_obj.readline()
-            write_obj.write(line)
-        read_obj.close()
-        write_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            with open(self.__out_file, 'w') as write_obj:
+                for line in read_obj:
+                    write_obj.write(line)
 
     def __output_to_standard_func(self):
         """
         Required:
@@ -116,26 +111,24 @@ class Output:
         Logic:
             read one line at a time. Output to standard
         """
-        read_obj = open(self.__file, 'r')
-        line = 1
-        while line:
-            line = read_obj.readline()
-            sys.stdout.write(line)
-        read_obj.close()
-    def __output_xml(self, in_file, out_file):
-        """
-        output the ill-formed xml file
-        """
-        (utf8_encode, utf8_decode, utf8_reader, utf8_writer) = codecs.lookup("utf-8")
-        write_obj = utf8_writer(open(out_file, 'w'))
-        write_obj = open(out_file, 'w')
-        read_obj = utf8_writer(open(in_file, 'r'))
-        read_obj = open(in_file, 'r')
-        line = 1
-        while line:
-            line = read_obj.readline()
-            if isinstance(line, type(u"")):
-                line = line.encode("utf-8")
-            write_obj.write(line)
-        read_obj.close()
-        write_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            for line in read_obj:
+                sys.stdout.write(line)
+
+    # def __output_xml(self, in_file, out_file):
+    #     """
+    #     output the ill-formed xml file
+    #     """
+    #     (utf8_encode, utf8_decode, utf8_reader, utf8_writer) = codecs.lookup("utf-8")
+    #     write_obj = utf8_writer(open(out_file, 'w'))
+    #     write_obj = open(out_file, 'w')
+    #     read_obj = utf8_writer(open(in_file, 'r'))
+    #     read_obj = open(in_file, 'r')
+    #     line = 1
+    #     while line:
+    #         line = read_obj.readline()
+    #         if isinstance(line, type(u"")):
+    #             line = line.encode("utf-8")
+    #         write_obj.write(line)
+    #     read_obj.close()
+    #     write_obj.close()
@@ -11,31 +11,32 @@
 #                                                                       #
 #########################################################################
 import sys, os
 
 from calibre.ebooks.rtf2xml import copy
 from calibre.ptempfile import better_mktemp
+
 class Paragraphs:
     """
     =================
     Purpose
     =================
     Write paragraph tags for a tokenized file. (This module won't be any use to use
     to you unless you use it as part of the other modules.)
     -------------
     Method
     -------------
     RTF does not tell you when a paragraph begins. It only tells you when the
     paragraph ends.
     In order to make paragraphs out of this limited info, the parser starts in the
     body of the documents and assumes it is not in a paragraph. It looks for clues
     to begin a paragraph. Text starts a paragraph; so does an inline field or
     list-text. If an end of paragraph marker (\par) is found, then this indicates
     a blank paragraph.
     Once a paragraph is found, the state changes to 'paragraph.' In this state,
     clues are looked to for the end of a paragraph. The end of a paragraph marker
     (\par) marks the end of a paragraph. So does the end of a footnote or heading;
-    a paragraph definintion; the end of a field-block; and the beginning of a
+    a paragraph definition; the end of a field-block; and the beginning of a
     section. (How about the end of a section or the end of a field-block?)
     """
     def __init__(self,
             in_file,
@@ -60,6 +61,7 @@ section. (How about the end of a section or the end of a field-block?)
         self.__write_empty_para = write_empty_para
         self.__run_level = run_level
         self.__write_to = better_mktemp()
+
     def __initiate_values(self):
         """
         Initiate all values.
@@ -77,7 +79,7 @@ section. (How about the end of a section or the end of a field-block?)
         self.__paragraph_dict = {
             'cw<pf<par-end___'  : self.__close_para_func,   # end of paragraph
             'mi<mk<headi_-end'  : self.__close_para_func,   # end of header or footer
-            ##'cw<pf<par-def___'  : self.__close_para_func,   # paragraph definition
+            ## 'cw<pf<par-def___'  : self.__close_para_func,   # paragraph definition
             # 'mi<mk<fld-bk-end'  : self.__close_para_func,   # end of field-block
             'mi<mk<fldbk-end_'  : self.__close_para_func,   # end of field-block
             'mi<mk<body-close'  : self.__close_para_func,   # end of body
@@ -99,6 +101,7 @@ section. (How about the end of a section or the end of a field-block?)
             'mi<mk<pict-start'  : self.__start_para_func,
             'cw<pf<page-break'  : self.__empty_pgbk_func,   # page break
         }
+
     def __before_body_func(self, line):
         """
         Required:
@@ -112,6 +115,7 @@ section. (How about the end of a section or the end of a field-block?)
         if self.__token_info == 'mi<mk<body-open_':
             self.__state = 'not_paragraph'
         self.__write_obj.write(line)
+
     def __not_paragraph_func(self, line):
         """
         Required:
@@ -127,6 +131,7 @@ section. (How about the end of a section or the end of a field-block?)
         if action:
             action(line)
         self.__write_obj.write(line)
+
     def __paragraph_func(self, line):
         """
         Required:
@@ -144,6 +149,7 @@ section. (How about the end of a section or the end of a field-block?)
             action(line)
         else:
             self.__write_obj.write(line)
+
     def __start_para_func(self, line):
         """
         Requires:
@@ -160,6 +166,7 @@ section. (How about the end of a section or the end of a field-block?)
             )
         self.__write_obj.write(self.__start2_marker)
         self.__state = 'paragraph'
+
     def __empty_para_func(self, line):
         """
         Requires:
@@ -176,6 +183,7 @@ section. (How about the end of a section or the end of a field-block?)
             'mi<tg<empty_____<para\n'
             )
         self.__write_obj.write(self.__end_marker)   # marker for later parsing
+
     def __empty_pgbk_func(self, line):
         """
         Requires:
@@ -188,6 +196,7 @@ section. (How about the end of a section or the end of a field-block?)
         self.__write_obj.write(
             'mi<tg<empty_____<page-break\n'
             )
+
     def __close_para_func(self, line):
         """
         Requires:
@@ -205,6 +214,7 @@ section. (How about the end of a section or the end of a field-block?)
         self.__write_obj.write(self.__end_marker)   # marker for later parser
         self.__write_obj.write(line)
         self.__state = 'not_paragraph'
+
     def __bogus_para__def_func(self, line):
         """
         Requires:
@@ -215,6 +225,7 @@ section. (How about the end of a section or the end of a field-block?)
         if a \pard occurs in a paragraph, I want to ignore it. (I believe)
         """
         self.__write_obj.write('mi<mk<bogus-pard\n')
+
     def make_paragraphs(self):
         """
         Requires:
@@ -229,20 +240,18 @@ section. (How about the end of a section or the end of a field-block?)
             only other state is 'paragraph'.
         """
         self.__initiate_values()
-        read_obj = open(self.__file, 'r')
-        self.__write_obj = open(self.__write_to, 'w')
-        line_to_read = 1
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            self.__token_info = line[:16]
-            action = self.__state_dict.get(self.__state)
-            if action == None:
-                sys.stderr.write('no no matching state in module sections.py\n')
-                sys.stderr.write(self.__state + '\n')
-            action(line)
-        read_obj.close()
-        self.__write_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            with open(self.__write_to, 'w') as self.__write_obj:
+                for line in read_obj:
+                    self.__token_info = line[:16]
+                    action = self.__state_dict.get(self.__state)
+                    if action is None:
+                        try:
+                            sys.stderr.write('no matching state in module paragraphs.py\n')
+                            sys.stderr.write(self.__state + '\n')
+                        except:
+                            pass
+                    action(line)
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "paragraphs.data")
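The paragraph logic described in the docstring is a two-state machine: text-like tokens open a paragraph, end-of-paragraph tokens close it. A compressed toy version, handling only one opening and one closing token (token names follow the module; the marker spellings here are illustrative, the real pass writes its own start and end markers):

    def mark_paragraphs(tokens):
        out, state = [], 'not_paragraph'
        for line in tokens:
            info = line[:16]
            if state == 'not_paragraph' and info == 'tx<nu<__________':
                out.append('mi<mk<para-start\n')   # illustrative marker
                state = 'paragraph'
            out.append(line)
            if state == 'paragraph' and info == 'cw<pf<par-end___':
                out.append('mi<mk<para-end__\n')   # illustrative marker
                state = 'not_paragraph'
        return out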
@ -11,16 +11,24 @@
|
|||||||
# #
|
# #
|
||||||
#########################################################################
|
#########################################################################
|
||||||
import sys,os
|
import sys,os
|
||||||
|
|
||||||
from calibre.ebooks.rtf2xml import copy
|
from calibre.ebooks.rtf2xml import copy
|
||||||
|
|
||||||
class Preamble:
|
class Preamble:
|
||||||
"""
|
"""
|
||||||
Fix the reamaing parts of the preamble. This module does very little. It
|
Fix the reamaing parts of the preamble. This module does very little. It
|
||||||
makes sure that no text gets put in the revision of list table. In the
|
makes sure that no text gets put in the revision of list table. In the
|
||||||
future, when I understand how to interprett he revision table and list
|
future, when I understand how to interpret the revision table and list
|
||||||
table, I will make these methods more functional.
|
table, I will make these methods more functional.
|
||||||
"""
|
"""
|
||||||
def __init__(self, file, bug_handler, platform, default_font, code_page,
|
def __init__(self, file,
|
||||||
copy=None, temp_dir=None):
|
bug_handler,
|
||||||
|
platform,
|
||||||
|
default_font,
|
||||||
|
code_page,
|
||||||
|
copy=None,
|
||||||
|
temp_dir=None,
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Required:
|
Required:
|
||||||
file--file to parse
|
file--file to parse
|
||||||
@ -44,6 +52,7 @@ class Preamble:
|
|||||||
self.__write_to = os.path.join(temp_dir,"info_table_info.data")
|
self.__write_to = os.path.join(temp_dir,"info_table_info.data")
|
||||||
else:
|
else:
|
||||||
self.__write_to = "info_table_info.data"
|
self.__write_to = "info_table_info.data"
|
||||||
|
|
||||||
def __initiate_values(self):
|
def __initiate_values(self):
|
||||||
"""
|
"""
|
||||||
Initiate all values.
|
Initiate all values.
|
||||||
@ -62,12 +71,14 @@ class Preamble:
|
|||||||
         'mi<mk<revtbl-beg' : self.__found_revision_table_func,
         'mi<mk<body-open_' : self.__found_body_func,
         }

     def __default_func(self, line):
         action = self.__default_dict.get(self.__token_info)
         if action:
             action(line)
         else:
             self.__write_obj.write(line)

     def __found_rtf_head_func(self, line):
         """
         Requires:
@ -84,8 +95,10 @@ class Preamble:
             '<platform>%s\n' % (self.__default_font, self.__code_page,
                 self.__platform)
             )

     def __found_list_table_func(self, line):
         self.__state = 'list_table'

     def __list_table_func(self, line):
         if self.__token_info == 'mi<mk<listabend_':
             self.__state = 'default'
@ -93,8 +106,10 @@ class Preamble:
             pass
         else:
             self.__write_obj.write(line)

     def __found_revision_table_func(self, line):
         self.__state = 'revision'

     def __revision_table_func(self, line):
         if self.__token_info == 'mi<mk<revtbl-end':
             self.__state = 'default'
@ -102,11 +117,14 @@ class Preamble:
             pass
         else:
             self.__write_obj.write(line)

     def __found_body_func(self, line):
         self.__state = 'body'
         self.__write_obj.write(line)

     def __body_func(self, line):
         self.__write_obj.write(line)

     def fix_preamble(self):
         """
         Requires:
@ -119,20 +137,15 @@ class Preamble:
         the list table.
         """
         self.__initiate_values()
-        read_obj = open(self.__file, 'r')
-        self.__write_obj = open(self.__write_to, 'w')
-        line_to_read = 1
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            self.__token_info = line[:16]
-            action = self.__state_dict.get(self.__state)
-            if action == None:
-                sys.stderr.write('no no matching state in module preamble_rest.py\n')
-                sys.stderr.write(self.__state + '\n')
-            action(line)
-        read_obj.close()
-        self.__write_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            with open(self.__write_to, 'w') as self.__write_obj:
+                for line in read_obj:
+                    self.__token_info = line[:16]
+                    action = self.__state_dict.get(self.__state)
+                    if action is None:
+                        sys.stderr.write(
+                            'no matching state in module preamble_rest.py\n' + self.__state + '\n')
+                    action(line)
         copy_obj = copy.Copy(bug_handler = self.__bug_handler)
         if self.__copy:
             copy_obj.copy_file(self.__write_to, "preamble_div.data")
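The fix_preamble hunk above replaces a hand-rolled readline() loop with with-blocks and direct iteration over the file object, so both files are closed even if a handler raises. A minimal standalone sketch of the same state-dispatch loop; the names (dispatch_lines, state_dict) are hypothetical, not calibre's API:

    import sys

    def dispatch_lines(in_path, out_path, state_dict, state='default'):
        with open(in_path, 'r') as read_obj:        # both files are closed
            with open(out_path, 'w') as write_obj:  # even if a handler raises
                for line in read_obj:
                    action = state_dict.get(state)
                    if action is None:
                        sys.stderr.write('no matching state: %s\n' % state)
                        continue  # unlike the original, skip rather than call None
                    action(line, write_obj)

    # Example: a one-state dict that copies every line through unchanged
    # (assumes 'in.data' exists):
    dispatch_lines('in.data', 'out.data',
                   {'default': lambda line, out: out.write(line)})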

@ -11,43 +11,44 @@
 # #
 #########################################################################
 import sys, os

 from calibre.ebooks.rtf2xml import copy
 from calibre.ptempfile import better_mktemp

 class Sections:
     """
     =================
     Purpose
     =================
     Write section tags for a tokenized file. (This module won't be any use to use
     to you unless you use it as part of the other modules.)
     ---------------
     logic
     ---------------
     The tags for the first section breaks have already been written.
     RTF stores section breaks with the \sect tag. Each time this tag is
     encountered, add one to the counter.
     When I encounter the \sectd tag, I want to collect all the appropriate tokens
     that describe the section. When I reach a \pard, I know I an stop collecting
     tokens and write the section tags.
     The exception to this method occurs when sections occur in field blocks, such
     as the index. Normally, two section break occur within the index and other
     field-blocks. (If less or more section breaks occurr, this code may not work.)
-    I want the sections to occurr outside of the index. That is, the index
+    I want the sections to occur outside of the index. That is, the index
     should be nested inside one section tag. After the index is complete, a new
     section should begin.
     In order to write the sections outside of the field blocks, I have to store
     all of the field block as a string. When I ecounter the \sect tag, add one to
     the section counter, but store this number in a list. Likewise, store the
     information describing the section in another list.
     When I reach the end of the field block, choose the first item from the
     numbered list as the section number. Choose the first item in the description
     list as the values and attributes of the section. Enclose the field string
     between the section tags.
     Start a new section outside the field-block strings. Use the second number in
     the list; use the second item in the description list.
     CHANGE (2004-04-26) No longer write sections that occurr in field-blocks.
     Instead, ingore all section information in a field-block.
     """
     def __init__(self,
             in_file,
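The docstring above describes the bookkeeping in prose. A rough toy model of that counting scheme (hypothetical and simplified, not the real Sections class): every \sect bumps a counter, and inside a field block the section numbers and descriptions are buffered rather than written out immediately.

    class SectionCounter(object):
        """Toy model of the logic described above, not calibre's Sections."""

        def __init__(self):
            self.count = 0
            self.in_field_block = False
            self.pending = []  # (number, description) pairs seen in a field block

        def handle_sect(self, description):
            self.count += 1
            if self.in_field_block:
                self.pending.append((self.count, description))  # defer
            else:
                print('<section num="%d" desc="%s">' % (self.count, description))

        def end_field_block(self):
            # Per the 2004-04-26 change noted above, section info inside a
            # field block is simply discarded rather than replayed.
            self.in_field_block = False
            self.pending = []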

@ -137,8 +137,9 @@ def _config(): # {{{
     c.add_opt('LRF_ebook_viewer_options', default=None,
             help=_('Options for the LRF ebook viewer'))
     c.add_opt('internally_viewed_formats', default=['LRF', 'EPUB', 'LIT',
-        'MOBI', 'PRC', 'AZW', 'HTML', 'FB2', 'PDB', 'RB', 'SNB', 'HTMLZ'],
-        help=_('Formats that are viewed using the internal viewer'))
+        'MOBI', 'PRC', 'POBI', 'AZW', 'AZW3', 'HTML', 'FB2', 'PDB', 'RB',
+        'SNB', 'HTMLZ'], help=_(
+        'Formats that are viewed using the internal viewer'))
     c.add_opt('column_map', default=ALL_COLUMNS,
             help=_('Columns to be displayed in the book list'))
     c.add_opt('autolaunch_server', default=False, help=_('Automatically launch content server on application startup'))

@ -10,7 +10,7 @@ from functools import partial

 from PyQt4.Qt import (QMenu, Qt, QInputDialog, QToolButton, QDialog,
     QDialogButtonBox, QGridLayout, QLabel, QLineEdit, QIcon, QSize,
-    QCoreApplication)
+    QCoreApplication, pyqtSignal)

 from calibre import isbytestring, sanitize_file_name_unicode
 from calibre.constants import filesystem_encoding, iswindows
@ -142,6 +142,7 @@ class ChooseLibraryAction(InterfaceAction):
     dont_add_to = frozenset(['context-menu-device'])
     action_add_menu = True
     action_menu_clone_qaction = _('Switch/create library...')
+    restore_view_state = pyqtSignal(object)

     def genesis(self):
         self.base_text = _('%d books')
@ -206,6 +207,17 @@ class ChooseLibraryAction(InterfaceAction):
             self.maintenance_menu.addAction(ac)

         self.choose_menu.addMenu(self.maintenance_menu)
+        self.view_state_map = {}
+        self.restore_view_state.connect(self._restore_view_state,
+                type=Qt.QueuedConnection)
+
+    @property
+    def preserve_state_on_switch(self):
+        ans = getattr(self, '_preserve_state_on_switch', None)
+        if ans is None:
+            self._preserve_state_on_switch = ans = \
+                self.gui.library_view.preserve_state(require_selected_ids=False)
+        return ans

     def pick_random(self, *args):
         self.gui.iactions['Pick Random Book'].pick_random()
@ -221,6 +233,13 @@ class ChooseLibraryAction(InterfaceAction):
     def library_changed(self, db):
         self.stats.library_used(db)
         self.build_menus()
+        state = self.view_state_map.get(self.stats.canonicalize_path(
+            db.library_path), None)
+        if state is not None:
+            self.restore_view_state.emit(state)
+
+    def _restore_view_state(self, state):
+        self.preserve_state_on_switch.state = state

     def initialization_complete(self):
         self.library_changed(self.gui.library_view.model().db)
@ -401,8 +420,11 @@ class ChooseLibraryAction(InterfaceAction):
     def switch_requested(self, location):
         if not self.change_library_allowed():
             return
+        db = self.gui.library_view.model().db
+        current_lib = self.stats.canonicalize_path(db.library_path)
+        self.view_state_map[current_lib] = self.preserve_state_on_switch.state
         loc = location.replace('/', os.sep)
-        exists = self.gui.library_view.model().db.exists_at(loc)
+        exists = db.exists_at(loc)
         if not exists:
             d = MovedDialog(self.stats, location, self.gui)
             ret = d.exec_()
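Two details in this hunk carry the "preserve position when switching libraries" feature: the saved state is keyed by the canonicalized library path, and the restore goes through a signal connected with type=Qt.QueuedConnection, so _restore_view_state runs on a later event-loop pass, after the library switch has fully completed. A standalone sketch of the queued-connection behaviour (hypothetical example, not calibre code):

    from PyQt4.Qt import QCoreApplication, QObject, Qt, QTimer, pyqtSignal

    class StateKeeper(QObject):
        restore_view_state = pyqtSignal(object)

        def __init__(self):
            QObject.__init__(self)
            self.restore_view_state.connect(self._restore,
                    type=Qt.QueuedConnection)

        def _restore(self, state):
            print('restored: %r' % state)  # runs from the event loop, not emit()

    app = QCoreApplication([])
    keeper = StateKeeper()
    keeper.restore_view_state.emit({'selected_ids': [1, 2]})
    print('emit() returned before the slot ran')
    QTimer.singleShot(0, app.quit)
    app.exec_()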

@ -6,6 +6,7 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

 from functools import partial
+from collections import Counter

 from PyQt4.Qt import QObject, QTimer

@ -117,13 +118,14 @@ class DeleteAction(InterfaceAction):

     def _get_selected_formats(self, msg, ids):
         from calibre.gui2.dialogs.select_formats import SelectFormats
-        fmts = set([])
+        c = Counter()
         db = self.gui.library_view.model().db
         for x in ids:
             fmts_ = db.formats(x, index_is_id=True, verify_formats=False)
             if fmts_:
-                fmts.update(frozenset([x.lower() for x in fmts_.split(',')]))
-        d = SelectFormats(list(sorted(fmts)), msg, parent=self.gui)
+                for x in frozenset([x.lower() for x in fmts_.split(',')]):
+                    c[x] += 1
+        d = SelectFormats(c, msg, parent=self.gui)
         if d.exec_() != d.Accepted:
             return None
         return d.selected_formats
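The flat set of format names becomes a collections.Counter, so the dialog can know how many of the selected books actually have each format (the matching change to SelectFormats itself is in a file not shown here). A worked example of the counting loop:

    from collections import Counter

    # Three books' format lists, as db.formats() might return them
    # (illustrative data):
    rows = ['EPUB,MOBI', 'EPUB', 'EPUB,PDF']
    c = Counter()
    for fmts_ in rows:
        for fmt in frozenset(x.lower() for x in fmts_.split(',')):
            c[fmt] += 1
    print(c)  # Counter({'epub': 3, 'mobi': 1, 'pdf': 1})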

@ -12,11 +12,11 @@ from PyQt4.Qt import (QDialog, QVBoxLayout, QHBoxLayout, QRadioButton, QFrame,

 from calibre import as_unicode
 from calibre.constants import isosx
-from calibre.gui2 import error_dialog, question_dialog, open_local_file
+from calibre.gui2 import error_dialog, question_dialog, open_local_file, gprefs
 from calibre.gui2.actions import InterfaceAction
 from calibre.ptempfile import (PersistentTemporaryDirectory,
     PersistentTemporaryFile)
-from calibre.utils.config import prefs
+from calibre.utils.config import prefs, tweaks

 class TweakBook(QDialog):

@ -32,11 +32,16 @@ class TweakBook(QDialog):
             index_is_id=True))

         button = self.fmt_choice_buttons[0]
+        button_map = {unicode(x.text()):x for x in self.fmt_choice_buttons}
         of = prefs['output_format'].upper()
-        for x in self.fmt_choice_buttons:
-            if unicode(x.text()) == of:
-                button = x
-                break
+        df = tweaks.get('default_tweak_format', None)
+        lf = gprefs.get('last_tweak_format', None)
+        if df and df.lower() == 'remember' and lf in button_map:
+            button = button_map[lf]
+        elif df and df.upper() in button_map:
+            button = button_map[df.upper()]
+        elif of in button_map:
+            button = button_map[of]
         button.setChecked(True)

         self.init_state()
@ -148,6 +153,8 @@ class TweakBook(QDialog):

     def explode(self):
         self.show_msg(_('Exploding, please wait...'))
+        if len(self.fmt_choice_buttons) > 1:
+            gprefs.set('last_tweak_format', self.current_format.upper())
         QTimer.singleShot(5, self.do_explode)

     def ask_question(self, msg):
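The replacement logic resolves the pre-checked format button in priority order: a default_tweak_format tweak of 'remember' restores the last format tweaked (saved in explode() via gprefs), an explicit default_tweak_format wins next, and the preferred output format is the fallback. A plain-dict sketch of that resolution (hypothetical standalone version, not calibre's tweaks/gprefs stores):

    def pick_tweak_format(available, tweaks, gprefs, output_format):
        df = tweaks.get('default_tweak_format')
        lf = gprefs.get('last_tweak_format')
        if df and df.lower() == 'remember' and lf in available:
            return lf            # 'remember': reuse the last tweaked format
        if df and df.upper() in available:
            return df.upper()    # explicit default format
        if output_format in available:
            return output_format # fall back to the preferred output format
        return available[0]

    print(pick_tweak_format(['EPUB', 'AZW3'],
            {'default_tweak_format': 'remember'},
            {'last_tweak_format': 'AZW3'}, 'EPUB'))  # AZW3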

@ -161,8 +161,14 @@ class EditorWidget(QWebView): # {{{
         self.page().setContentEditable(True)

     def clear_text(self, *args):
+        us = self.page().undoStack()
+        us.beginMacro('clear all text')
         self.action_select_all.trigger()
-        self.action_cut.trigger()
+        self.action_remove_format.trigger()
+        self.exec_command('delete')
+        us.endMacro()
+        self.set_font_style()
+        self.setFocus(Qt.OtherFocusReason)

     def link_clicked(self, url):
         open_url(url)
@ -262,20 +268,22 @@ class EditorWidget(QWebView): # {{{

         def fset(self, val):
             self.setHtml(val)
-            fi = QFontInfo(QApplication.font(self))
-            f = fi.pixelSize() + 1 + int(tweaks['change_book_details_font_size_by'])
-            fam = unicode(fi.family()).strip().replace('"', '')
-            if not fam:
-                fam = 'sans-serif'
-            style = 'font-size: %fpx; font-family:"%s",sans-serif;' % (f, fam)
-
-            # toList() is needed because PyQt on Debian is old/broken
-            for body in self.page().mainFrame().documentElement().findAll('body').toList():
-                body.setAttribute('style', style)
-            self.page().setContentEditable(True)
+            self.set_font_style()

         return property(fget=fget, fset=fset)

+    def set_font_style(self):
+        fi = QFontInfo(QApplication.font(self))
+        f = fi.pixelSize() + 1 + int(tweaks['change_book_details_font_size_by'])
+        fam = unicode(fi.family()).strip().replace('"', '')
+        if not fam:
+            fam = 'sans-serif'
+        style = 'font-size: %fpx; font-family:"%s",sans-serif;' % (f, fam)
+
+        # toList() is needed because PyQt on Debian is old/broken
+        for body in self.page().mainFrame().documentElement().findAll('body').toList():
+            body.setAttribute('style', style)
+        self.page().setContentEditable(True)
+
     def keyPressEvent(self, ev):
         if ev.key() in (Qt.Key_Tab, Qt.Key_Escape, Qt.Key_Backtab):
             ev.ignore()
@ -627,4 +635,6 @@ if __name__ == '__main__':
     w = Editor()
     w.resize(800, 600)
     w.show()
+    w.html = '<b>testing</b>'
+    app.exec_()
     #print w.html
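clear_text now wraps select-all, remove-format and delete in a single macro on the page's QUndoStack, so one Ctrl+Z restores the cleared comment instead of undoing each step separately. A standalone QUndoStack illustration of beginMacro()/endMacro() (hypothetical example, not calibre code):

    from PyQt4.Qt import QUndoCommand, QUndoStack

    class Append(QUndoCommand):
        def __init__(self, buf, ch):
            QUndoCommand.__init__(self, 'append %s' % ch)
            self.buf, self.ch = buf, ch

        def redo(self):
            self.buf.append(self.ch)

        def undo(self):
            self.buf.pop()

    buf, us = [], QUndoStack()
    us.beginMacro('clear all text')
    for ch in 'abc':
        us.push(Append(buf, ch))  # push() also calls redo()
    us.endMacro()
    print(buf)  # ['a', 'b', 'c']
    us.undo()   # one undo reverts the whole macro
    print(buf)  # []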

@ -126,7 +126,8 @@ class BulkConfig(Config):
     def setup_output_formats(self, db, preferred_output_format):
         if preferred_output_format:
             preferred_output_format = preferred_output_format.lower()
-        output_formats = sorted(available_output_formats())
+        output_formats = sorted(available_output_formats(),
+                key=lambda x:{'EPUB':'!A', 'MOBI':'!B'}.get(x.upper(), x))
         output_formats.remove('oeb')
         preferred_output_format = preferred_output_format if \
             preferred_output_format and preferred_output_format \
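The sort key maps EPUB and MOBI to the sentinels '!A' and '!B'; since '!' sorts before every letter, those two float to the top of the dropdown while the remaining formats stay alphabetical:

    formats = ['azw3', 'epub', 'fb2', 'mobi', 'pdf']
    print(sorted(formats,
                 key=lambda x: {'EPUB': '!A', 'MOBI': '!B'}.get(x.upper(), x)))
    # ['epub', 'mobi', 'azw3', 'fb2', 'pdf']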

@ -109,12 +109,18 @@
       </item>
       <item row="0" column="1">
        <widget class="QDoubleSpinBox" name="opt_margin_left">
+        <property name="specialValueText">
+         <string>No margin</string>
+        </property>
         <property name="suffix">
          <string> pt</string>
         </property>
         <property name="decimals">
          <number>1</number>
         </property>
+        <property name="minimum">
+         <double>-1.000000000000000</double>
+        </property>
         <property name="maximum">
          <double>200.000000000000000</double>
         </property>
@ -132,12 +138,18 @@
       </item>
       <item row="1" column="1">
        <widget class="QDoubleSpinBox" name="opt_margin_top">
+        <property name="specialValueText">
+         <string>No margin</string>
+        </property>
         <property name="suffix">
          <string> pt</string>
         </property>
         <property name="decimals">
          <number>1</number>
         </property>
+        <property name="minimum">
+         <double>-1.000000000000000</double>
+        </property>
         <property name="maximum">
          <double>200.000000000000000</double>
         </property>
@ -155,12 +167,18 @@
       </item>
       <item row="2" column="1">
        <widget class="QDoubleSpinBox" name="opt_margin_right">
+        <property name="specialValueText">
+         <string>No margin</string>
+        </property>
         <property name="suffix">
          <string> pt</string>
         </property>
         <property name="decimals">
          <number>1</number>
         </property>
+        <property name="minimum">
+         <double>-1.000000000000000</double>
+        </property>
         <property name="maximum">
          <double>200.000000000000000</double>
         </property>
@ -178,12 +196,18 @@
       </item>
       <item row="3" column="1">
        <widget class="QDoubleSpinBox" name="opt_margin_bottom">
+        <property name="specialValueText">
+         <string>No margin</string>
+        </property>
         <property name="suffix">
          <string> pt</string>
         </property>
         <property name="decimals">
          <number>1</number>
         </property>
+        <property name="minimum">
+         <double>-1.000000000000000</double>
+        </property>
         <property name="maximum">
          <double>200.000000000000000</double>
         </property>
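All four margin spinboxes gain the same pair of properties: a minimum of -1 and a specialValueText. Qt shows specialValueText in place of the number whenever a spinbox sits at its minimum, so -1 acts as a "No margin" sentinel below the real 0-200 pt range. A minimal sketch of the equivalent setup in code:

    from PyQt4.Qt import QApplication, QDoubleSpinBox

    app = QApplication([])
    box = QDoubleSpinBox()
    box.setDecimals(1)
    box.setMinimum(-1.0)   # the sentinel value
    box.setMaximum(200.0)
    box.setSuffix(' pt')
    box.setSpecialValueText('No margin')
    box.setValue(-1.0)     # at the minimum, the special text is displayed
    print(box.text())      # 'No margin'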

@ -242,7 +242,8 @@ class Config(ResizableDialog, Ui_Dialog):
             preferred_output_format):
         if preferred_output_format:
             preferred_output_format = preferred_output_format.lower()
-        output_formats = sorted(available_output_formats())
+        output_formats = sorted(available_output_formats(),
+                key=lambda x:{'EPUB':'!A', 'MOBI':'!B'}.get(x.upper(), x))
         output_formats.remove('oeb')
         input_format, input_formats = get_input_format_for_book(db, book_id,
             preferred_input_format)

@ -349,7 +349,8 @@ class Text(Base):
         return d.exec_()

     def edit(self):
-        if self.getter() != self.initial_val:
+        if (self.getter() != self.initial_val and (self.getter() or
+            self.initial_val)):
             d = self._save_dialog(self.parent, _('Values changed'),
                 _('You have changed the values. In order to use this '
                     'editor, you must either discard or apply these '
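The widened condition treats None and the empty string as equivalent: the "Values changed" prompt now fires only when the values differ and at least one of them is non-empty. A quick check of the truth table:

    def needs_prompt(current, initial):
        return bool(current != initial and (current or initial))

    print(needs_prompt(None, ''))   # False: both empty, not a real change
    print(needs_prompt('tag', ''))  # True: a real change
    print(needs_prompt('a', 'a'))   # False: unchanged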

@ -182,7 +182,8 @@ class SearchDialog(QDialog, Ui_Dialog):
         global box_values
         box_values = copy.deepcopy(self.box_last_values)
         if general:
-            ans.append(unicode(self.general_combo.currentText()) + ':"' + general + '"')
+            ans.append(unicode(self.general_combo.currentText()) + ':"' +
+                       self.mc + general + '"')
         if ans:
             return ' and '.join(ans)
         return ''
Some files were not shown because too many files have changed in this diff.