mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
0.8.52
This commit is contained in:
commit
953ed2e1d9
@ -2,7 +2,6 @@
|
||||
.check-cache.pickle
|
||||
src/calibre/plugins
|
||||
resources/images.qrc
|
||||
resources/compiled_coffeescript.zip
|
||||
src/calibre/ebooks/oeb/display/test/*.js
|
||||
src/calibre/manual/.build/
|
||||
src/calibre/manual/cli/
|
||||
|
@ -19,8 +19,82 @@
|
||||
# new recipes:
|
||||
# - title:
|
||||
|
||||
- version: 0.8.52
|
||||
date: 2012-05-18
|
||||
|
||||
new features:
|
||||
- title: "EPUB Input: When setting the cover for a book that identifies its cover image, but not the html wrapper around the cover, try to detect and remove that wrapper automatically."
|
||||
tickets: [ 999959 ]
|
||||
|
||||
- title: "When deleting books of a specific format, show the number of books with each format available"
|
||||
|
||||
- title: "Linux install: No longer create MAN pages as all utilities have more comprehensive command line --help anyway"
|
||||
|
||||
- title: "Add a tweak Preferences->Tweaks to control the default choice of format for the Tweak Book feature"
|
||||
|
||||
- title: "Conversion: Allow setting negative page margins. A negative page margin means that calibre will not specify any page margin in the output document (for formats that support this)"
|
||||
|
||||
bug fixes:
|
||||
- title: "Tweak book: Fix handling of covers when tweaking KF8 books"
|
||||
|
||||
- title: "KF8 Output: Handle input documents with out of sequence ToC entries. Note that currently section jumping in the KF8 output produced by calibre for such files does not work."
|
||||
tickets: [1000493]
|
||||
|
||||
- title: "Edit metadata dialog: Fix the edit values button for custom tag-like columns showing an unneeded warning about changed values"
|
||||
|
||||
- title: "EPUB Output: Be a little more conservative when removing <form> tags. Only remove them if they have actual forms inside. "
|
||||
tickets: [ 1000384 ]
|
||||
|
||||
- title: "EPUB Input: Correctly update the Cover entry in the ToC even when the entry has a fragment reference. "
|
||||
tickets: [ 999973 ]
|
||||
|
||||
- title: "Update ImageMagick DLLs in all calibre binary builds to fix security vulnerabilities in ImageMagick"
|
||||
tickets: [ 999496 ]
|
||||
|
||||
- title: "Advanced search dialog: Fix equals and regex matching not being applied for custom column searches."
|
||||
tickets: [ 980221 ]
|
||||
|
||||
- title: "RTF Input: Handle old RTF files that have commands without braces."
|
||||
tickets: [ 994133 ]
|
||||
|
||||
- title: "Get Books: Diesel, fix results not showing when only a single match is found"
|
||||
|
||||
- title: "Get Books: Fix DRM status indicators for Kobo and Diesel stores. Fix smashwords not returning results."
|
||||
tickets: [ 993755 ]
|
||||
|
||||
- title: "Fix regression in 0.8.51 that broke viewing of LIT and some EPUB files"
|
||||
tickets: [998248, 998216]
|
||||
|
||||
improved recipes:
|
||||
- Clarin
|
||||
- Spiegel
|
||||
- Spiegel International
|
||||
- Montreal Gazette
|
||||
- Gosc Niedzielny
|
||||
- Ars Technica
|
||||
|
||||
new recipes:
|
||||
- title: "Army/Navy/Air force/Marine Times and News busters"
|
||||
author: jde
|
||||
|
||||
- title: "Ads of the World, Heavy Metal (Italian) and Juve La Stampa"
|
||||
author: faber1971
|
||||
|
||||
- title: "Revista Summa"
|
||||
author: Vakya
|
||||
|
||||
- title: "Strategic culture"
|
||||
author: Darko Miletic
|
||||
|
||||
- title: Stars and Stripes
|
||||
author: adoucette
|
||||
|
||||
- title: Nachdenkseiten
|
||||
author: jrda
|
||||
|
||||
|
||||
- version: 0.8.51
|
||||
date: 2012-06-11
|
||||
date: 2012-05-11
|
||||
|
||||
new features:
|
||||
- title: "When switching libraries preserve the position and selected books if you switch back to a previously opened library."
|
||||
|
26
recipes/ads_of_the_world.recipe
Normal file
26
recipes/ads_of_the_world.recipe
Normal file
@ -0,0 +1,26 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1336986047(BasicNewsRecipe):
    """Recipe for the "Ads of the World" feed.

    Cleanup is done by hand (``auto_cleanup`` is off): only the
    ``div#primary`` element of each page is kept, and the link lists,
    form items, options/comments boxes and node pager inside it are
    stripped.
    """

    # Publication metadata.
    __author__ = 'faber1971'
    title = u'Ads of the World'
    description = 'The best international advertising campaigns'
    language = 'en'
    masthead_url = 'http://bigcatgroup.co.uk/files/2011/01/05-ads-of-the-world.png'

    # Single source feed.
    feeds = [
        (u'Ads of the world', u'http://feeds.feedburner.com/adsoftheworld-latest'),
    ]

    # Download limits and ordering.
    oldest_article = 7
    max_articles_per_feed = 100
    reverse_article_order = True

    # Manual HTML cleanup.
    auto_cleanup = False
    no_stylesheets = True
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'primary'}},
    ]
    remove_tags = [
        {'name': 'ul', 'attrs': {'class': 'links inline'}},
        {'name': 'div', 'attrs': {'class': 'form-item'}},
        {'name': 'div', 'attrs': {'id': ['options', 'comments']}},
        {'name': 'ul', 'attrs': {'id': 'nodePager'}},
    ]
|
43
recipes/air_force_times.recipe
Normal file
43
recipes/air_force_times.recipe
Normal file
@ -0,0 +1,43 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AirForceTimes(BasicNewsRecipe):
    """Recipe for Air Force Times, assembled from the site's section RSS feeds."""

    # Recipe bookkeeping.
    __author__ = 'jde'
    __date__ = '16 May 2012'
    __version__ = '1.0'

    # Publication metadata.
    title = 'Air Force Times'
    description = 'News of the U.S. Air Force'
    publisher = 'AirForceTimes.com'
    language = 'en'
    category = 'news, U.S. Air Force'
    tags = 'news, U.S. Air Force'
    publication_type = 'newspaper'

    # The same logo image is used for both the cover and the masthead.
    cover_url = 'http://www.airforcetimes.com/images/logo_airforcetimes_alert.jpg'
    masthead_url = 'http://www.airforcetimes.com/images/logo_airforcetimes_alert.jpg'

    # Feed selection and download behaviour.
    oldest_article = 7 #days
    max_articles_per_feed = 25
    remove_empty_feeds = True
    use_embedded_content = False
    needs_subscription = False
    recursions = 0
    encoding = None

    # Article cleanup.
    auto_cleanup = True
    no_stylesheets = True
    remove_javascript = True

    # (section title, feed URL) pairs.
    feeds = [
        ('News', 'http://www.airforcetimes.com/rss_news.php'),
        ('Benefits', 'http://www.airforcetimes.com/rss_benefits.php'),
        ('Money', 'http://www.airforcetimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.airforcetimes.com/rss_careers.php'),
        ('Community', 'http://www.airforcetimes.com/rss_community.php'),
        ('Off Duty', 'http://www.airforcetimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.airforcetimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.airforcetimes.com/rss_guard.php'),
    ]
|
||||
|
||||
|
||||
|
||||
|
42
recipes/army_times.recipe
Normal file
42
recipes/army_times.recipe
Normal file
@ -0,0 +1,42 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class ArmyTimes(BasicNewsRecipe):
    """Recipe for Army Times, assembled from the site's section RSS feeds."""

    # Recipe bookkeeping.
    __author__ = 'jde'
    __date__ = '16 May 2012'
    __version__ = '1.0'

    # Publication metadata.
    title = 'Army Times'
    description = 'News of the U.S. Army'
    publisher = 'ArmyTimes.com'
    language = 'en'
    category = 'news, U.S. Army'
    tags = 'news, U.S. Army'
    publication_type = 'newspaper'

    # The same logo image is used for both the cover and the masthead.
    cover_url = 'http://www.armytimes.com/images/logo_armytimes_alert.jpg'
    masthead_url = 'http://www.armytimes.com/images/logo_armytimes_alert.jpg'

    # Feed selection and download behaviour.
    oldest_article = 7 #days
    max_articles_per_feed = 25
    remove_empty_feeds = True
    use_embedded_content = False
    needs_subscription = False
    recursions = 0
    encoding = None

    # Article cleanup.
    auto_cleanup = True
    no_stylesheets = True
    remove_javascript = True

    # (section title, feed URL) pairs.
    feeds = [
        ('News', 'http://www.armytimes.com/rss_news.php'),
        ('Benefits', 'http://www.armytimes.com/rss_benefits.php'),
        ('Money', 'http://www.armytimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.armytimes.com/rss_careers.php'),
        ('Community', 'http://www.armytimes.com/rss_community.php'),
        ('Off Duty', 'http://www.armytimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.armytimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.armytimes.com/rss_guard.php'),
    ]
|
||||
|
||||
|
||||
|
@ -1,33 +1,34 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
arstechnica.com
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
|
||||
class ArsTechnica(BasicNewsRecipe):
|
||||
title = u'Ars Technica'
|
||||
language = 'en'
|
||||
__author__ = 'Darko Miletic, Sujata Raman, Alexis Rohou'
|
||||
description = 'The art of technology'
|
||||
publisher = 'Ars Technica'
|
||||
description = 'Ars Technica: Serving the technologist for 1.2 decades'
|
||||
publisher = 'Conde Nast Publications'
|
||||
category = 'news, IT, technology'
|
||||
oldest_article = 5
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
encoding = 'utf-8'
|
||||
use_embedded_content = False
|
||||
remove_empty_feeds = True
|
||||
publication_type = 'newsportal'
|
||||
extra_css = '''
|
||||
body {font-family: Arial,Helvetica,sans-serif}
|
||||
.title{text-align: left}
|
||||
body {font-family: Arial,sans-serif}
|
||||
.heading{font-family: "Times New Roman",serif}
|
||||
.byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none}
|
||||
.news-item-figure-caption-text{font-size:small; font-style:italic}
|
||||
.news-item-figure-caption-byline{font-size:small; font-style:italic; font-weight:bold}
|
||||
img{display: block}
|
||||
.caption-text{font-size:small; font-style:italic}
|
||||
.caption-byline{font-size:small; font-style:italic; font-weight:bold}
|
||||
'''
|
||||
ignoreEtcArticles = True # Etc feed items can be ignored, as they're not real stories
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
@ -36,50 +37,38 @@ class ArsTechnica(BasicNewsRecipe):
|
||||
,'publisher' : publisher
|
||||
}
|
||||
|
||||
|
||||
#preprocess_regexps = [
|
||||
# (re.compile(r'<div class="news-item-figure', re.DOTALL|re.IGNORECASE),lambda match: '<div class="news-item-figure"')
|
||||
# ,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
|
||||
# ]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':['story','etc-story']})]
|
||||
keep_only_tags = [
|
||||
dict(attrs={'class':'standalone'})
|
||||
,dict(attrs={'id':'article-guts'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['object','link','embed'])
|
||||
,dict(name='div', attrs={'class':'read-more-link'})
|
||||
dict(name=['object','link','embed','iframe','meta'])
|
||||
,dict(attrs={'class':'corner-info'})
|
||||
]
|
||||
#remove_attributes=['width','height']
|
||||
remove_attributes = ['lang']
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'Infinite Loop (Apple content)' , u'http://feeds.arstechnica.com/arstechnica/apple/' )
|
||||
,(u'Opposable Thumbs (Gaming content)' , u'http://feeds.arstechnica.com/arstechnica/gaming/' )
|
||||
,(u'Gear and Gadgets' , u'http://feeds.arstechnica.com/arstechnica/gadgets/' )
|
||||
,(u'Chipster (Hardware content)' , u'http://feeds.arstechnica.com/arstechnica/hardware/' )
|
||||
,(u'Uptime (IT content)' , u'http://feeds.arstechnica.com/arstechnica/business/' )
|
||||
,(u'Open Ended (Open Source content)' , u'http://feeds.arstechnica.com/arstechnica/open-source/')
|
||||
,(u'One Microsoft Way' , u'http://feeds.arstechnica.com/arstechnica/microsoft/' )
|
||||
,(u'Nobel Intent (Science content)' , u'http://feeds.arstechnica.com/arstechnica/science/' )
|
||||
,(u'Scientific method (Science content)' , u'http://feeds.arstechnica.com/arstechnica/science/' )
|
||||
,(u'Law & Disorder (Tech policy content)' , u'http://feeds.arstechnica.com/arstechnica/tech-policy/')
|
||||
]
|
||||
|
||||
# This deals with multi-page stories
|
||||
def append_page(self, soup, appendtag, position):
|
||||
pager = soup.find('div',attrs={'class':'pager'})
|
||||
pager = soup.find(attrs={'class':'numbers'})
|
||||
if pager:
|
||||
for atag in pager.findAll('a',href=True):
|
||||
str = self.tag_to_string(atag)
|
||||
if str.startswith('Next'):
|
||||
nurl = 'http://arstechnica.com' + atag['href']
|
||||
nexttag = pager.find(attrs={'class':'next'})
|
||||
if nexttag:
|
||||
nurl = nexttag.parent['href']
|
||||
rawc = self.index_to_soup(nurl,True)
|
||||
soup2 = BeautifulSoup(rawc, fromEncoding=self.encoding)
|
||||
|
||||
readmoretag = soup2.find('div', attrs={'class':'read-more-link'})
|
||||
if readmoretag:
|
||||
readmoretag.extract()
|
||||
texttag = soup2.find('div', attrs={'class':'body'})
|
||||
for it in texttag.findAll(style=True):
|
||||
del it['style']
|
||||
|
||||
texttag = soup2.find(attrs={'id':'article-guts'})
|
||||
newpos = len(texttag.contents)
|
||||
self.append_page(soup2,texttag,newpos)
|
||||
texttag.extract()
|
||||
@ -88,41 +77,24 @@ class ArsTechnica(BasicNewsRecipe):
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
# Adds line breaks near the byline (not sure why this is needed)
|
||||
ftag = soup.find('div', attrs={'class':'byline'})
|
||||
if ftag:
|
||||
brtag = Tag(soup,'br')
|
||||
brtag2 = Tag(soup,'br')
|
||||
ftag.insert(4,brtag)
|
||||
ftag.insert(5,brtag2)
|
||||
|
||||
# Remove style items
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
|
||||
# Remove id
|
||||
for item in soup.findAll(id=True):
|
||||
del item['id']
|
||||
|
||||
# For some reason, links to authors don't have the domainname
|
||||
a_author = soup.find('a',{'href':re.compile("^/author")})
|
||||
if a_author:
|
||||
a_author['href'] = 'http://arstechnica.com'+a_author['href']
|
||||
|
||||
# within div class news-item-figure, we need to grab images
|
||||
|
||||
# Deal with multi-page stories
|
||||
self.append_page(soup, soup.body, 3)
|
||||
|
||||
for item in soup.findAll('a'):
|
||||
limg = item.find('img')
|
||||
if item.string is not None:
|
||||
str = item.string
|
||||
item.replaceWith(str)
|
||||
else:
|
||||
if limg:
|
||||
item.name = 'div'
|
||||
item.attrs = []
|
||||
else:
|
||||
str = self.tag_to_string(item)
|
||||
item.replaceWith(str)
|
||||
for item in soup.findAll('img'):
|
||||
if not item.has_key('alt'):
|
||||
item['alt'] = 'image'
|
||||
return soup
|
||||
|
||||
def get_article_url(self, article):
|
||||
# If the article title starts with Etc:, don't return it
|
||||
if self.ignoreEtcArticles:
|
||||
article_title = article.get('title',None)
|
||||
if re.match('Etc: ',article_title) is not None:
|
||||
return None
|
||||
|
||||
# The actual article is in a guid tag
|
||||
return article.get('guid', None).rpartition('?')[0]
|
||||
def preprocess_raw_html(self, raw, url):
|
||||
return '<html><head>'+raw[raw.find('</head>'):]
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
|
||||
from __future__ import unicode_literals
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
clarin.com
|
||||
'''
|
||||
@ -8,9 +8,9 @@ clarin.com
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Clarin(BasicNewsRecipe):
|
||||
title = 'Clarin'
|
||||
title = 'Clarín'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Noticias de Argentina y mundo'
|
||||
description = 'Clarin.com. Noticias de la Argentina y el mundo. Información actualizada las 24 horas y en español. Informate ya'
|
||||
publisher = 'Grupo Clarin'
|
||||
category = 'news, politics, Argentina'
|
||||
oldest_article = 2
|
||||
@ -26,9 +26,7 @@ class Clarin(BasicNewsRecipe):
|
||||
extra_css = """
|
||||
body{font-family: Arial,Helvetica,sans-serif}
|
||||
h2{font-family: Georgia,serif; font-size: xx-large}
|
||||
.hora{font-weight:bold}
|
||||
.hd p{font-size: small}
|
||||
.nombre-autor{color: #0F325A}
|
||||
.info,.nombre-autor,.hora{font-size: small}
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
@ -38,38 +36,35 @@ class Clarin(BasicNewsRecipe):
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(attrs={'class':['hd','mt']})]
|
||||
remove_tags = [dict(name=['meta','base','link'])]
|
||||
remove_attributes = ['lang','_mce_bogus']
|
||||
keep_only_tags = [dict(attrs={'class':['hd','mt','bd']})]
|
||||
remove_tags = [dict(name=['meta','base','link','iframe','embed','object'])]
|
||||
remove_attributes = ['lang']
|
||||
|
||||
feeds = [
|
||||
(u'Pagina principal', u'http://www.clarin.com/rss/' )
|
||||
,(u'Politica' , u'http://www.clarin.com/rss/politica/' )
|
||||
,(u'Deportes' , u'http://www.clarin.com/rss/deportes/' )
|
||||
,(u'Economia' , u'http://www.clarin.com/economia/' )
|
||||
,(u'Mundo' , u'http://www.clarin.com/rss/mundo/' )
|
||||
,(u'iEco' , u'http://www.ieco.clarin.com/rss/' )
|
||||
,(u'Espectaculos' , u'http://www.clarin.com/rss/espectaculos/')
|
||||
,(u'Sociedad' , u'http://www.clarin.com/rss/sociedad/' )
|
||||
,(u'Ciudades' , u'http://www.clarin.com/rss/ciudades/' )
|
||||
,(u'Policiales' , u'http://www.clarin.com/rss/policiales/' )
|
||||
,(u'Internet' , u'http://www.clarin.com/rss/internet/' )
|
||||
,(u'Ciudades' , u'http://www.clarin.com/rss/ciudades/' )
|
||||
]
|
||||
|
||||
|
||||
def get_article_url(self, article):
|
||||
return article.get('guid', None)
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '?print=1'
|
||||
|
||||
def get_article_url(self, article):
|
||||
return article.get('guid', None)
|
||||
|
||||
def get_cover_url(self):
|
||||
cover_url = None
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
cover_item = soup.find('div',attrs={'class':'bb-md bb-md-edicion_papel'})
|
||||
if cover_item:
|
||||
ap = cover_item.find('a',attrs={'href':'/edicion-impresa/'})
|
||||
if ap:
|
||||
cover_url = self.INDEX + ap.img['src']
|
||||
for item in soup.findAll('a', href=True):
|
||||
if item['href'].startswith('/tapas/TAPA_CLA'):
|
||||
cover_url = self.INDEX + item['href']
|
||||
return cover_url
|
||||
return cover_url
|
||||
|
||||
|
30
recipes/economico.recipe
Normal file
30
recipes/economico.recipe
Normal file
@ -0,0 +1,30 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Economico(BasicNewsRecipe):
    """Recipe for Economico, built from six economico.sapo.pt RSS feeds."""

    # Publication metadata.
    __author__ = 'Krittika Goyal'
    title = u'Economico'
    language = 'pt'

    # Feed selection and download behaviour.
    oldest_article = 1 #days
    max_articles_per_feed = 25
    use_embedded_content = False
    encoding = 'utf-8'

    # Article cleanup.
    auto_cleanup = True
    no_stylesheets = True

    # (section title, feed URL) pairs.
    feeds = [
        ('Ultima Hora', 'http://economico.sapo.pt/rss/ultimas'),
        ('Em Foco', 'http://economico.sapo.pt/rss/emfoco'),
        ('Mercados', 'http://economico.sapo.pt/rss/mercados'),
        ('Empresas', 'http://economico.sapo.pt/rss/empresas'),
        ('Economia', 'http://economico.sapo.pt/rss/economia'),
        ('Politica', 'http://economico.sapo.pt/rss/politica'),
    ]
|
||||
|
@ -6,21 +6,20 @@ __copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from datetime import date
|
||||
import re
|
||||
|
||||
class GN(BasicNewsRecipe):
|
||||
EDITION = 0
|
||||
|
||||
__author__ = 'Piotr Kontek'
|
||||
title = u'Gość niedzielny'
|
||||
description = 'Weekly magazine'
|
||||
encoding = 'utf-8'
|
||||
no_stylesheets = True
|
||||
language = 'pl'
|
||||
remove_javascript = True
|
||||
temp_files = []
|
||||
simultaneous_downloads = 1
|
||||
masthead_url = 'http://gosc.pl/files/11/03/12/949089_top.gif'
|
||||
title = u'Gość niedzielny'
|
||||
|
||||
articles_are_obfuscated = True
|
||||
|
||||
@ -56,22 +55,28 @@ class GN(BasicNewsRecipe):
|
||||
self.temp_files[-1].close()
|
||||
return self.temp_files[-1].name
|
||||
|
||||
def find_last_issue(self):
|
||||
soup = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny')
|
||||
#szukam zdjęcia i linka do porzedniego pełnego numeru
|
||||
def find_last_issue(self, year):
|
||||
soup = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny/rok/' + str(year))
|
||||
|
||||
#szukam zdjęcia i linka do poprzedniego pełnego numeru
|
||||
first = True
|
||||
for d in soup.findAll('div', attrs={'class':'l release_preview_l'}):
|
||||
img = d.find('img')
|
||||
if img != None:
|
||||
a = img.parent
|
||||
self.EDITION = a['href']
|
||||
self.title = img['alt']
|
||||
self.cover_url = 'http://www.gosc.pl' + img['src']
|
||||
if not first:
|
||||
if year != date.today().year or not first:
|
||||
break
|
||||
first = False
|
||||
|
||||
def parse_index(self):
|
||||
self.find_last_issue()
|
||||
year = date.today().year
|
||||
self.find_last_issue(year)
|
||||
##jeśli to pierwszy numer w roku trzeba pobrać poprzedni rok
|
||||
if self.EDITION == 0:
|
||||
self.find_last_issue(year-1)
|
||||
soup = self.index_to_soup('http://www.gosc.pl' + self.EDITION)
|
||||
feeds = []
|
||||
#wstepniak
|
||||
|
22
recipes/heavy_metal_it.recipe
Normal file
22
recipes/heavy_metal_it.recipe
Normal file
@ -0,0 +1,22 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1336289226(BasicNewsRecipe):
    """Recipe for the heavy-metal.it feed.

    Cleanup is done by hand (``auto_cleanup`` is off): each page is
    reduced to its ``div.entry`` element, cut off after the
    ``div.sociable`` block.
    """

    # Recipe bookkeeping.
    __author__ = 'faber1971'
    __version__ = 'v1.0'
    __date__ = '6, May 2012'

    # Publication metadata.
    title = u'Heavy Metal'
    description = 'An Heavy metal Italian magazine'
    language = 'it'
    # NOTE(review): this URL points at a different host and its path
    # (tmw/img/tj.gif) does not obviously relate to heavy-metal.it.
    # Confirm it is the intended masthead image.
    masthead_url = 'http://net-static2.tccstatic.com/template/tmw/img/tj.gif'

    # Single source feed.
    feeds = [
        (u'Heavy Metal', u'http://www.heavy-metal.it/feed/'),
    ]

    # Download limits.
    oldest_article = 15
    max_articles_per_feed = 100

    # Manual HTML cleanup.
    auto_cleanup = False
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'entry'}},
    ]
    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': 'sociable'}},
    ]
|
BIN
recipes/icons/strategic_culture.png
Normal file
BIN
recipes/icons/strategic_culture.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 648 B |
24
recipes/juve_la_stampa.recipe
Normal file
24
recipes/juve_la_stampa.recipe
Normal file
@ -0,0 +1,24 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1336504510(BasicNewsRecipe):
    """Recipe for the "Qui Juve" section of La Stampa, read via a feed43.com feed."""

    # Recipe bookkeeping.
    __author__ = 'faber1971'
    __version__ = 'v1.0'
    __date__ = '8, May 2012'

    # Publication metadata.
    title = u'Juve - La Stampa'
    description = 'News about Juventus from La Stampa'
    language = 'it'
    masthead_url = 'http://www3.lastampa.it/fileadmin/media/sport/quijuve/top_quijuve.jpg'

    # Single source feed.
    feeds = [
        (u'Qui Juve - La Stampa', u'http://feed43.com/2352784107537677.xml'),
    ]

    # Download limits.
    oldest_article = 1
    max_articles_per_feed = 100

    # Article cleanup: automatic extraction plus removal of three div classes.
    auto_cleanup = True
    remove_tags = [
        {
            'name': 'div',
            'attrs': {'class': ['article-toolbar', 'sezione sezione-news', 'intestazione']},
        },
    ]

    # Stylesheet text is kept flush-left so the string matches the original.
    extra_css = '''
div.dettaglio div.immagine_girata p.news-single-imgcaption {color: #000000; font-family: "Georgia", "Times", serif; font-size: 7px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
.sezione {color: #000000; font-family: "Georgia", "Times", serif; font-size: 7px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
body {color: #000000; font-family: "Georgia", "Times", serif; font-size: 7px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
h3 {color: #000000; font-family: "Georgia", "Times", serif; font-size: 22px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
div.dettaglio h2.catenaccio {color: #000000; font-family: "Georgia", "Times", serif; font-size: 18px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
'''
|
42
recipes/marine_corps_times.recipe
Normal file
42
recipes/marine_corps_times.recipe
Normal file
@ -0,0 +1,42 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class MarineCorpsTimes(BasicNewsRecipe):
    """Recipe for Marine Corps Times, assembled from the site's section RSS feeds."""

    # Recipe bookkeeping.
    __author__ = 'jde'
    __date__ = '16 May 2012'
    __version__ = '1.0'

    # Publication metadata.
    title = 'Marine Corps Times'
    description = 'News of the U.S. Marine Corps'
    publisher = 'MarineCorpsTimes.com'
    language = 'en'
    category = 'news, U.S. Marine Corps'
    tags = 'news, U.S. Marine Corps'
    publication_type = 'newspaper'

    # The same logo image is used for both the cover and the masthead.
    cover_url = 'http://www.marinecorpstimes.com/images/logo_marinetimes-alert.jpg'
    masthead_url = 'http://www.marinecorpstimes.com/images/logo_marinetimes-alert.jpg'

    # Feed selection and download behaviour.
    oldest_article = 7 #days
    max_articles_per_feed = 25
    remove_empty_feeds = True
    use_embedded_content = False
    needs_subscription = False
    recursions = 0
    encoding = None

    # Article cleanup.
    auto_cleanup = True
    no_stylesheets = True
    remove_javascript = True

    # (section title, feed URL) pairs. The host is spelled in mixed case
    # here, unlike the lowercase host used for the logo URLs above.
    feeds = [
        ('News', 'http://www.MarineCorpstimes.com/rss_news.php'),
        ('Benefits', 'http://www.MarineCorpstimes.com/rss_benefits.php'),
        ('Money', 'http://www.MarineCorpstimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.MarineCorpstimes.com/rss_careers.php'),
        ('Community', 'http://www.MarineCorpstimes.com/rss_community.php'),
        ('Off Duty', 'http://www.MarineCorpstimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.MarineCorpstimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.MarineCorpstimes.com/rss_guard.php'),
    ]
|
||||
|
||||
|
||||
|
||||
|
41
recipes/military_times.recipe
Normal file
41
recipes/military_times.recipe
Normal file
@ -0,0 +1,41 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class MilitaryTimes(BasicNewsRecipe):
    """Recipe for Military Times, assembled from the site's section RSS feeds."""

    # Recipe bookkeeping.
    __author__ = 'jde'
    __date__ = '16 May 2012'
    __version__ = '1.0'

    # Publication metadata.
    title = 'Military Times'
    description = 'News of the U.S. Military'
    publisher = 'MilitaryTimes.com'
    language = 'en'
    category = 'news, U.S. Military'
    tags = 'news, U.S. Military'
    publication_type = 'newspaper'

    # The same logo image is used for both the cover and the masthead.
    cover_url = 'http://www.militarytimes.com/images/logo_militarytimes_landing-s.gif'
    masthead_url = 'http://www.militarytimes.com/images/logo_militarytimes_landing-s.gif'

    # Feed selection and download behaviour.
    oldest_article = 7 #days
    max_articles_per_feed = 25
    remove_empty_feeds = True
    use_embedded_content = False
    needs_subscription = False
    recursions = 0
    encoding = None

    # Article cleanup.
    auto_cleanup = True
    no_stylesheets = True
    remove_javascript = True

    # (section title, feed URL) pairs.
    feeds = [
        ('News', 'http://www.militarytimes.com/rss_news.php'),
        ('Benefits', 'http://www.militarytimes.com/rss_benefits.php'),
        ('Money', 'http://www.militarytimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.militarytimes.com/rss_careers.php'),
        ('Community', 'http://www.militarytimes.com/rss_community.php'),
        ('Off Duty', 'http://www.militarytimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.militarytimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.militarytimes.com/rss_guard.php'),
    ]
|
||||
|
@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
@ -7,77 +6,21 @@ __license__ = 'GPL v3'
|
||||
www.canada.com
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class CanWestPaper(BasicNewsRecipe):
|
||||
|
||||
# un-comment the following four lines for the Victoria Times Colonist
|
||||
## title = u'Victoria Times Colonist'
|
||||
## url_prefix = 'http://www.timescolonist.com'
|
||||
## description = u'News from Victoria, BC'
|
||||
## fp_tag = 'CAN_TC'
|
||||
|
||||
# un-comment the following four lines for the Vancouver Province
|
||||
## title = u'Vancouver Province'
|
||||
## url_prefix = 'http://www.theprovince.com'
|
||||
## description = u'News from Vancouver, BC'
|
||||
## fp_tag = 'CAN_VP'
|
||||
|
||||
# un-comment the following four lines for the Vancouver Sun
|
||||
## title = u'Vancouver Sun'
|
||||
## url_prefix = 'http://www.vancouversun.com'
|
||||
## description = u'News from Vancouver, BC'
|
||||
## fp_tag = 'CAN_VS'
|
||||
|
||||
# un-comment the following four lines for the Edmonton Journal
|
||||
## title = u'Edmonton Journal'
|
||||
## url_prefix = 'http://www.edmontonjournal.com'
|
||||
## description = u'News from Edmonton, AB'
|
||||
## fp_tag = 'CAN_EJ'
|
||||
|
||||
# un-comment the following four lines for the Calgary Herald
|
||||
## title = u'Calgary Herald'
|
||||
## url_prefix = 'http://www.calgaryherald.com'
|
||||
## description = u'News from Calgary, AB'
|
||||
## fp_tag = 'CAN_CH'
|
||||
|
||||
# un-comment the following four lines for the Regina Leader-Post
|
||||
## title = u'Regina Leader-Post'
|
||||
## url_prefix = 'http://www.leaderpost.com'
|
||||
## description = u'News from Regina, SK'
|
||||
## fp_tag = ''
|
||||
|
||||
# un-comment the following four lines for the Saskatoon Star-Phoenix
|
||||
## title = u'Saskatoon Star-Phoenix'
|
||||
## url_prefix = 'http://www.thestarphoenix.com'
|
||||
## description = u'News from Saskatoon, SK'
|
||||
## fp_tag = ''
|
||||
|
||||
# un-comment the following four lines for the Windsor Star
|
||||
## title = u'Windsor Star'
|
||||
## url_prefix = 'http://www.windsorstar.com'
|
||||
## description = u'News from Windsor, ON'
|
||||
## fp_tag = 'CAN_'
|
||||
|
||||
# un-comment the following four lines for the Ottawa Citizen
|
||||
## title = u'Ottawa Citizen'
|
||||
## url_prefix = 'http://www.ottawacitizen.com'
|
||||
## description = u'News from Ottawa, ON'
|
||||
## fp_tag = 'CAN_OC'
|
||||
|
||||
# un-comment the following four lines for the Montreal Gazette
|
||||
# un-comment the following three lines for the Montreal Gazette
|
||||
title = u'Montreal Gazette'
|
||||
url_prefix = 'http://www.montrealgazette.com'
|
||||
description = u'News from Montreal, QC'
|
||||
fp_tag = 'CAN_MG'
|
||||
|
||||
|
||||
language = 'en_CA'
|
||||
__author__ = 'Nick Redding'
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
auto_cleanup_keep = '//*[@id="imageBox"]'
|
||||
timefmt = ' [%b %d]'
|
||||
extra_css = '''
|
||||
.timestamp { font-size:xx-small; display: block; }
|
||||
@ -87,135 +30,19 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
.byline { font-size:xx-small; }
|
||||
#photocaption { font-size: small; font-style: italic }
|
||||
#photocredit { font-size: xx-small; }'''
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
|
||||
remove_tags = [{'class':'comments'},
|
||||
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
|
||||
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
|
||||
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
|
||||
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
|
||||
dict(name='div', attrs={'class':'rule_grey_solid'}),
|
||||
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
|
||||
|
||||
|
||||
def get_cover_url(self):
    """Return the URL of the most recent reachable front-page image.

    The URL is built from the day of the month and ``self.fp_tag``.
    Today's image is tried first, then each of the six previous days.
    Returns None when ``self.fp_tag`` is empty or when none of the seven
    candidate URLs can be opened (in which case a message is logged).
    """
    from datetime import timedelta, date
    if self.fp_tag=='':
        return None
    today = date.today()
    for daysback in range(7):
        day = (today - timedelta(days=daysback)).day
        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(day)+'/lg/'+self.fp_tag+'.jpg'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(cover)
        except Exception:
            # Best effort: any failure to open just means "try an older day".
            # Exception (not a bare except) lets Ctrl-C / SystemExit through.
            continue
        return cover
    self.log("\nCover unavailable")
    return None
|
||||
|
||||
def fixChars(self,string):
    """Return ``string`` with stray cp1252 punctuation code points replaced.

    Each (pattern, replacement) pair is applied in order with ``re.sub``.
    """
    substitutions = (
        ("\x91","‘"),  # lsquo
        ("\x92","’"),  # rsquo
        ("\x93","“"),  # ldquo
        ("\x94","”"),  # rdquo
        ("\x96","–"),  # ndash
        ("\x97","—"),  # mdash
        ("’","’"),
    )
    fixed = string
    for pattern, replacement in substitutions:
        fixed = re.sub(pattern,replacement,fixed)
    return fixed
|
||||
|
||||
def massageNCXText(self, description):
    """Clean up a description for use in the Kindle table of contents.

    Falsy descriptions (None, empty string) are returned unchanged.
    Otherwise HTML entities are converted via BeautifulStoneSoup and the
    result is passed through ``self.fixChars``.
    """
    # Kindle TOC descriptions won't render certain characters
    if not description:
        return description
    parsed = BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    massaged = unicode(parsed)
    # Replace '&' with '&'
    massaged = re.sub("&","&", massaged)
    return self.fixChars(massaged)
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
    """Fill in the TOC thumbnail and a fallback summary for ``article``.

    When ``first`` is true, the first <img> inside <body> (if any) is
    registered as the article's TOC thumbnail. When the article's text
    summary is empty or whitespace, the page's ``og:description`` meta
    tag (if present) supplies both ``summary`` and ``text_summary``.
    """
    if first:
        first_img = soup.find('body').find('img')
        if first_img is not None:
            thumb_src = re.sub(r'links\\link\d+\\','',first_img['src'])
            self.add_toc_thumbnail(article,thumb_src)
    if article.text_summary.strip():
        # A non-blank summary already exists; leave it alone.
        return
    og_desc = soup.find('meta',attrs={'property':'og:description'})
    if og_desc is None:
        return
    content = og_desc['content']
    article.summary = content
    article.text_summary = content
|
||||
|
||||
def strip_anchors(self,soup):
    """Replace every image-less <a> tag in ``soup`` with its rendered contents.

    Anchors that contain an <img> are kept. The rendered contents are
    decoded as cp1252 (with replacement of undecodable bytes). The same
    ``soup`` object is modified in place and returned.
    """
    for element in soup.findAll(True):
        for anchor in element.findAll('a'):
            if anchor.img is not None:
                continue
            anchor.replaceWith(anchor.renderContents().decode('cp1252','replace'))
    return soup
|
||||
|
||||
def preprocess_html(self, soup):
    """Return ``soup`` after passing it through ``self.strip_anchors``."""
    cleaned = self.strip_anchors(soup)
    return cleaned
|
||||
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
|
||||
feeds = [
|
||||
('News',
|
||||
'http://rss.canada.com/get/?F297'),
|
||||
('Sports',
|
||||
'http://rss.canada.com/get/?F299'),
|
||||
('Entertainment',
|
||||
'http://rss.canada.com/get/?F7366'),
|
||||
('Business',
|
||||
'http://rss.canada.com/get/?F6939'),
|
||||
]
|
||||
|
||||
articles = {}
|
||||
key = 'News'
|
||||
ans = ['News']
|
||||
|
||||
# Find each instance of class="sectiontitle", class="featurecontent"
|
||||
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
|
||||
#self.log(" div class = %s" % divtag['class'])
|
||||
if divtag['class'].startswith('section_title'):
|
||||
# div contains section title
|
||||
if not divtag.h3:
|
||||
continue
|
||||
key = self.tag_to_string(divtag.h3,False)
|
||||
ans.append(key)
|
||||
self.log("Section name %s" % key)
|
||||
continue
|
||||
# div contains article data
|
||||
h1tag = divtag.find('h1')
|
||||
if not h1tag:
|
||||
continue
|
||||
atag = h1tag.find('a',href=True)
|
||||
if not atag:
|
||||
continue
|
||||
url = self.url_prefix+'/news/todays-paper/'+atag['href']
|
||||
#self.log("Section %s" % key)
|
||||
#self.log("url %s" % url)
|
||||
title = self.tag_to_string(atag,False)
|
||||
#self.log("title %s" % title)
|
||||
pubdate = ''
|
||||
description = ''
|
||||
ptag = divtag.find('p');
|
||||
if ptag:
|
||||
description = self.tag_to_string(ptag,False)
|
||||
#self.log("description %s" % description)
|
||||
author = ''
|
||||
autag = divtag.find('h4')
|
||||
if autag:
|
||||
author = self.tag_to_string(autag,False)
|
||||
#self.log("author %s" % author)
|
||||
if not articles.has_key(key):
|
||||
articles[key] = []
|
||||
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
|
||||
|
||||
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
||||
return ans
|
||||
|
22
recipes/nachdenkseiten.recipe
Normal file
22
recipes/nachdenkseiten.recipe
Normal file
@ -0,0 +1,22 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Nachdenkseiten(BasicNewsRecipe):
    """Feed-based recipe for www.nachdenkseiten.de."""

    # --- metadata ---
    title = u'Nachdenkseiten'
    description = 'NachDenkSeiten - Die kritische Website'
    publisher = 'www.nachdenkseiten.de Albrecht Mueller und Dr. Wolfgang Lieb'
    category = 'news'
    language = 'de'
    __author__ = 'jrda'

    # --- fetch settings ---
    oldest_article = 7
    max_articles_per_feed = 6
    timefmt = ''
    encoding = 'utf-8'
    use_embedded_content = False

    # --- clean-up ---
    no_stylesheets = True
    remove_javascript = True
    keep_only_tags = [{'id':'content'}]

    feeds = [('News', 'http://www.nachdenkseiten.de/?feed=rss2')]
|
42
recipes/navy_times.recipe
Normal file
42
recipes/navy_times.recipe
Normal file
@ -0,0 +1,42 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class NavyTimes(BasicNewsRecipe):
    """Feed-based recipe for the Navy Times RSS feeds."""

    # --- metadata ---
    title = 'Navy Times'
    description = 'News of the U.S. Navy'
    publisher = 'NavyTimes.com'
    language = 'en'
    category = 'news, U.S. Navy'
    tags = 'news, U.S. Navy'
    publication_type = 'newspaper'
    __author__ = 'jde'
    __date__ = '16 May 2012'
    __version__ = '1.0'

    # The same logo image serves as both cover and masthead.
    cover_url = 'http://www.navytimes.com/images/logo_navytimes_alert.jpg'
    masthead_url = 'http://www.navytimes.com/images/logo_navytimes_alert.jpg'

    # --- fetch settings ---
    oldest_article = 7  # days
    max_articles_per_feed = 25
    recursions = 0
    encoding = None
    needs_subscription = False
    use_embedded_content = False
    remove_empty_feeds = True

    # --- clean-up ---
    no_stylesheets = True
    remove_javascript = True
    auto_cleanup = True

    feeds = [
        ('News', 'http://www.navytimes.com/rss_news.php'),
        ('Benefits', 'http://www.navytimes.com/rss_benefits.php'),
        ('Money', 'http://www.navytimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.navytimes.com/rss_careers.php'),
        ('Community', 'http://www.navytimes.com/rss_community.php'),
        ('Off Duty', 'http://www.navytimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.navytimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.navytimes.com/rss_guard.php'),
    ]
|
||||
|
||||
|
||||
|
20
recipes/news_busters.recipe
Normal file
20
recipes/news_busters.recipe
Normal file
@ -0,0 +1,20 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class NewsBusters(BasicNewsRecipe):
    """Feed-based recipe for the News Busters blog RSS feed."""

    # --- metadata ---
    title = u'News Busters'
    description = 'Exposing and Combating Liberal Media Bias'
    language = 'en'
    __author__ = 'jde'
    cover_url = "http://newsbusters.org/sites/all/themes/genesis_nb/images/nb-mrc.png"

    # --- fetch settings ---
    oldest_article = 1  # day
    max_articles_per_feed = 100
    recursions = 0
    encoding = 'utf8'
    needs_subscription = False
    use_embedded_content = False

    # --- clean-up ---
    no_stylesheets = True
    remove_javascript = True
    auto_cleanup = True

    feeds = [
        (u'Blog', u'http://www.newsbusters.org/rss.xml'),
    ]
|
||||
|
@ -9,10 +9,10 @@ import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Pescanik(BasicNewsRecipe):
|
||||
title = 'Peščanik'
|
||||
title = u'Peščanik'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Peščanik je udruženje građana osnovano 2006. godine. Glavni proizvod Peščanika je radio emisija koja je emitovana na Radiju B92 od 02.02.2000. do 16.06.2011, a od septembra 2011. se emituje na osam radio stanica u Srbiji, Crnoj Gori i BiH'
|
||||
publisher = 'Peščanik'
|
||||
description = u'Peščanik je udruženje građana osnovano 2006. godine. Glavni proizvod Peščanika je radio emisija koja je emitovana na Radiju B92 od 02.02.2000. do 16.06.2011, a od septembra 2011. se emituje na osam radio stanica u Srbiji, Crnoj Gori i BiH'
|
||||
publisher = u'Peščanik'
|
||||
category = 'news, politics, Serbia'
|
||||
oldest_article = 10
|
||||
max_articles_per_feed = 100
|
||||
|
@ -1,5 +1,5 @@
|
||||
"""
|
||||
Pocket Calibre Recipe v1.1
|
||||
Pocket Calibre Recipe v1.2
|
||||
"""
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '''
|
||||
@ -97,7 +97,12 @@ class Pocket(BasicNewsRecipe):
|
||||
self.readList.append(readLink)
|
||||
totalfeeds.append((feedtitle, articles))
|
||||
if len(self.readList) < self.minimum_articles:
|
||||
raise Exception("Not enough articles in Pocket! Change minimum_articles or add more articles.")
|
||||
self.mark_as_read_after_dl = False
|
||||
if hasattr(self, 'abort_recipe_processing'):
|
||||
self.abort_recipe_processing("Only %d articles retrieved, minimum_articles not reached" % len(self.readList))
|
||||
else:
|
||||
self.log.exception("Only %d articles retrieved, minimum_articles not reached" % len(self.readList))
|
||||
return []
|
||||
return totalfeeds
|
||||
|
||||
def mark_as_read(self, markList):
|
||||
|
22
recipes/revista_summa.recipe
Normal file
22
recipes/revista_summa.recipe
Normal file
@ -0,0 +1,22 @@
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Vakya'
|
||||
__version__ = 'v1.0'
|
||||
__date__ = '14, May 2012'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1336226255(BasicNewsRecipe):
    """Feed-based recipe for the Revista Summa RSS feed."""

    # --- metadata ---
    title = u'Revista Summa'
    description = 'Informacion regional sobre economia y negocios'
    publisher = u'Summa'
    language = 'es'
    __author__ = 'Vakya'

    # --- fetch settings ---
    oldest_article = 15
    max_articles_per_feed = 100

    # --- clean-up ---
    auto_cleanup = True
    remove_tags_before = dict(name='h1')
    remove_tags_after = dict(name='label')

    feeds = [
        (u'Revista Summa', u'http://www.revistasumma.com/rss/rss-v2.0.rss'),
    ]
|
||||
|
@ -1,3 +1,4 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
@ -15,6 +16,8 @@ class Spiegel_int(BasicNewsRecipe):
|
||||
language = 'en_DE'
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
auto_cleanup = True
|
||||
auto_cleanup_keep = '//*[@id="spArticleTopAsset"]'
|
||||
encoding = 'cp1252'
|
||||
publisher = 'SPIEGEL ONLINE GmbH'
|
||||
category = 'news, politics, Germany'
|
||||
@ -43,25 +46,25 @@ class Spiegel_int(BasicNewsRecipe):
|
||||
.spPhotoGallery{font-size:x-small; color:#990000 ;}
|
||||
'''
|
||||
|
||||
keep_only_tags = [dict(attrs={'id':'spArticleContent'})]
|
||||
remove_tags_after = dict(attrs={'id':'spArticleBody'})
|
||||
remove_tags = [dict(name=['meta','base','iframe','embed','object'])]
|
||||
remove_attributes = ['clear']
|
||||
#keep_only_tags = [dict(attrs={'id':'spArticleContent'})]
|
||||
#remove_tags_after = dict(attrs={'id':'spArticleBody'})
|
||||
#remove_tags = [dict(name=['meta','base','iframe','embed','object'])]
|
||||
#remove_attributes = ['clear']
|
||||
feeds = [(u'Spiegel Online', u'http://www.spiegel.de/international/index.rss')]
|
||||
|
||||
def print_version(self, url):
|
||||
main, sep, rest = url.rpartition(',')
|
||||
rmain, rsep, rrest = main.rpartition(',')
|
||||
return rmain + ',druck-' + rrest + ',' + rest
|
||||
#def print_version(self, url):
|
||||
#main, sep, rest = url.rpartition(',')
|
||||
#rmain, rsep, rrest = main.rpartition(',')
|
||||
#return rmain + ',druck-' + rrest + ',' + rest
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.findAll('a'):
|
||||
if item.string is not None:
|
||||
str = item.string
|
||||
item.replaceWith(str)
|
||||
else:
|
||||
str = self.tag_to_string(item)
|
||||
item.replaceWith(str)
|
||||
return soup
|
||||
#def preprocess_html(self, soup):
|
||||
#for item in soup.findAll(style=True):
|
||||
#del item['style']
|
||||
#for item in soup.findAll('a'):
|
||||
#if item.string is not None:
|
||||
#str = item.string
|
||||
#item.replaceWith(str)
|
||||
#else:
|
||||
#str = self.tag_to_string(item)
|
||||
#item.replaceWith(str)
|
||||
#return soup
|
||||
|
@ -6,7 +6,6 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
spiegel.de
|
||||
'''
|
||||
|
||||
from time import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Spiegel_ger(BasicNewsRecipe):
|
||||
@ -21,6 +20,8 @@ class Spiegel_ger(BasicNewsRecipe):
|
||||
lang = 'de-DE'
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
auto_cleanup = True
|
||||
auto_cleanup_keep = '//*[@id="spArticleTopAsset"]'
|
||||
encoding = 'cp1252'
|
||||
|
||||
conversion_options = {
|
||||
@ -31,20 +32,9 @@ class Spiegel_ger(BasicNewsRecipe):
|
||||
}
|
||||
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'spArticleContent'})]
|
||||
|
||||
remove_tags = [dict(name=['object','link','base','iframe'])]
|
||||
|
||||
remove_tags_after = dict(name='div', attrs={'id':'spArticleBody'})
|
||||
|
||||
feeds = [(u'Spiegel Online', u'http://www.spiegel.de/schlagzeilen/index.rss')]
|
||||
|
||||
def print_version(self, url):
|
||||
rmt = url.rpartition('#')[0]
|
||||
main, sep, rest = rmt.rpartition(',')
|
||||
rmain, rsep, rrest = main.rpartition(',')
|
||||
purl = rmain + ',druck-' + rrest + ',' + rest
|
||||
return purl
|
||||
|
||||
def get_cover_url(self):
|
||||
return 'http://wissen.spiegel.de/wissen/titel/SP/' + strftime("%Y/%W/%j/titel.jpg")
|
||||
|
||||
|
39
recipes/stars_and_stripes.recipe
Normal file
39
recipes/stars_and_stripes.recipe
Normal file
@ -0,0 +1,39 @@
|
||||
''' Stars and Stripes
|
||||
'''
|
||||
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1308791026(BasicNewsRecipe):
    """Feed-based recipe for the Stars and Stripes RSS feeds."""

    title = u'Stars and Stripes'
    oldest_article = 3
    max_articles_per_feed = 100
    __author__ = 'adoucette'
    # Fixed typo: "militarys" -> "military's" (this text is shown to users).
    description = "The U.S. military's independent news source, featuring exclusive reports from Iraq, Afghanistan, Europe and the Far East."
    no_stylesheets = True
    #delay = 1
    use_embedded_content = False
    encoding = 'utf8'
    publisher = 'stripes.com'
    category = 'news, US, world'
    language = 'en_US'
    publication_type = 'newsportal'
    # Drop HTML comments (including multi-line ones) before parsing.
    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
    conversion_options = {
        'comments' : description
        ,'tags' : category
        ,'language' : language
        ,'publisher' : publisher
        ,'linearize_tables': True
    }
    keep_only_tags = [dict(name='div', attrs={'class':['element article']})]
    remove_tags_after = [dict(name='ul', attrs={'class':'inline-bookmarks'})]
    feeds = [
        (u'News', u'http://feeds.stripes.com/starsandstripes/news'),
        (u'Sports', u'http://feeds.stripes.com/starsandstripes/sports'),
        (u'Military Life', u'http://feeds.stripes.com/starsandstripes/militarylife'),
        (u'Opinion', u'http://feeds.stripes.com/starsandstripes/opinion'),
        (u'Travel', u'http://feeds.stripes.com/starsandstripes/travel')
    ]
|
92
recipes/strategic_culture.recipe
Normal file
92
recipes/strategic_culture.recipe
Normal file
@ -0,0 +1,92 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
|
||||
'''
|
||||
www.strategic-culture.org
|
||||
'''
|
||||
|
||||
import time
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class StrategicCulture(BasicNewsRecipe):
    """Recipe for www.strategic-culture.org.

    The entries in ``feeds`` are HTML index pages, not RSS feeds, so
    ``parse_index`` scrapes the article links from each page itself.
    """

    title = 'Strategic Culture Foundation'
    __author__ = 'Darko Miletic'
    description = 'Online Journal'
    publisher = 'Strategic Culture Foundation'
    category = 'news, politics'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    language = 'en'
    publication_type = 'newsportal'
    masthead_url = 'http://www.strategic-culture.org/img/logo.jpg'
    extra_css = '''
        body{font-family: Arial, sans-serif}
        h1{font-family: "Times New Roman",Times,serif}
        img{margin-bottom: 0.8em}
    '''

    # Key changed from 'comment' to 'comments' to match the option name
    # used by the other recipes.
    conversion_options = {
          'comments'  : description
        , 'tags'      : category
        , 'publisher' : publisher
        , 'language'  : language
    }

    keep_only_tags = [
        dict(name=['h1','p'])
        ,dict(name='div', attrs={'id':'cke_pastebin'})
    ]

    remove_tags = [dict(name=['object','link','base','meta','iframe'])]

    feeds = [
         (u'News'             , u'http://www.strategic-culture.org/blocks/news.html'                  )
        ,(u'Politics'         , u'http://www.strategic-culture.org/rubrics/politics.html'             )
        ,(u'Economics'        , u'http://www.strategic-culture.org/rubrics/economics.html'            )
        ,(u'History & Culture', u'http://www.strategic-culture.org/rubrics/history-and-culture.html')
        ,(u'Columnists'       , u'http://www.strategic-culture.org/rubrics/columnists.html'           )
    ]

    def print_version(self, url):
        """Map an article URL (``/news/``) to its print view (``/pview/``)."""
        return url.replace('-culture.org/news/','-culture.org/pview/')

    def parse_index(self):
        """Scrape every index page in ``feeds`` into (title, articles) pairs.

        Article links are <a> tags with class ``sini14`` on the news page
        and ``h22`` on the other pages. The article date is taken from the
        year/month/day path components of each link's URL.
        """
        totalfeeds = []
        lfeeds = self.get_feeds()
        for feedobj in lfeeds:
            feedtitle, feedurl = feedobj
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            if feedurl.endswith('news.html'):
                clname = 'sini14'
            else:
                clname = 'h22'
            # URLs already collected for this feed, so repeated links on the
            # same page yield a single article.
            # NOTE(review): the per-feed scope of this set is inferred; the
            # reviewed text had lost its indentation. Confirm.
            seen = set()
            for atag in soup.findAll('a', attrs={'class':clname}):
                url = atag['href']
                title = self.tag_to_string(atag)
                description = ''
                # Peel day, month and year off the URL path from the right.
                daypart = url.rpartition('/')[0]
                mpart,sep,day = daypart.rpartition('/')
                ypart,sep,month = mpart.rpartition('/')
                year = ypart.rpartition('/')[2]
                date = strftime("%a, %d %b %Y %H:%M:%S +0000", time.strptime(day + "/" + month + "/" + year, "%d/%m/%Y"))
                if url not in seen:
                    seen.add(url)
                    articles.append({
                         'title'      :title
                        ,'date'       :date
                        ,'url'        :url
                        ,'description':description
                    })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds
|
||||
|
||||
|
@ -1,8 +1,9 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2012, mkydgr'
|
||||
'''
|
||||
www.wired.com
|
||||
based on the (broken) built-in recipe by Darko Miletic <darko.miletic at gmail.com>
|
||||
'''
|
||||
|
||||
import re
|
||||
@ -11,11 +12,11 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Wired(BasicNewsRecipe):
|
||||
title = 'Wired Magazine'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Gaming news'
|
||||
__author__ = 'mkydgr'
|
||||
description = 'Technology News'
|
||||
publisher = 'Conde Nast Digital'
|
||||
category = 'news, games, IT, gadgets'
|
||||
oldest_article = 32
|
||||
category = ''
|
||||
oldest_article = 500
|
||||
delay = 1
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
@ -25,7 +26,8 @@ class Wired(BasicNewsRecipe):
|
||||
language = 'en'
|
||||
publication_type = 'magazine'
|
||||
extra_css = ' body{font-family: Arial,Verdana,sans-serif} .entryDescription li {display: inline; list-style-type: none} '
|
||||
index = 'http://www.wired.com/magazine/'
|
||||
index = 'http://www.wired.com/magazine'
|
||||
departments = ['features','start','test','play','found', 'reviews']
|
||||
|
||||
preprocess_regexps = [(re.compile(r'<meta name="Title".*<title>', re.DOTALL|re.IGNORECASE),lambda match: '<title>')]
|
||||
conversion_options = {
|
||||
@ -38,80 +40,53 @@ class Wired(BasicNewsRecipe):
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'post'})]
|
||||
remove_tags_after = dict(name='div', attrs={'class':'tweetmeme_button'})
|
||||
remove_tags = [
|
||||
dict(name=['object','embed','iframe','link','meta','base'])
|
||||
dict(name=['object','embed','iframe','link'])
|
||||
,dict(name='div', attrs={'class':['podcast_storyboard','tweetmeme_button']})
|
||||
,dict(attrs={'id':'ff_bottom_nav'})
|
||||
,dict(name='a',attrs={'href':'http://www.wired.com/app'})
|
||||
]
|
||||
remove_attributes = ['height','width','lang','border','clear']
|
||||
remove_attributes = ['height','width']
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
totalfeeds = []
|
||||
|
||||
soup = self.index_to_soup(self.index)
|
||||
majorf = soup.find('div',attrs={'class':'index'})
|
||||
if majorf:
|
||||
pfarticles = []
|
||||
firsta = majorf.find(attrs={'class':'spread-header'})
|
||||
if firsta:
|
||||
pfarticles.append({
|
||||
'title' :self.tag_to_string(firsta.a)
|
||||
,'date' :strftime(self.timefmt)
|
||||
,'url' :'http://www.wired.com' + firsta.a['href']
|
||||
,'description':''
|
||||
})
|
||||
for itt in majorf.findAll('li'):
|
||||
itema = itt.find('a',href=True)
|
||||
if itema:
|
||||
pfarticles.append({
|
||||
'title' :self.tag_to_string(itema)
|
||||
,'date' :strftime(self.timefmt)
|
||||
,'url' :'http://www.wired.com' + itema['href']
|
||||
,'description':''
|
||||
})
|
||||
totalfeeds.append(('Cover', pfarticles))
|
||||
features = soup.find('div',attrs={'id':'my-glider'})
|
||||
if features:
|
||||
farticles = []
|
||||
for item in features.findAll('div',attrs={'class':'section'}):
|
||||
divurl = item.find('div',attrs={'class':'feature-header'})
|
||||
if divurl:
|
||||
divdesc = item.find('div',attrs={'class':'feature-text'})
|
||||
url = divurl.a['href']
|
||||
if not divurl.a['href'].startswith('http://www.wired.com'):
|
||||
url = 'http://www.wired.com' + divurl.a['href']
|
||||
title = self.tag_to_string(divurl.a)
|
||||
description = self.tag_to_string(divdesc)
|
||||
date = strftime(self.timefmt)
|
||||
farticles.append({
|
||||
'title' :title
|
||||
,'date' :date
|
||||
,'url' :url
|
||||
,'description':description
|
||||
})
|
||||
totalfeeds.append(('Featured Articles', farticles))
|
||||
|
||||
#department feeds
|
||||
departments = ['rants','start','test','play','found']
|
||||
dept = soup.find('div',attrs={'id':'magazine-departments'})
|
||||
if dept:
|
||||
for ditem in departments:
|
||||
depts = soup.find('div',attrs={'id':'department-posts'})
|
||||
|
||||
if depts:
|
||||
for ditem in self.departments:
|
||||
darticles = []
|
||||
department = dept.find('div',attrs={'id':'department-'+ditem})
|
||||
department = depts.find('h3',attrs={'id':'department-'+ditem})
|
||||
if department:
|
||||
for item in department.findAll('div'):
|
||||
description = ''
|
||||
feed_link = item.find('a')
|
||||
#print '\n###### Found department %s ########'%(ditem)
|
||||
|
||||
el = department.next
|
||||
while el and (el.__class__.__name__ == 'NavigableString' or el.name != 'h3'):
|
||||
if el.__class__.__name__ != 'NavigableString':
|
||||
#print '\t ... element',el.name
|
||||
if el.name == 'ul':
|
||||
for artitem in el.findAll('li'):
|
||||
#print '\t\t ... article',repr(artitem)
|
||||
feed_link = artitem.find('a')
|
||||
#print '\t\t\t ... link',repr(feed_link)
|
||||
if feed_link and feed_link.has_key('href'):
|
||||
url = feed_link['href']
|
||||
url = self.makeurl(feed_link['href'])
|
||||
title = self.tag_to_string(feed_link)
|
||||
date = strftime(self.timefmt)
|
||||
#print '\t\t ... found "%s" %s'%(title,url)
|
||||
darticles.append({
|
||||
'title' :title
|
||||
,'date' :date
|
||||
,'url' :url
|
||||
,'description':description
|
||||
,'description':''
|
||||
})
|
||||
el = None
|
||||
else:
|
||||
el = el.next
|
||||
|
||||
totalfeeds.append((ditem.capitalize(), darticles))
|
||||
return totalfeeds
|
||||
|
||||
@ -120,7 +95,7 @@ class Wired(BasicNewsRecipe):
|
||||
soup = self.index_to_soup(self.index)
|
||||
cover_item = soup.find('div',attrs={'class':'spread-image'})
|
||||
if cover_item:
|
||||
cover_url = 'http://www.wired.com' + cover_item.a.img['src']
|
||||
cover_url = self.makeurl(cover_item.a.img['src'])
|
||||
return cover_url
|
||||
|
||||
def print_version(self, url):
|
||||
@ -129,17 +104,10 @@ class Wired(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.findAll('a'):
|
||||
if item.string is not None:
|
||||
tstr = item.string
|
||||
item.replaceWith(tstr)
|
||||
else:
|
||||
item.name='span'
|
||||
for atrs in ['href','target','alt','title','name','id']:
|
||||
if item.has_key(atrs):
|
||||
del item[atrs]
|
||||
for item in soup.findAll('img'):
|
||||
if not item.has_key('alt'):
|
||||
item['alt'] = 'image'
|
||||
return soup
|
||||
|
||||
def makeurl(self, addr):
    """Return ``addr`` as an absolute URL without doubled trailing slashes.

    Addresses that do not start with ``http`` are prefixed with
    ``http://www.wired.com``. Trailing characters are then removed one at
    a time for as long as the address ends in ``//``.
    """
    url = addr if addr.startswith('http') else 'http://www.wired.com' + addr
    while url.endswith('//'):
        url = url[:-1]
    return url
|
||||
|
||||
|
BIN
resources/compiled_coffeescript.zip
Normal file
BIN
resources/compiled_coffeescript.zip
Normal file
Binary file not shown.
@ -506,3 +506,17 @@ change_book_details_font_size_by = 0
|
||||
# No compile: compile_gpm_templates = False
|
||||
compile_gpm_templates = True
|
||||
|
||||
#: What format to default to when using the Tweak feature
|
||||
# The Tweak feature of calibre allows direct editing of a book format.
|
||||
# If multiple formats are available, calibre will offer you a choice
|
||||
# of formats, defaulting to your preferred output format if it is available.
|
||||
# Set this tweak to a specific value of 'EPUB' or 'AZW3' to always default
|
||||
# to that format rather than your output format preference.
|
||||
# Set to a value of 'remember' to use whichever format you chose last time you
|
||||
# used the Tweak feature.
|
||||
# Examples:
|
||||
# default_tweak_format = None (Use output format)
|
||||
# default_tweak_format = 'EPUB'
|
||||
# default_tweak_format = 'remember'
|
||||
default_tweak_format = None
|
||||
|
||||
|
19
session.vim
19
session.vim
@ -20,11 +20,26 @@ vipy.session.initialize(project_name='calibre', src_dir=src_dir,
|
||||
project_dir=project_dir, base_dir=project_dir)
|
||||
|
||||
def recipe_title_callback(raw):
|
||||
return eval(raw.decode('utf-8')).replace(' ', '_')
|
||||
try:
|
||||
raw = eval(raw)
|
||||
if isinstance(raw, bytes):
|
||||
raw = raw.decode('utf-8')
|
||||
return raw.replace(u' ', u'_')
|
||||
except:
|
||||
print ('Failed to decode recipe title: %r'%raw)
|
||||
raise
|
||||
|
||||
vipy.session.add_content_browser('<leader>r', 'Recipe',
|
||||
vipy.session.glob_based_iterator(os.path.join(project_dir, 'recipes', '*.recipe')),
|
||||
vipy.session.regexp_based_matcher(r'title\s*=\s*(?P<title>.+)', 'title', recipe_title_callback))
|
||||
EOFPY
|
||||
|
||||
nmap \log :enew<CR>:read ! bzr log -l 500 <CR>:e Changelog.yaml<CR>:e src/calibre/constants.py<CR>
|
||||
" CalibreLog: show the last 500 bzr log entries in a scratch buffer, then
" open Changelog.yaml and src/calibre/constants.py for editing.
fun! CalibreLog()
|
||||
" Start from a new, empty buffer
enew
|
||||
" Insert the output of the bzr log command into it
read ! bzr log -l 500
|
||||
" Make the log buffer unmodifiable, with no swap file and no backing file
set nomodifiable noswapfile buftype=nofile
|
||||
edit Changelog.yaml
|
||||
edit src/calibre/constants.py
|
||||
endfun
|
||||
|
||||
nnoremap \log :call CalibreLog()<CR>
|
||||
|
@ -120,7 +120,7 @@ if iswindows:
|
||||
poppler_lib_dirs = consolidate('POPPLER_LIB_DIR', sw_lib_dir)
|
||||
popplerqt4_lib_dirs = poppler_lib_dirs
|
||||
poppler_libs = ['poppler']
|
||||
magick_inc_dirs = [os.path.join(prefix, 'build', 'ImageMagick-6.6.6')]
|
||||
magick_inc_dirs = [os.path.join(prefix, 'build', 'ImageMagick-6.7.6')]
|
||||
magick_lib_dirs = [os.path.join(magick_inc_dirs[0], 'VisualMagick', 'lib')]
|
||||
magick_libs = ['CORE_RL_wand_', 'CORE_RL_magick_']
|
||||
podofo_inc = os.path.join(sw_inc_dir, 'podofo')
|
||||
|
@ -22,7 +22,8 @@ Do not modify it unless you know what you are doing.
|
||||
import sys, os
|
||||
|
||||
path = os.environ.get('CALIBRE_PYTHON_PATH', {path!r})
|
||||
sys.path.insert(0, path)
|
||||
if path not in sys.path:
|
||||
sys.path.insert(0, path)
|
||||
|
||||
sys.resources_location = os.environ.get('CALIBRE_RESOURCES_PATH', {resources!r})
|
||||
sys.extensions_location = os.environ.get('CALIBRE_EXTENSIONS_PATH', {extensions!r})
|
||||
|
@ -41,8 +41,8 @@ binary_includes = [
|
||||
'/usr/lib/libgthread-2.0.so.0',
|
||||
'/usr/lib/libpng14.so.14',
|
||||
'/usr/lib/libexslt.so.0',
|
||||
MAGICK_PREFIX+'/lib/libMagickWand.so.4',
|
||||
MAGICK_PREFIX+'/lib/libMagickCore.so.4',
|
||||
MAGICK_PREFIX+'/lib/libMagickWand.so.5',
|
||||
MAGICK_PREFIX+'/lib/libMagickCore.so.5',
|
||||
'/usr/lib/libgcrypt.so.11',
|
||||
'/usr/lib/libgpg-error.so.0',
|
||||
'/usr/lib/libphonon.so.4',
|
||||
|
@ -429,7 +429,7 @@ class Py2App(object):
|
||||
def add_imagemagick(self):
|
||||
info('\nAdding ImageMagick')
|
||||
for x in ('Wand', 'Core'):
|
||||
self.install_dylib(os.path.join(SW, 'lib', 'libMagick%s.4.dylib'%x))
|
||||
self.install_dylib(os.path.join(SW, 'lib', 'libMagick%s.5.dylib'%x))
|
||||
idir = glob.glob(os.path.join(SW, 'lib', 'ImageMagick-*'))[-1]
|
||||
dest = os.path.join(self.frameworks_dir, 'ImageMagick')
|
||||
if os.path.exists(dest):
|
||||
|
@ -18,7 +18,7 @@ QT_DIR = 'Q:\\Qt\\4.8.1'
|
||||
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
|
||||
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
|
||||
SW = r'C:\cygwin\home\kovid\sw'
|
||||
IMAGEMAGICK = os.path.join(SW, 'build', 'ImageMagick-6.6.6',
|
||||
IMAGEMAGICK = os.path.join(SW, 'build', 'ImageMagick-6.7.6',
|
||||
'VisualMagick', 'bin')
|
||||
CRT = r'C:\Microsoft.VC90.CRT'
|
||||
|
||||
|
@ -336,6 +336,8 @@ Index: src/PdfFiltersPrivate.cpp
|
||||
ImageMagick
|
||||
--------------
|
||||
|
||||
Get the source from: http://www.imagemagick.org/download/windows/ImageMagick-windows.zip
|
||||
|
||||
Edit VisualMagick/configure/configure.cpp to set
|
||||
|
||||
int projectType = MULTITHREADEDDLL;
|
||||
@ -349,7 +351,10 @@ Edit magick/magick-config.h
|
||||
Undefine ProvideDllMain and MAGICKCORE_X11_DELEGATE
|
||||
|
||||
Now open VisualMagick/VisualDynamicMT.sln set to Release
|
||||
Remove the CORE_xlib and UTIL_Imdisplay project CORE_Magick++
|
||||
Remove the CORE_xlib, UTIL_Imdisplay and CORE_Magick++ projects.
|
||||
|
||||
Press F7 to build the project. You will get one error due to the removal of xlib; ignore
|
||||
it.
|
||||
|
||||
calibre
|
||||
---------
|
||||
|
1430
setup/iso_639/is.po
1430
setup/iso_639/is.po
File diff suppressed because it is too large
Load Diff
@ -10,14 +10,14 @@ msgstr ""
|
||||
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
|
||||
"devel@lists.alioth.debian.org>\n"
|
||||
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
|
||||
"PO-Revision-Date: 2012-04-22 07:11+0000\n"
|
||||
"PO-Revision-Date: 2012-05-12 10:25+0000\n"
|
||||
"Last-Translator: kulkke <Unknown>\n"
|
||||
"Language-Team: Turkish <gnome-turk@gnome.org>\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=UTF-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"X-Launchpad-Export-Date: 2012-04-23 04:45+0000\n"
|
||||
"X-Generator: Launchpad (build 15135)\n"
|
||||
"X-Launchpad-Export-Date: 2012-05-13 04:43+0000\n"
|
||||
"X-Generator: Launchpad (build 15225)\n"
|
||||
"Language: tr\n"
|
||||
|
||||
#. name for aaa
|
||||
@ -406,7 +406,7 @@ msgstr ""
|
||||
|
||||
#. name for aed
|
||||
msgid "Argentine Sign Language"
|
||||
msgstr ""
|
||||
msgstr "Arjantin İşaret Dili"
|
||||
|
||||
#. name for aee
|
||||
msgid "Pashayi; Northeast"
|
||||
@ -1554,7 +1554,7 @@ msgstr "Dano"
|
||||
|
||||
#. name for asp
|
||||
msgid "Algerian Sign Language"
|
||||
msgstr ""
|
||||
msgstr "Cezayir İşaret Dili"
|
||||
|
||||
#. name for asq
|
||||
msgid "Austrian Sign Language"
|
||||
@ -2578,7 +2578,7 @@ msgstr "Blafe"
|
||||
|
||||
#. name for bfi
|
||||
msgid "British Sign Language"
|
||||
msgstr ""
|
||||
msgstr "Britanya İşaret Dili"
|
||||
|
||||
#. name for bfj
|
||||
msgid "Bafanji"
|
||||
@ -4167,7 +4167,7 @@ msgstr "Bukat"
|
||||
|
||||
#. name for bvl
|
||||
msgid "Bolivian Sign Language"
|
||||
msgstr ""
|
||||
msgstr "Bolivya İşaret Dili"
|
||||
|
||||
#. name for bvm
|
||||
msgid "Bamunka"
|
||||
@ -4587,7 +4587,7 @@ msgstr "Biri"
|
||||
|
||||
#. name for bzs
|
||||
msgid "Brazilian Sign Language"
|
||||
msgstr ""
|
||||
msgstr "Brezilya İşaret Dili"
|
||||
|
||||
#. name for bzt
|
||||
msgid "Brithenig"
|
||||
@ -5623,11 +5623,11 @@ msgstr ""
|
||||
|
||||
#. name for csf
|
||||
msgid "Cuba Sign Language"
|
||||
msgstr ""
|
||||
msgstr "Küba İşaret Dili"
|
||||
|
||||
#. name for csg
|
||||
msgid "Chilean Sign Language"
|
||||
msgstr ""
|
||||
msgstr "Şili İşaret Dili"
|
||||
|
||||
#. name for csh
|
||||
msgid "Chin; Asho"
|
||||
@ -5651,7 +5651,7 @@ msgstr ""
|
||||
|
||||
#. name for csn
|
||||
msgid "Colombian Sign Language"
|
||||
msgstr ""
|
||||
msgstr "Kolombiya İşaret Dili"
|
||||
|
||||
#. name for cso
|
||||
msgid "Chinantec; Sochiapan"
|
||||
@ -5663,7 +5663,7 @@ msgstr ""
|
||||
|
||||
#. name for csr
|
||||
msgid "Costa Rican Sign Language"
|
||||
msgstr ""
|
||||
msgstr "Kosta Rika İşaret Dili"
|
||||
|
||||
#. name for css
|
||||
msgid "Ohlone; Southern"
|
||||
@ -7347,7 +7347,7 @@ msgstr ""
|
||||
|
||||
#. name for esl
|
||||
msgid "Egypt Sign Language"
|
||||
msgstr ""
|
||||
msgstr "Mısır İşaret Dili"
|
||||
|
||||
#. name for esm
|
||||
msgid "Esuma"
|
||||
@ -7551,7 +7551,7 @@ msgstr ""
|
||||
|
||||
#. name for fcs
|
||||
msgid "Quebec Sign Language"
|
||||
msgstr ""
|
||||
msgstr "Quebec İşaret Dili"
|
||||
|
||||
#. name for fer
|
||||
msgid "Feroge"
|
||||
@ -8806,7 +8806,7 @@ msgstr ""
|
||||
|
||||
#. name for gsm
|
||||
msgid "Guatemalan Sign Language"
|
||||
msgstr ""
|
||||
msgstr "Guatemala İşaret Dili"
|
||||
|
||||
#. name for gsn
|
||||
msgid "Gusan"
|
||||
@ -10895,7 +10895,7 @@ msgstr ""
|
||||
|
||||
#. name for jos
|
||||
msgid "Jordanian Sign Language"
|
||||
msgstr ""
|
||||
msgstr "Ürdün İşaret Dili"
|
||||
|
||||
#. name for jow
|
||||
msgid "Jowulu"
|
||||
@ -13847,7 +13847,7 @@ msgstr ""
|
||||
|
||||
#. name for lbs
|
||||
msgid "Libyan Sign Language"
|
||||
msgstr ""
|
||||
msgstr "Libya İşaret Dili"
|
||||
|
||||
#. name for lbt
|
||||
msgid "Lachi"
|
||||
@ -15591,7 +15591,7 @@ msgstr ""
|
||||
|
||||
#. name for mfs
|
||||
msgid "Mexican Sign Language"
|
||||
msgstr ""
|
||||
msgstr "Meksika İşaret Dili"
|
||||
|
||||
#. name for mft
|
||||
msgid "Mokerang"
|
||||
@ -17055,7 +17055,7 @@ msgstr ""
|
||||
|
||||
#. name for mul
|
||||
msgid "Multiple languages"
|
||||
msgstr ""
|
||||
msgstr "Çoklu diller"
|
||||
|
||||
#. name for mum
|
||||
msgid "Maiwala"
|
||||
@ -17867,7 +17867,7 @@ msgstr ""
|
||||
|
||||
#. name for ncs
|
||||
msgid "Nicaraguan Sign Language"
|
||||
msgstr ""
|
||||
msgstr "Nikaragua İşaret Dili"
|
||||
|
||||
#. name for nct
|
||||
msgid "Naga; Chothe"
|
||||
@ -19495,7 +19495,7 @@ msgstr ""
|
||||
|
||||
#. name for nzs
|
||||
msgid "New Zealand Sign Language"
|
||||
msgstr ""
|
||||
msgstr "Yeni Zelanda İşaret Dili"
|
||||
|
||||
#. name for nzu
|
||||
msgid "Teke-Nzikou"
|
||||
@ -21219,7 +21219,7 @@ msgstr ""
|
||||
|
||||
#. name for prl
|
||||
msgid "Peruvian Sign Language"
|
||||
msgstr ""
|
||||
msgstr "Peru İşaret Dili"
|
||||
|
||||
#. name for prm
|
||||
msgid "Kibiri"
|
||||
@ -22699,7 +22699,7 @@ msgstr ""
|
||||
|
||||
#. name for sdl
|
||||
msgid "Saudi Arabian Sign Language"
|
||||
msgstr ""
|
||||
msgstr "Suudi Arabistan İşaret Dili"
|
||||
|
||||
#. name for sdm
|
||||
msgid "Semandang"
|
||||
@ -22847,7 +22847,7 @@ msgstr ""
|
||||
|
||||
#. name for sfs
|
||||
msgid "South African Sign Language"
|
||||
msgstr ""
|
||||
msgstr "Güney Afrika İşaret Dili"
|
||||
|
||||
#. name for sfw
|
||||
msgid "Sehwi"
|
||||
@ -25943,7 +25943,7 @@ msgstr ""
|
||||
|
||||
#. name for tse
|
||||
msgid "Tunisian Sign Language"
|
||||
msgstr ""
|
||||
msgstr "Tunus İşaret Dili"
|
||||
|
||||
#. name for tsf
|
||||
msgid "Tamang; Southwestern"
|
||||
@ -27348,7 +27348,7 @@ msgstr ""
|
||||
|
||||
#. name for vsl
|
||||
msgid "Venezuelan Sign Language"
|
||||
msgstr ""
|
||||
msgstr "Venezuela İşaret Dili"
|
||||
|
||||
#. name for vsv
|
||||
msgid "Valencian Sign Language"
|
||||
@ -28760,7 +28760,7 @@ msgstr ""
|
||||
|
||||
#. name for xms
|
||||
msgid "Moroccan Sign Language"
|
||||
msgstr ""
|
||||
msgstr "Fas İşaret Dili"
|
||||
|
||||
#. name for xmt
|
||||
msgid "Matbat"
|
||||
@ -29540,7 +29540,7 @@ msgstr ""
|
||||
|
||||
#. name for yid
|
||||
msgid "Yiddish"
|
||||
msgstr "Yiddiş"
|
||||
msgstr "Yidiş"
|
||||
|
||||
#. name for yif
|
||||
msgid "Ache"
|
||||
|
@ -4,7 +4,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
__appname__ = u'calibre'
|
||||
numeric_version = (0, 8, 51)
|
||||
numeric_version = (0, 8, 52)
|
||||
__version__ = u'.'.join(map(unicode, numeric_version))
|
||||
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
|
||||
|
@ -302,7 +302,9 @@ class OutputFormatPlugin(Plugin):
|
||||
|
||||
:param item: The item (HTML file) being processed
|
||||
:param stylizer: A Stylizer object containing the flattened styles for
|
||||
item. You can get the style for any element by stylizer.style(element).
|
||||
item. You can get the style for any element by
|
||||
stylizer.style(element).
|
||||
|
||||
'''
|
||||
pass
|
||||
|
||||
|
@ -202,7 +202,8 @@ class ANDROID(USBMS):
|
||||
'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853',
|
||||
'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD',
|
||||
'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC',
|
||||
'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER', 'XT875']
|
||||
'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER', 'XT875',
|
||||
'UMS_COMPOSITE']
|
||||
|
||||
OSX_MAIN_MEM = 'Android Device Main Memory'
|
||||
|
||||
|
@ -57,10 +57,11 @@ class PICO(NEWSMY):
|
||||
gui_name = 'Pico'
|
||||
description = _('Communicate with the Pico reader.')
|
||||
|
||||
VENDOR_NAME = ['TECLAST', 'IMAGIN', 'LASER-']
|
||||
VENDOR_NAME = ['TECLAST', 'IMAGIN', 'LASER-', '']
|
||||
WINDOWS_MAIN_MEM = ['USBDISK__USER', 'EB720']
|
||||
EBOOK_DIR_MAIN = 'Books'
|
||||
FORMATS = ['EPUB', 'FB2', 'TXT', 'LRC', 'PDB', 'PDF', 'HTML', 'WTXT']
|
||||
SCAN_FROM_ROOT = True
|
||||
|
||||
class IPAPYRUS(TECLAST_K3):
|
||||
|
||||
|
@ -1,4 +1,25 @@
|
||||
from __future__ import with_statement
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
|
||||
class ConversionUserFeedBack(Exception):
|
||||
|
||||
def __init__(self, title, msg, level='info', det_msg=''):
|
||||
''' Show a simple message to the user
|
||||
|
||||
:param title: The title (very short description)
|
||||
:param msg: The message to show the user
|
||||
:param level: Must be one of 'info', 'warn' or 'error'
|
||||
:param det_msg: Optional detailed message to show the user
|
||||
'''
|
||||
import json
|
||||
Exception.__init__(self, json.dumps({'msg':msg, 'level':level,
|
||||
'det_msg':det_msg, 'title':title}))
|
||||
self.title, self.msg, self.det_msg = title, msg, det_msg
|
||||
self.level = level
|
||||
|
||||
|
@ -15,6 +15,7 @@ from calibre.utils.logging import Log
|
||||
from calibre.constants import preferred_encoding
|
||||
from calibre.customize.conversion import OptionRecommendation
|
||||
from calibre import patheq
|
||||
from calibre.ebooks.conversion import ConversionUserFeedBack
|
||||
|
||||
USAGE = '%prog ' + _('''\
|
||||
input_file output_file [options]
|
||||
@ -320,7 +321,16 @@ def main(args=sys.argv):
|
||||
if n.dest]
|
||||
plumber.merge_ui_recommendations(recommendations)
|
||||
|
||||
try:
|
||||
plumber.run()
|
||||
except ConversionUserFeedBack as e:
|
||||
ll = {'info': log.info, 'warn': log.warn,
|
||||
'error':log.error}.get(e.level, log.info)
|
||||
ll(e.title)
|
||||
if e.det_msg:
|
||||
# ConversionUserFeedBack stores the detailed message as ``det_msg``
log.debug(e.det_msg)
|
||||
ll(e.msg)
|
||||
raise SystemExit(1)
|
||||
|
||||
log(_('Output saved to'), ' ', plumber.output)
|
||||
|
||||
|
@ -207,7 +207,7 @@ class EPUBInput(InputFormatPlugin):
|
||||
if rc:
|
||||
cover_toc_item = None
|
||||
for item in oeb.toc.iterdescendants():
|
||||
if item.href == rc:
|
||||
if item.href and item.href.partition('#')[0] == rc:
|
||||
cover_toc_item = item
|
||||
break
|
||||
spine = {x.href for x in oeb.spine}
|
||||
|
@ -393,8 +393,14 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
for tag in XPath('//h:body/descendant::h:script')(root):
|
||||
tag.getparent().remove(tag)
|
||||
|
||||
formchildren = XPath('./h:input|./h:button|./h:textarea|'
|
||||
'./h:label|./h:fieldset|./h:legend')
|
||||
for tag in XPath('//h:form')(root):
|
||||
if formchildren(tag):
|
||||
tag.getparent().remove(tag)
|
||||
else:
|
||||
# Not a real form
|
||||
tag.tag = XHTML('div')
|
||||
|
||||
for tag in XPath('//h:center')(root):
|
||||
tag.tag = XHTML('div')
|
||||
|
@ -343,21 +343,25 @@ OptionRecommendation(name='remove_fake_margins',
|
||||
OptionRecommendation(name='margin_top',
|
||||
recommended_value=5.0, level=OptionRecommendation.LOW,
|
||||
help=_('Set the top margin in pts. Default is %default. '
|
||||
'Setting this to less than zero will cause no margin to be set. '
|
||||
'Note: 72 pts equals 1 inch')),
|
||||
|
||||
OptionRecommendation(name='margin_bottom',
|
||||
recommended_value=5.0, level=OptionRecommendation.LOW,
|
||||
help=_('Set the bottom margin in pts. Default is %default. '
|
||||
'Setting this to less than zero will cause no margin to be set. '
|
||||
'Note: 72 pts equals 1 inch')),
|
||||
|
||||
OptionRecommendation(name='margin_left',
|
||||
recommended_value=5.0, level=OptionRecommendation.LOW,
|
||||
help=_('Set the left margin in pts. Default is %default. '
|
||||
'Setting this to less than zero will cause no margin to be set. '
|
||||
'Note: 72 pts equals 1 inch')),
|
||||
|
||||
OptionRecommendation(name='margin_right',
|
||||
recommended_value=5.0, level=OptionRecommendation.LOW,
|
||||
help=_('Set the right margin in pts. Default is %default. '
|
||||
'Setting this to less than zero will cause no margin to be set. '
|
||||
'Note: 72 pts equals 1 inch')),
|
||||
|
||||
OptionRecommendation(name='change_justification',
|
||||
@ -885,7 +889,10 @@ OptionRecommendation(name='search_replace',
|
||||
self.log.debug('Resolved conversion options')
|
||||
try:
|
||||
self.log.debug('calibre version:', __version__)
|
||||
self.log.debug(pprint.pformat(self.opts.__dict__))
|
||||
odict = dict(self.opts.__dict__)
|
||||
for x in ('username', 'password'):
|
||||
odict.pop(x, None)
|
||||
self.log.debug(pprint.pformat(odict))
|
||||
except:
|
||||
self.log.exception('Failed to get resolved conversion options')
|
||||
|
||||
|
@ -18,7 +18,7 @@ from calibre.ebooks.metadata import check_isbn
|
||||
from calibre.ebooks.metadata.sources.base import (Source, Option, fixcase,
|
||||
fixauthors)
|
||||
from calibre.ebooks.metadata.book.base import Metadata
|
||||
from calibre.utils.date import parse_date
|
||||
from calibre.utils.date import parse_only_date
|
||||
from calibre.utils.localization import canonicalize_lang
|
||||
|
||||
class Worker(Thread): # Get details {{{
|
||||
@ -471,7 +471,7 @@ class Worker(Thread): # Get details {{{
|
||||
ans = x.tail
|
||||
date = ans.rpartition('(')[-1].replace(')', '').strip()
|
||||
date = self.delocalize_datestr(date)
|
||||
return parse_date(date, assume_utc=True)
|
||||
return parse_only_date(date, assume_utc=True)
|
||||
|
||||
def parse_language(self, pd):
|
||||
for x in reversed(pd.xpath(self.language_xpath)):
|
||||
|
@ -189,7 +189,7 @@ class MOBIFile(object):
|
||||
def read_tbs(self):
|
||||
from calibre.ebooks.mobi.writer8.tbs import (Entry, DOC,
|
||||
collect_indexing_data, encode_strands_as_sequences,
|
||||
sequences_to_bytes)
|
||||
sequences_to_bytes, calculate_all_tbs, NegativeStrandIndex)
|
||||
entry_map = []
|
||||
for index in self.ncx_index:
|
||||
vals = list(index)[:-1] + [None, None, None, None]
|
||||
@ -206,6 +206,14 @@ class MOBIFile(object):
|
||||
the start of the text record.
|
||||
|
||||
''')]
|
||||
|
||||
tbs_type = 8
|
||||
try:
|
||||
calculate_all_tbs(indexing_data)
|
||||
except NegativeStrandIndex:
|
||||
calculate_all_tbs(indexing_data, tbs_type=5)
|
||||
tbs_type = 5
|
||||
|
||||
for i, strands in enumerate(indexing_data):
|
||||
rec = self.text_records[i]
|
||||
tbs_bytes = rec.trailing_data.get('indexing', b'')
|
||||
@ -236,8 +244,12 @@ class MOBIFile(object):
|
||||
desc.append('Sequence #%d: %r %r'%(j, seq[0], seq[1]))
|
||||
if tbs_bytes:
|
||||
desc.append('Remaining bytes: %s'%format_bytes(tbs_bytes))
|
||||
calculated_sequences = encode_strands_as_sequences(strands)
|
||||
calculated_sequences = encode_strands_as_sequences(strands,
|
||||
tbs_type=tbs_type)
|
||||
try:
|
||||
calculated_bytes = sequences_to_bytes(calculated_sequences)
|
||||
except:
|
||||
calculated_bytes = b'failed to calculate tbs bytes'
|
||||
if calculated_bytes != otbs:
|
||||
print ('WARNING: TBS mismatch for record %d'%i)
|
||||
desc.append('WARNING: TBS mismatch!')
|
||||
|
@ -71,6 +71,14 @@ def explode(path, dest, question=lambda x:True):
|
||||
return fork_job('calibre.ebooks.mobi.tweak', 'do_explode', args=(path,
|
||||
dest), no_output=True)['result']
|
||||
|
||||
def set_cover(oeb):
|
||||
if 'cover' not in oeb.guide or oeb.metadata['cover']: return
|
||||
cover = oeb.guide['cover']
|
||||
if cover.href in oeb.manifest.hrefs:
|
||||
item = oeb.manifest.hrefs[cover.href]
|
||||
oeb.metadata.clear('cover')
|
||||
oeb.metadata.add('cover', item.id)
|
||||
|
||||
def do_rebuild(opf, dest_path):
|
||||
plumber = Plumber(opf, dest_path, default_log)
|
||||
plumber.setup_options()
|
||||
@ -79,6 +87,7 @@ def do_rebuild(opf, dest_path):
|
||||
|
||||
plumber.opts.mobi_passthrough = True
|
||||
oeb = create_oebbook(default_log, opf, plumber.opts)
|
||||
set_cover(oeb)
|
||||
outp.convert(oeb, dest_path, inp, plumber.opts, default_log)
|
||||
|
||||
def rebuild(src_dir, dest_path):
|
||||
|
@ -56,7 +56,7 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
|
||||
items][:1]
|
||||
else:
|
||||
creators = [unicode(c) for c in items]
|
||||
items = ['; '.join(creators)]
|
||||
items = creators
|
||||
for item in items:
|
||||
data = unicode(item)
|
||||
if term != 'description':
|
||||
|
@ -106,6 +106,9 @@ def collect_indexing_data(entries, text_record_lengths):
|
||||
|
||||
return data
|
||||
|
||||
class NegativeStrandIndex(Exception):
|
||||
pass
|
||||
|
||||
def encode_strands_as_sequences(strands, tbs_type=8):
|
||||
''' Encode the list of strands for a single text record into a list of
|
||||
sequences, ready to be converted into TBS bytes. '''
|
||||
@ -144,10 +147,16 @@ def encode_strands_as_sequences(strands, tbs_type=8):
|
||||
index = entries[0].index - (entries[0].parent or 0)
|
||||
if ans and not strand_seqs:
|
||||
# We are in the second or later strands, so we need to use a
|
||||
# special flag and index value. The index value if the entry
|
||||
# special flag and index value. The index value is the entry
|
||||
# index - the index of the last entry in the previous strand.
|
||||
extra[0b1000] = True
|
||||
index = last_index - entries[0].index
|
||||
if index < 0:
|
||||
if tbs_type == 5:
|
||||
index = -index
|
||||
else:
|
||||
raise NegativeStrandIndex()
|
||||
else:
|
||||
extra[0b1000] = True
|
||||
last_index = entries[-1].index
|
||||
strand_seqs.append((index, extra))
|
||||
|
||||
@ -167,20 +176,31 @@ def sequences_to_bytes(sequences):
|
||||
flag_size = 3
|
||||
for val, extra in sequences:
|
||||
ans.append(encode_tbs(val, extra, flag_size))
|
||||
flag_size = 4 # only the first seuqence has flag size 3 as all
|
||||
flag_size = 4 # only the first sequence has flag size 3 as all
|
||||
# subsequent sequences could need the 0b1000 flag
|
||||
return b''.join(ans)
|
||||
|
||||
def calculate_all_tbs(indexing_data, tbs_type=8):
|
||||
rmap = {}
|
||||
for i, strands in enumerate(indexing_data):
|
||||
sequences = encode_strands_as_sequences(strands, tbs_type=tbs_type)
|
||||
tbs_bytes = sequences_to_bytes(sequences)
|
||||
rmap[i+1] = tbs_bytes
|
||||
return rmap
|
||||
|
||||
def apply_trailing_byte_sequences(index_table, records, text_record_lengths):
|
||||
entries = tuple(Entry(r['index'], r['offset'], r['length'], r['depth'],
|
||||
r.get('parent', None), r.get('first_child', None), r.get('last_child',
|
||||
None), r['label'], None, None, None, None) for r in index_table)
|
||||
|
||||
indexing_data = collect_indexing_data(entries, text_record_lengths)
|
||||
for i, strands in enumerate(indexing_data):
|
||||
sequences = encode_strands_as_sequences(strands)
|
||||
tbs_bytes = sequences_to_bytes(sequences)
|
||||
records[i+1] += encode_trailing_data(tbs_bytes)
|
||||
try:
|
||||
rmap = calculate_all_tbs(indexing_data)
|
||||
except NegativeStrandIndex:
|
||||
rmap = calculate_all_tbs(indexing_data, tbs_type=5)
|
||||
|
||||
for i, tbs_bytes in rmap.iteritems():
|
||||
records[i] += encode_trailing_data(tbs_bytes)
|
||||
|
||||
return True
|
||||
|
||||
|
@ -966,7 +966,7 @@ class Manifest(object):
|
||||
data = data.cssText
|
||||
if isinstance(data, unicode):
|
||||
data = data.encode('utf-8')
|
||||
return data
|
||||
return data + b'\n'
|
||||
return str(data)
|
||||
|
||||
def __unicode__(self):
|
||||
|
@ -62,7 +62,10 @@ class IndexEntry(object):
|
||||
self.text = toc_entry.text or _('Unknown')
|
||||
self.key = toc_entry.abspath
|
||||
self.anchor = self.start_anchor = toc_entry.fragment or None
|
||||
try:
|
||||
self.spine_pos = spine.index(self.key)
|
||||
except ValueError:
|
||||
self.spine_pos = -1
|
||||
self.anchor_pos = 0
|
||||
if self.spine_pos > -1:
|
||||
self.anchor_pos = spine[self.spine_pos].anchor_map.get(self.anchor,
|
||||
|
@ -157,9 +157,11 @@ class CSSFlattener(object):
|
||||
bs = body.get('style', '').split(';')
|
||||
bs.append('margin-top: 0pt')
|
||||
bs.append('margin-bottom: 0pt')
|
||||
bs.append('margin-left : %fpt'%\
|
||||
if float(self.context.margin_left) >= 0:
|
||||
bs.append('margin-left : %gpt'%\
|
||||
float(self.context.margin_left))
|
||||
bs.append('margin-right : %fpt'%\
|
||||
if float(self.context.margin_right) >= 0:
|
||||
bs.append('margin-right : %gpt'%\
|
||||
float(self.context.margin_right))
|
||||
bs.extend(['padding-left: 0pt', 'padding-right: 0pt'])
|
||||
if self.page_break_on_body:
|
||||
@ -393,6 +395,7 @@ class CSSFlattener(object):
|
||||
l = etree.SubElement(head, XHTML('link'),
|
||||
rel='stylesheet', type=CSS_MIME, href=href)
|
||||
l.tail='\n'
|
||||
if global_href:
|
||||
href = item.relhref(global_href)
|
||||
l = etree.SubElement(head, XHTML('link'),
|
||||
rel='stylesheet', type=CSS_MIME, href=href)
|
||||
@ -413,14 +416,16 @@ class CSSFlattener(object):
|
||||
global_css = defaultdict(list)
|
||||
for item in self.oeb.spine:
|
||||
stylizer = self.stylizers[item]
|
||||
if float(self.context.margin_top) >= 0:
|
||||
stylizer.page_rule['margin-top'] = '%gpt'%\
|
||||
float(self.context.margin_top)
|
||||
if float(self.context.margin_bottom) >= 0:
|
||||
stylizer.page_rule['margin-bottom'] = '%gpt'%\
|
||||
float(self.context.margin_bottom)
|
||||
items = stylizer.page_rule.items()
|
||||
items.sort()
|
||||
css = ';\n'.join("%s: %s" % (key, val) for key, val in items)
|
||||
css = '@page {\n%s\n}\n'%css
|
||||
css = ('@page {\n%s\n}\n'%css) if items else ''
|
||||
rules = [r.cssText for r in stylizer.font_face_rules]
|
||||
raw = '\n\n'.join(rules)
|
||||
css += '\n\n' + raw
|
||||
@ -429,6 +434,8 @@ class CSSFlattener(object):
|
||||
gc_map = {}
|
||||
manifest = self.oeb.manifest
|
||||
for css in global_css:
|
||||
href = None
|
||||
if css.strip():
|
||||
id_, href = manifest.generate('page_css', 'page_styles.css')
|
||||
manifest.add(id_, href, CSS_MIME, data=cssutils.parseString(css,
|
||||
validate=False))
|
||||
|
@ -6,7 +6,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os
|
||||
import os, re
|
||||
from calibre.utils.date import isoformat, now
|
||||
from calibre import guess_type
|
||||
|
||||
@ -141,7 +141,7 @@ class MergeMetadata(object):
|
||||
item = self.oeb.manifest.hrefs[old_cover.href]
|
||||
if not cdata:
|
||||
return item.id
|
||||
self.oeb.manifest.remove(item)
|
||||
self.remove_old_cover(item)
|
||||
elif not cdata:
|
||||
id = self.oeb.manifest.generate(id='cover')
|
||||
self.oeb.manifest.add(id, old_cover.href, 'image/jpeg')
|
||||
@ -152,3 +152,41 @@ class MergeMetadata(object):
|
||||
self.oeb.guide.add('cover', 'Cover', href)
|
||||
return id
|
||||
|
||||
def remove_old_cover(self, cover_item):
|
||||
from calibre.ebooks.oeb.base import XPath
|
||||
from lxml import etree
|
||||
|
||||
self.oeb.manifest.remove(cover_item)
|
||||
|
||||
# Remove any references to the cover in the HTML
|
||||
affected_items = set()
|
||||
for item in self.oeb.spine:
|
||||
try:
|
||||
images = XPath('//h:img[@src]')(item.data)
|
||||
except:
|
||||
images = []
|
||||
removed = False
|
||||
for img in images:
|
||||
href = item.abshref(img.get('src'))
|
||||
if href == cover_item.href:
|
||||
img.getparent().remove(img)
|
||||
removed = True
|
||||
if removed:
|
||||
affected_items.add(item)
|
||||
|
||||
# Check if the resulting HTML has no content, if so remove it
|
||||
for item in affected_items:
|
||||
body = XPath('//h:body')(item.data)
|
||||
if body:
|
||||
text = etree.tostring(body[0], method='text', encoding=unicode)
|
||||
else:
|
||||
text = ''
|
||||
text = re.sub(r'\s+', '', text)
|
||||
if not text and not XPath('//h:img|//svg:svg')(item.data):
|
||||
self.log('Removing %s as it is a wrapper around'
|
||||
' the cover image'%item.href)
|
||||
self.oeb.spine.remove(item)
|
||||
self.oeb.manifest.remove(item)
|
||||
|
||||
|
||||
|
||||
|
@ -372,8 +372,8 @@ class ParseRtf:
|
||||
old_rtf = old_rtf_obj.check_if_old_rtf()
|
||||
if old_rtf:
|
||||
if self.__run_level > 5:
|
||||
msg = 'Older RTF\n'
|
||||
msg += 'self.__run_level is "%s"\n' % self.__run_level
|
||||
msg = 'Older RTF\n' \
|
||||
'self.__run_level is "%s"\n' % self.__run_level
|
||||
raise RtfInvalidCodeException, msg
|
||||
if self.__run_level > 1:
|
||||
sys.stderr.write('File could be older RTF...\n')
|
||||
@ -381,7 +381,7 @@ class ParseRtf:
|
||||
if self.__run_level > 1:
|
||||
sys.stderr.write(
|
||||
'File also has newer RTF.\n'
|
||||
'Will do the best to convert.\n'
|
||||
'Will do the best to convert...\n'
|
||||
)
|
||||
add_brackets_obj = add_brackets.AddBrackets(
|
||||
in_file = self.__temp_file,
|
||||
|
@ -20,6 +20,9 @@ class AddBrackets:
|
||||
"""
|
||||
Add brackets for old RTF.
|
||||
Logic:
|
||||
When control words without their own brackets are encountered
|
||||
and in the list of allowed words, this will add brackets
|
||||
to facilitate the treatment of the file
|
||||
"""
|
||||
def __init__(self, in_file,
|
||||
bug_handler,
|
||||
@ -41,26 +44,17 @@ class AddBrackets:
|
||||
self.__copy = copy
|
||||
self.__write_to = better_mktemp()
|
||||
self.__run_level = run_level
|
||||
|
||||
def __initiate_values(self):
|
||||
"""
|
||||
"""
|
||||
self.__state_dict = {
|
||||
'before_body' : self.__before_body_func,
|
||||
'in_body' : self.__in_body_func,
|
||||
'after_control_word' : self.__after_control_word_func,
|
||||
'in_ignore' : self.__ignore_func,
|
||||
}
|
||||
self.__state = 'before_body'
|
||||
self.__inline = {}
|
||||
self.__temp_group = []
|
||||
self.__open_bracket = 0
|
||||
self.__found_brackets = 0
|
||||
self.__accept = [
|
||||
'cw<ci<bold______',
|
||||
'cw<ci<bold______' ,
|
||||
'cw<ci<annotation' ,
|
||||
'cw<ci<blue______' ,
|
||||
'cw<ci<bold______' ,
|
||||
# 'cw<ci<bold______' ,
|
||||
'cw<ci<caps______' ,
|
||||
'cw<ci<char-style' ,
|
||||
'cw<ci<dbl-strike' ,
|
||||
@ -70,24 +64,36 @@ class AddBrackets:
|
||||
'cw<ci<font-down_' ,
|
||||
'cw<ci<font-size_' ,
|
||||
'cw<ci<font-style' ,
|
||||
'cw<ci<font-up___',
|
||||
'cw<ci<footnot-mk',
|
||||
'cw<ci<font-up___' ,
|
||||
'cw<ci<footnot-mk' ,
|
||||
'cw<ci<green_____' ,
|
||||
'cw<ci<hidden____',
|
||||
'cw<ci<hidden____' ,
|
||||
'cw<ci<italics___' ,
|
||||
'cw<ci<outline___',
|
||||
'cw<ci<outline___' ,
|
||||
'cw<ci<red_______' ,
|
||||
'cw<ci<shadow____',
|
||||
'cw<ci<shadow____' ,
|
||||
'cw<ci<small-caps' ,
|
||||
'cw<ci<strike-thr',
|
||||
'cw<ci<strike-thr' ,
|
||||
'cw<ci<subscript_' ,
|
||||
'cw<ci<superscrip',
|
||||
'cw<ci<superscrip' ,
|
||||
'cw<ci<underlined' ,
|
||||
# 'cw<ul<underlined' ,
|
||||
]
|
||||
|
||||
def __initiate_values(self):
|
||||
"""
|
||||
Init temp values
|
||||
"""
|
||||
self.__state = 'before_body'
|
||||
self.__inline = {}
|
||||
self.__temp_group = []
|
||||
self.__open_bracket = False
|
||||
self.__found_brackets = False
|
||||
|
||||
|
||||
def __before_body_func(self, line):
|
||||
"""
|
||||
If we are before the body, we are not interested in changing anything
|
||||
"""
|
||||
if self.__token_info == 'mi<mk<body-open_':
|
||||
self.__state = 'in_body'
|
||||
@ -95,6 +101,14 @@ class AddBrackets:
|
||||
|
||||
def __in_body_func(self, line):
|
||||
"""
|
||||
Select what action to take in body:
|
||||
1-At the end of the file close the bracket if a bracket was opened
|
||||
This happens if there is a change
|
||||
2-If an open bracket is found the code inside is ignored
|
||||
(written without modifications)
|
||||
3-If an accepted control word is found put the line
|
||||
in a buffer then change state to after cw
|
||||
4-Else simply write the line
|
||||
"""
|
||||
if line == 'cb<nu<clos-brack<0001\n' and self.__open_bracket:
|
||||
self.__write_obj.write(
|
||||
@ -102,7 +116,7 @@ class AddBrackets:
|
||||
)
|
||||
self.__write_obj.write(line)
|
||||
elif self.__token_info == 'ob<nu<open-brack':
|
||||
self.__found_brackets = 1
|
||||
self.__found_brackets = True
|
||||
self.__state = 'in_ignore'
|
||||
self.__ignore_count = self.__ob_count
|
||||
self.__write_obj.write(line)
|
||||
@ -114,6 +128,10 @@ class AddBrackets:
|
||||
|
||||
def __after_control_word_func(self, line):
|
||||
"""
|
||||
After a cw either add next allowed cw to temporary list or
|
||||
change group and write it.
|
||||
If the token leading to an exit is an open bracket go to
|
||||
ignore otherwise goto in body
|
||||
"""
|
||||
if self.__token_info in self.__accept:
|
||||
self.__temp_group.append(line)
|
||||
@ -129,75 +147,77 @@ class AddBrackets:
|
||||
|
||||
def __write_group(self):
|
||||
"""
|
||||
Write a temporary group after accepted control words end
|
||||
But this is mostly useless in my opinion as there is no list of rejected cw
|
||||
This may be a way to implement future old rtf processing for cw
|
||||
Utility: open a group to just put brackets but why be so complicated?
|
||||
Scheme: open brackets, write cw then go to body and back with cw after
|
||||
"""
|
||||
if self.__open_bracket:
|
||||
self.__write_obj.write(
|
||||
'cb<nu<clos-brack<0003\n'
|
||||
)
|
||||
self.__open_bracket = 0
|
||||
inline_string = ''
|
||||
the_keys = self.__inline.keys()
|
||||
for the_key in the_keys:
|
||||
value = self.__inline[the_key]
|
||||
if value != 'false':
|
||||
inline_string += '%s<nu<%s\n' % (the_key, value)
|
||||
self.__open_bracket = False
|
||||
|
||||
inline_string = ''.join(['%s<nu<%s\n' % (k, v) \
|
||||
for k, v in self.__inline.iteritems() \
|
||||
if v != 'false'])
|
||||
if inline_string:
|
||||
self.__write_obj.write('ob<nu<open-brack<0003\n')
|
||||
self.__write_obj.write(inline_string)
|
||||
self.__open_bracket = 1
|
||||
self.__write_obj.write('ob<nu<open-brack<0003\n'
|
||||
'%s' % inline_string)
|
||||
self.__open_bracket = True
|
||||
self.__temp_group = []
|
||||
|
||||
def __change_permanent_group(self):
|
||||
"""
|
||||
use temp group to change permanent group
|
||||
Use temp group to change permanent group
|
||||
If the control word is not accepted remove it
|
||||
What is the interest as it is built to accept only accepted cw
|
||||
in __after_control_word_func?
|
||||
"""
|
||||
for line in self.__temp_group:
|
||||
token_info = line[:16]
|
||||
if token_info in self.__accept:
|
||||
att = line[20:-1]
|
||||
self.__inline[token_info] = att
|
||||
self.__inline = {line[:16] : line[20:-1]\
|
||||
for line in self.__temp_group\
|
||||
# Is this really necessary?
|
||||
if line[:16] in self.__accept}
|
||||
|
||||
|
||||
def __ignore_func(self, line):
|
||||
"""
|
||||
Don't add any brackets while inside of brackets RTF has already
|
||||
added.
|
||||
Just copy data inside of RTF brackets already here.
|
||||
"""
|
||||
self.__write_obj.write(line)
|
||||
if self.__token_info == 'cb<nu<clos-brack'and\
|
||||
self.__cb_count == self.__ignore_count:
|
||||
if self.__token_info == 'cb<nu<clos-brack'\
|
||||
and self.__cb_count == self.__ignore_count:
|
||||
self.__state = 'in_body'
|
||||
|
||||
def __check_brackets(self, in_file):
|
||||
self.__check_brack_obj = check_brackets.CheckBrackets\
|
||||
"""
|
||||
Return True if brackets match
|
||||
"""
|
||||
check_brack_obj = check_brackets.CheckBrackets\
|
||||
(file = in_file)
|
||||
good_br = self.__check_brack_obj.check_brackets()[0]
|
||||
if not good_br:
|
||||
return 1
|
||||
return check_brack_obj.check_brackets()[0]
|
||||
|
||||
def add_brackets(self):
|
||||
"""
|
||||
"""
|
||||
self.__initiate_values()
|
||||
read_obj = open(self.__file, 'r')
|
||||
self.__write_obj = open(self.__write_to, 'w')
|
||||
line_to_read = 1
|
||||
while line_to_read:
|
||||
line_to_read = read_obj.readline()
|
||||
line = line_to_read
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
with open(self.__write_to, 'w') as self.__write_obj:
|
||||
for line in read_obj:
|
||||
self.__token_info = line[:16]
|
||||
if self.__token_info == 'ob<nu<open-brack':
|
||||
self.__ob_count = line[-5:-1]
|
||||
if self.__token_info == 'cb<nu<clos-brack':
|
||||
self.__cb_count = line[-5:-1]
|
||||
action = self.__state_dict.get(self.__state)
|
||||
if action == None:
|
||||
sys.stderr.write('No matching state in module add_brackets.py\n')
|
||||
sys.stderr.write(self.__state + '\n')
|
||||
if action is None:
|
||||
sys.stderr.write(
|
||||
'No matching state in module add_brackets.py\n'
|
||||
'%s\n' % self.__state)
|
||||
action(line)
|
||||
read_obj.close()
|
||||
self.__write_obj.close()
|
||||
bad_brackets = self.__check_brackets(self.__write_to)
|
||||
if not bad_brackets:
|
||||
#Check bad brackets
|
||||
if self.__check_brackets(self.__write_to):
|
||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||
if self.__copy:
|
||||
copy_obj.copy_file(self.__write_to, "add_brackets.data")
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,5 @@
|
||||
import os, sys
|
||||
from codecs import EncodedFile
|
||||
|
||||
from calibre.ebooks.rtf2xml import copy, check_encoding
|
||||
from calibre.ptempfile import better_mktemp
|
||||
@ -41,6 +42,7 @@ class ConvertToTags:
|
||||
self.__run_level = run_level
|
||||
self.__write_to = better_mktemp()
|
||||
self.__convert_utf = False
|
||||
self.__bad_encoding = False
|
||||
|
||||
def __initiate_values(self):
|
||||
"""
|
||||
@ -213,13 +215,14 @@ class ConvertToTags:
|
||||
|
||||
if not check_encoding_obj.check_encoding(self.__file, verbose=False):
|
||||
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
|
||||
elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
|
||||
elif not check_encoding_obj.check_encoding(self.__file, self.__encoding, verbose=False):
|
||||
self.__write_obj.write('<?xml version="1.0" encoding="UTF-8" ?>')
|
||||
self.__convert_utf = True
|
||||
else:
|
||||
self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
|
||||
sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
|
||||
' hope for the best')
|
||||
self.__bad_encoding = True
|
||||
self.__new_line = 0
|
||||
self.__write_new_line()
|
||||
if self.__no_dtd:
|
||||
@ -247,7 +250,7 @@ class ConvertToTags:
|
||||
the appropriate function.
|
||||
The functions that are called:
|
||||
a text function for text
|
||||
an open funciton for open tags
|
||||
an open function for open tags
|
||||
an open with attribute function for tags with attributes
|
||||
an empty with attribute function for tags that are empty but have
|
||||
attribtes.
|
||||
@ -263,20 +266,19 @@ class ConvertToTags:
|
||||
action = self.__state_dict.get(self.__token_info)
|
||||
if action is not None:
|
||||
action(line)
|
||||
self.__write_obj.close()
|
||||
#convert all encodings to UTF8 to avoid unsupported encodings in lxml
|
||||
if self.__convert_utf:
|
||||
#convert all encodings to UTF8 or ASCII to avoid unsupported encodings in lxml
|
||||
if self.__convert_utf or self.__bad_encoding:
|
||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||
copy_obj.rename(self.__write_to, self.__file)
|
||||
file_encoding = "utf-8"
|
||||
if self.__bad_encoding:
|
||||
file_encoding = "us-ascii"
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
with open(self.__write_to, 'w') as write_obj:
|
||||
file = read_obj.read()
|
||||
try:
|
||||
file = file.decode(self.__encoding)
|
||||
write_obj.write(file.encode('utf-8'))
|
||||
except:
|
||||
sys.stderr.write('Conversion to UTF-8 is not possible,'
|
||||
' encoding should be very carefully checked')
|
||||
write_objenc = EncodedFile(write_obj, self.__encoding,
|
||||
file_encoding, 'replace')
|
||||
for line in read_obj:
|
||||
write_objenc.write(line)
|
||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||
if self.__copy:
|
||||
copy_obj.copy_file(self.__write_to, "convert_to_tags.data")
|
||||
|
@ -11,6 +11,7 @@
|
||||
# #
|
||||
#########################################################################
|
||||
import sys, os
|
||||
|
||||
from calibre.ebooks.rtf2xml import copy
|
||||
from calibre.ptempfile import better_mktemp
|
||||
|
||||
@ -31,29 +32,29 @@ class Header:
|
||||
self.__bug_handler = bug_handler
|
||||
self.__copy = copy
|
||||
self.__write_to = better_mktemp()
|
||||
self.__found_a_header = 0
|
||||
self.__found_a_header = False
|
||||
|
||||
def __in_header_func(self, line):
|
||||
"""
|
||||
Handle all tokens that are part of header
|
||||
"""
|
||||
if self.__cb_count == self.__header_bracket_count:
|
||||
self.__in_header = 0
|
||||
self.__in_header = False
|
||||
self.__write_obj.write(line)
|
||||
self.__write_to_head_obj.write(
|
||||
'mi<mk<head___clo\n')
|
||||
self.__write_to_head_obj.write(
|
||||
'mi<tg<close_____<header-or-footer\n')
|
||||
self.__write_to_head_obj.write(
|
||||
'mi<mk<head___clo\n' \
|
||||
'mi<tg<close_____<header-or-footer\n' \
|
||||
'mi<mk<header-clo\n')
|
||||
else:
|
||||
self.__write_to_head_obj.write(line)
|
||||
|
||||
def __found_header(self, line):
|
||||
"""
|
||||
Found a header
|
||||
"""
|
||||
# but this could be header or footer
|
||||
self.__found_a_header = 1
|
||||
self.__in_header = 1
|
||||
self.__found_a_header = True
|
||||
self.__in_header = True
|
||||
self.__header_count += 1
|
||||
# temporarily set this to zero so I can enter loop
|
||||
self.__cb_count = 0
|
||||
@ -69,18 +70,23 @@ class Header:
|
||||
'mi<tg<open-att__<header-or-footer<type>%s\n' % (type)
|
||||
)
|
||||
else:
|
||||
sys.stderr.write('module is header\n')
|
||||
sys.stderr.write('method is __found_header\n')
|
||||
sys.stderr.write('no dict entry\n')
|
||||
sys.stderr.write('line is %s' % line)
|
||||
sys.stderr.write(
|
||||
'module is header\n' \
|
||||
'method is __found_header\n' \
|
||||
'no dict entry\n' \
|
||||
'line is %s' % line)
|
||||
self.__write_to_head_obj.write(
|
||||
'mi<tg<open-att__<header-or-footer<type>none\n'
|
||||
)
|
||||
|
||||
def __default_sep(self, line):
|
||||
"""Handle all tokens that are not header tokens"""
|
||||
"""
|
||||
Handle all tokens that are not header tokens
|
||||
"""
|
||||
if self.__token_info[3:5] == 'hf':
|
||||
self.__found_header(line)
|
||||
self.__write_obj.write(line)
|
||||
|
||||
def __initiate_sep_values(self):
|
||||
"""
|
||||
initiate counters for separate_footnotes method.
|
||||
@ -89,7 +95,7 @@ class Header:
|
||||
self.__ob_count = 0
|
||||
self.__cb_count = 0
|
||||
self.__header_bracket_count = 0
|
||||
self.__in_header = 0
|
||||
self.__in_header = False
|
||||
self.__header_count = 0
|
||||
self.__head_dict = {
|
||||
'head-left_' : ('header-left'),
|
||||
@ -101,6 +107,7 @@ class Header:
|
||||
'header____' : ('header' ),
|
||||
'footer____' : ('footer' ),
|
||||
}
|
||||
|
||||
def separate_headers(self):
|
||||
"""
|
||||
Separate all the footnotes in an RTF file and put them at the bottom,
|
||||
@ -110,14 +117,11 @@ class Header:
|
||||
bottom of the main file.
|
||||
"""
|
||||
self.__initiate_sep_values()
|
||||
read_obj = open(self.__file)
|
||||
self.__write_obj = open(self.__write_to, 'w')
|
||||
self.__header_holder = better_mktemp()
|
||||
self.__write_to_head_obj = open(self.__header_holder, 'w')
|
||||
line_to_read = 1
|
||||
while line_to_read:
|
||||
line_to_read = read_obj.readline()
|
||||
line = line_to_read
|
||||
with open(self.__file) as read_obj:
|
||||
with open(self.__write_to, 'w') as self.__write_obj:
|
||||
with open(self.__header_holder, 'w') as self.__write_to_head_obj:
|
||||
for line in read_obj:
|
||||
self.__token_info = line[:16]
|
||||
# keep track of opening and closing brackets
|
||||
if self.__token_info == 'ob<nu<open-brack':
|
||||
@ -130,33 +134,30 @@ class Header:
|
||||
# not in the middle of footnote text
|
||||
else:
|
||||
self.__default_sep(line)
|
||||
self.__write_obj.close()
|
||||
read_obj.close()
|
||||
self.__write_to_head_obj.close()
|
||||
read_obj = open(self.__header_holder, 'r')
|
||||
write_obj = open(self.__write_to, 'a')
|
||||
|
||||
with open(self.__header_holder, 'r') as read_obj:
|
||||
with open(self.__write_to, 'a') as write_obj:
|
||||
write_obj.write(
|
||||
'mi<mk<header-beg\n')
|
||||
line = 1
|
||||
while line:
|
||||
line = read_obj.readline()
|
||||
for line in read_obj:
|
||||
write_obj.write(line)
|
||||
write_obj.write(
|
||||
'mi<mk<header-end\n')
|
||||
read_obj.close()
|
||||
write_obj.close()
|
||||
os.remove(self.__header_holder)
|
||||
|
||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||
if self.__copy:
|
||||
copy_obj.copy_file(self.__write_to, "header_separate.info")
|
||||
copy_obj.copy_file(self.__write_to, "header_separate.data")
|
||||
copy_obj.rename(self.__write_to, self.__file)
|
||||
os.remove(self.__write_to)
|
||||
|
||||
def update_info(self, file, copy):
|
||||
"""
|
||||
Unused method
|
||||
"""
|
||||
self.__file = file
|
||||
self.__copy = copy
|
||||
|
||||
def __get_head_body_func(self, line):
|
||||
"""
|
||||
Process lines in main body and look for beginning of headers.
|
||||
@ -166,6 +167,7 @@ class Header:
|
||||
self.__state = 'head'
|
||||
else:
|
||||
self.__write_obj.write(line)
|
||||
|
||||
def __get_head_head_func(self, line):
|
||||
"""
|
||||
Copy headers and footers from bottom of file to a separate, temporary file.
|
||||
@ -174,6 +176,7 @@ class Header:
|
||||
self.__state = 'body'
|
||||
else:
|
||||
self.__write_to_head_obj.write(line)
|
||||
|
||||
def __get_headers(self):
|
||||
"""
|
||||
Private method to remove footnotes from main file. Read one line from
|
||||
@ -182,21 +185,16 @@ class Header:
|
||||
These two functions do the work of separating the footnotes form the
|
||||
body.
|
||||
"""
|
||||
read_obj = open(self.__file)
|
||||
self.__write_obj = open(self.__write_to, 'w')
|
||||
# self.__write_to = "footnote_info.data"
|
||||
self.__write_to_head_obj = open(self.__header_holder, 'w')
|
||||
line = 1
|
||||
while line:
|
||||
line = read_obj.readline()
|
||||
with open(self.__file) as read_obj:
|
||||
with open(self.__write_to, 'w') as self.__write_obj:
|
||||
with open(self.__header_holder, 'w') as self.__write_to_head_obj:
|
||||
for line in read_obj:
|
||||
self.__token_info = line[:16]
|
||||
if self.__state == 'body':
|
||||
self.__get_head_body_func(line)
|
||||
elif self.__state == 'head':
|
||||
self.__get_head_head_func(line)
|
||||
read_obj.close()
|
||||
self.__write_obj.close()
|
||||
self.__write_to_head_obj.close()
|
||||
|
||||
def __get_head_from_temp(self, num):
|
||||
"""
|
||||
Private method for joining headers and footers to body. This method
|
||||
@ -205,18 +203,17 @@ class Header:
|
||||
returns them as a string.
|
||||
"""
|
||||
look_for = 'mi<mk<header-ope<' + num + '\n'
|
||||
found_head = 0
|
||||
found_head = False
|
||||
string_to_return = ''
|
||||
line = 1
|
||||
while line:
|
||||
line = self.__read_from_head_obj.readline()
|
||||
for line in self.__read_from_head_obj:
|
||||
if found_head:
|
||||
if line == 'mi<mk<header-clo\n':
|
||||
return string_to_return
|
||||
string_to_return = string_to_return + line
|
||||
string_to_return += line
|
||||
else:
|
||||
if line == look_for:
|
||||
found_head = 1
|
||||
found_head = True
|
||||
|
||||
def __join_from_temp(self):
|
||||
"""
|
||||
Private method for rejoining footnotes to body. Read from the
|
||||
@ -227,15 +224,13 @@ class Header:
|
||||
If no footnote marker is found, simply print out the token (line).
|
||||
"""
|
||||
self.__read_from_head_obj = open(self.__header_holder, 'r')
|
||||
read_obj = open(self.__write_to, 'r')
|
||||
self.__write_obj = open(self.__write_to2, 'w')
|
||||
line = 1
|
||||
while line:
|
||||
line = read_obj.readline()
|
||||
with open(self.__write_to, 'r') as read_obj:
|
||||
for line in read_obj:
|
||||
if line[:16] == 'mi<mk<header-ind':
|
||||
line = self.__get_head_from_temp(line[17:-1])
|
||||
self.__write_obj.write(line)
|
||||
read_obj.close()
|
||||
|
||||
def join_headers(self):
|
||||
"""
|
||||
Join the footnotes from the bottom of the file and put them in their
|
||||
|
@ -181,7 +181,7 @@ class Hex2Utf8:
|
||||
self.__dingbats_dict.update(dingbats_base_dict)
|
||||
self.__dingbats_dict.update(ms_dingbats_dict)
|
||||
# load dictionary for caps, and make a string for the replacement
|
||||
self.__caps_uni_dict = char_map_obj.get_char_map(map='caps_uni')
|
||||
self.__caps_uni_dict = char_map_obj.get_char_map(map = 'caps_uni')
|
||||
# # print self.__caps_uni_dict
|
||||
# don't think I'll need this
|
||||
##keys = self.__caps_uni_dict.keys()
|
||||
|
@ -11,14 +11,18 @@
|
||||
# #
|
||||
#########################################################################
|
||||
import sys
|
||||
"""
|
||||
"""
|
||||
|
||||
class OldRtf:
|
||||
"""
|
||||
Check to see if the RTF is an older version
|
||||
Logic:
|
||||
If allowable control word/properties happen in text without being enclosed
|
||||
in brackets the file will be considered old rtf
|
||||
"""
|
||||
def __init__(self, in_file, bug_handler, run_level ):
|
||||
def __init__(self, in_file,
|
||||
bug_handler,
|
||||
run_level,
|
||||
):
|
||||
"""
|
||||
Required:
|
||||
'file'--file to parse
|
||||
@ -32,11 +36,7 @@ class OldRtf:
|
||||
"""
|
||||
self.__file = in_file
|
||||
self.__bug_handler = bug_handler
|
||||
self.__initiate_values()
|
||||
self.__ob_group = 0
|
||||
def __initiate_values(self):
|
||||
self.__previous_token = ''
|
||||
self.__new_found = 0
|
||||
self.__run_level = run_level
|
||||
self.__allowable = [
|
||||
'annotation' ,
|
||||
'blue______' ,
|
||||
@ -64,14 +64,18 @@ class OldRtf:
|
||||
'superscrip' ,
|
||||
'underlined' ,
|
||||
]
|
||||
self.__state = 'before_body'
|
||||
self.__action_dict = {
|
||||
'before_body' : self.__before_body_func,
|
||||
'in_body' : self.__check_tokens_func,
|
||||
'after_pard' : self.__after_pard_func,
|
||||
}
|
||||
self.__is_old = 0
|
||||
|
||||
def __initiate_values(self):
|
||||
self.__previous_token = ''
|
||||
self.__state = 'before_body'
|
||||
self.__found_new = 0
|
||||
self.__ob_group = 0
|
||||
|
||||
def __check_tokens_func(self, line):
|
||||
if self.__inline_info in self.__allowable:
|
||||
if self.__ob_group == self.__base_ob_count:
|
||||
@ -80,32 +84,32 @@ class OldRtf:
|
||||
self.__found_new += 1
|
||||
elif self.__token_info == 'cw<pf<par-def___':
|
||||
self.__state = 'after_pard'
|
||||
|
||||
def __before_body_func(self, line):
|
||||
if self.__token_info == 'mi<mk<body-open_':
|
||||
self.__state = 'in_body'
|
||||
self.__base_ob_count = self.__ob_group
|
||||
|
||||
def __after_pard_func(self, line):
|
||||
if line[0:2] != 'cw':
|
||||
self.__state = 'in_body'
|
||||
|
||||
def check_if_old_rtf(self):
|
||||
"""
|
||||
Requires:
|
||||
nothing
|
||||
Returns:
|
||||
1 if file is older RTf
|
||||
0 if file is newer RTF
|
||||
True if file is older RTf
|
||||
False if file is newer RTF
|
||||
"""
|
||||
|
||||
read_obj = open(self.__file, 'r')
|
||||
line = 1
|
||||
self.__initiate_values()
|
||||
line_num = 0
|
||||
while line:
|
||||
line = read_obj.readline()
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
for line in read_obj:
|
||||
line_num += 1
|
||||
self.__token_info = line[:16]
|
||||
if self.__token_info == 'mi<mk<body-close':
|
||||
return 0
|
||||
self.__ob_group = 0
|
||||
return False
|
||||
if self.__token_info == 'ob<nu<open-brack':
|
||||
self.__ob_group += 1
|
||||
self.__ob_count = line[-5:-1]
|
||||
@ -114,14 +118,22 @@ class OldRtf:
|
||||
self.__cb_count = line[-5:-1]
|
||||
self.__inline_info = line[6:16]
|
||||
if self.__state == 'after_body':
|
||||
return 0
|
||||
return False
|
||||
action = self.__action_dict.get(self.__state)
|
||||
if not action:
|
||||
sys.stderr.write('No action for state!\n')
|
||||
if action is None:
|
||||
try:
|
||||
sys.stderr.write('No action for this state!\n')
|
||||
except:
|
||||
pass
|
||||
result = action(line)
|
||||
if result == 'new_rtf':
|
||||
return 0
|
||||
return False
|
||||
elif result == 'old_rtf':
|
||||
return 1
|
||||
if self.__run_level > 3:
|
||||
sys.stderr.write(
|
||||
'Old rtf construction %s (bracket %s, line %s)\n'
|
||||
% (self.__inline_info, str(self.__ob_group), line_num)
|
||||
)
|
||||
return True
|
||||
self.__previous_token = line[6:16]
|
||||
return 0
|
||||
return False
|
||||
|
@ -10,7 +10,9 @@
|
||||
# #
|
||||
# #
|
||||
#########################################################################
|
||||
import sys, os, codecs
|
||||
import sys, os
|
||||
# , codecs
|
||||
|
||||
class Output:
|
||||
"""
|
||||
Output file
|
||||
@ -19,7 +21,8 @@ class Output:
|
||||
file,
|
||||
orig_file,
|
||||
output_dir = None,
|
||||
out_file = None
|
||||
out_file = None,
|
||||
no_ask = True
|
||||
):
|
||||
"""
|
||||
Required:
|
||||
@ -33,8 +36,9 @@ class Output:
|
||||
self.__file = file
|
||||
self.__orig_file = orig_file
|
||||
self.__output_dir = output_dir
|
||||
self.__no_ask = 1
|
||||
self.__no_ask = no_ask
|
||||
self.__out_file = out_file
|
||||
|
||||
def output(self):
|
||||
"""
|
||||
Required:
|
||||
@ -45,13 +49,14 @@ class Output:
|
||||
output the line to the screen if no output file given. Otherwise, output to
|
||||
the file.
|
||||
"""
|
||||
# self.__output_xml(self.__file, self.__out_file)
|
||||
if self.__output_dir:
|
||||
self.__output_to_dir_func()
|
||||
elif self.__out_file:
|
||||
self.__output_xml(self.__file, self.__out_file)
|
||||
self.__output_to_file_func()
|
||||
# self.__output_xml(self.__file, self.__out_file)
|
||||
else:
|
||||
self.__output_to_standard_func()
|
||||
|
||||
def __output_to_dir_func(self):
|
||||
"""
|
||||
Requires:
|
||||
@ -64,32 +69,25 @@ class Output:
|
||||
"""
|
||||
base_name = os.path.basename(self.__orig_file)
|
||||
base_name, ext = os.path.splitext(base_name)
|
||||
output_file = '%s.xml' % base_name
|
||||
output_file = os.path.join(self.__output_dir, output_file)
|
||||
output_file = os.path.join(self.__output_dir, '%s.xml' % base_name)
|
||||
# change if user wants to output to a specific file
|
||||
if self.__out_file:
|
||||
output_file = os.path.join(self.__output_dir, self.__out_file)
|
||||
user_response = 'o'
|
||||
if os.path.isfile(output_file):
|
||||
if self.__no_ask:
|
||||
user_response = 'o'
|
||||
else:
|
||||
msg = 'Do you want to over-write %s?\n' % output_file
|
||||
msg += 'Type "o" to over-write.\n'
|
||||
msg += 'Type any other key to print to standard output.\n'
|
||||
if os.path.isfile(output_file) and not self.__no_ask:
|
||||
msg = 'Do you want to overwrite %s?\n' % output_file
|
||||
msg += ('Type "o" to overwrite.\n'
|
||||
'Type any other key to print to standard output.\n')
|
||||
sys.stderr.write(msg)
|
||||
user_response = raw_input()
|
||||
if user_response == 'o':
|
||||
read_obj = open(self.__file, 'r')
|
||||
write_obj = open(output_file, 'w')
|
||||
line = 1
|
||||
while line:
|
||||
line = read_obj.readline()
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
with open(self.output_file, 'w') as write_obj:
|
||||
for line in read_obj:
|
||||
write_obj.write(line)
|
||||
read_obj.close()
|
||||
write_obj.close()
|
||||
else:
|
||||
self.__output_to_standard_func()
|
||||
|
||||
def __output_to_file_func(self):
|
||||
"""
|
||||
Required:
|
||||
@ -99,14 +97,11 @@ class Output:
|
||||
Logic:
|
||||
read one line at a time. Output to standard
|
||||
"""
|
||||
read_obj = open(self.__file, 'r')
|
||||
write_obj = open(self.__out_file, 'w')
|
||||
line = 1
|
||||
while line:
|
||||
line = read_obj.readline()
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
with open(self.__out_file, 'w') as write_obj:
|
||||
for line in read_obj:
|
||||
write_obj.write(line)
|
||||
read_obj.close()
|
||||
write_obj.close()
|
||||
|
||||
def __output_to_standard_func(self):
|
||||
"""
|
||||
Required:
|
||||
@ -116,26 +111,24 @@ class Output:
|
||||
Logic:
|
||||
read one line at a time. Output to standard
|
||||
"""
|
||||
read_obj = open(self.__file, 'r')
|
||||
line = 1
|
||||
while line:
|
||||
line = read_obj.readline()
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
for line in read_obj:
|
||||
sys.stdout.write(line)
|
||||
read_obj.close()
|
||||
def __output_xml(self, in_file, out_file):
|
||||
"""
|
||||
output the ill-formed xml file
|
||||
"""
|
||||
(utf8_encode, utf8_decode, utf8_reader, utf8_writer) = codecs.lookup("utf-8")
|
||||
write_obj = utf8_writer(open(out_file, 'w'))
|
||||
write_obj = open(out_file, 'w')
|
||||
read_obj = utf8_writer(open(in_file, 'r'))
|
||||
read_obj = open(in_file, 'r')
|
||||
line = 1
|
||||
while line:
|
||||
line = read_obj.readline()
|
||||
if isinstance(line, type(u"")):
|
||||
line = line.encode("utf-8")
|
||||
write_obj.write(line)
|
||||
read_obj.close()
|
||||
write_obj.close()
|
||||
|
||||
# def __output_xml(self, in_file, out_file):
|
||||
# """
|
||||
# output the ill-formed xml file
|
||||
# """
|
||||
# (utf8_encode, utf8_decode, utf8_reader, utf8_writer) = codecs.lookup("utf-8")
|
||||
# write_obj = utf8_writer(open(out_file, 'w'))
|
||||
# write_obj = open(out_file, 'w')
|
||||
# read_obj = utf8_writer(open(in_file, 'r'))
|
||||
# read_obj = open(in_file, 'r')
|
||||
# line = 1
|
||||
# while line:
|
||||
# line = read_obj.readline()
|
||||
# if isinstance(line, type(u"")):
|
||||
# line = line.encode("utf-8")
|
||||
# write_obj.write(line)
|
||||
# read_obj.close()
|
||||
# write_obj.close()
|
||||
|
@ -11,31 +11,32 @@
|
||||
# #
|
||||
#########################################################################
|
||||
import sys, os
|
||||
|
||||
from calibre.ebooks.rtf2xml import copy
|
||||
from calibre.ptempfile import better_mktemp
|
||||
|
||||
class Paragraphs:
|
||||
"""
|
||||
=================
|
||||
Purpose
|
||||
=================
|
||||
Write paragraph tags for a tokenized file. (This module won't be any use to use
|
||||
to you unless you use it as part of the other modules.)
|
||||
-------------
|
||||
Method
|
||||
-------------
|
||||
RTF does not tell you when a paragraph begins. It only tells you when the
|
||||
paragraph ends.
|
||||
In order to make paragraphs out of this limited info, the parser starts in the
|
||||
body of the documents and assumes it is not in a paragraph. It looks for clues
|
||||
to begin a paragraph. Text starts a paragraph; so does an inline field or
|
||||
list-text. If an end of paragraph marker (\par) is found, then this indicates
|
||||
a blank paragraph.
|
||||
Once a paragraph is found, the state changes to 'paragraph.' In this state,
|
||||
clues are looked to for the end of a paragraph. The end of a paragraph marker
|
||||
(\par) marks the end of a paragraph. So does the end of a footnote or heading;
|
||||
a paragraph definintion; the end of a field-block; and the beginning of a
|
||||
section. (How about the end of a section or the end of a field-block?)
|
||||
=================
|
||||
Purpose
|
||||
=================
|
||||
Write paragraph tags for a tokenized file. (This module won't be any use to use
|
||||
to you unless you use it as part of the other modules.)
|
||||
-------------
|
||||
Method
|
||||
-------------
|
||||
RTF does not tell you when a paragraph begins. It only tells you when the
|
||||
paragraph ends.
|
||||
In order to make paragraphs out of this limited info, the parser starts in the
|
||||
body of the documents and assumes it is not in a paragraph. It looks for clues
|
||||
to begin a paragraph. Text starts a paragraph; so does an inline field or
|
||||
list-text. If an end of paragraph marker (\par) is found, then this indicates
|
||||
a blank paragraph.
|
||||
Once a paragraph is found, the state changes to 'paragraph.' In this state,
|
||||
clues are looked to for the end of a paragraph. The end of a paragraph marker
|
||||
(\par) marks the end of a paragraph. So does the end of a footnote or heading;
|
||||
a paragraph definition; the end of a field-block; and the beginning of a
|
||||
section. (How about the end of a section or the end of a field-block?)
|
||||
"""
|
||||
def __init__(self,
|
||||
in_file,
|
||||
@ -60,6 +61,7 @@ section. (How about the end of a section or the end of a field-block?)
|
||||
self.__write_empty_para = write_empty_para
|
||||
self.__run_level = run_level
|
||||
self.__write_to = better_mktemp()
|
||||
|
||||
def __initiate_values(self):
|
||||
"""
|
||||
Initiate all values.
|
||||
@ -77,7 +79,7 @@ section. (How about the end of a section or the end of a field-block?)
|
||||
self.__paragraph_dict = {
|
||||
'cw<pf<par-end___' : self.__close_para_func, # end of paragraph
|
||||
'mi<mk<headi_-end' : self.__close_para_func, # end of header or footer
|
||||
##'cw<pf<par-def___' : self.__close_para_func, # paragraph definition
|
||||
## 'cw<pf<par-def___' : self.__close_para_func, # paragraph definition
|
||||
# 'mi<mk<fld-bk-end' : self.__close_para_func, # end of field-block
|
||||
'mi<mk<fldbk-end_' : self.__close_para_func, # end of field-block
|
||||
'mi<mk<body-close' : self.__close_para_func, # end of body
|
||||
@ -99,6 +101,7 @@ section. (How about the end of a section or the end of a field-block?)
|
||||
'mi<mk<pict-start' : self.__start_para_func,
|
||||
'cw<pf<page-break' : self.__empty_pgbk_func, # page break
|
||||
}
|
||||
|
||||
def __before_body_func(self, line):
|
||||
"""
|
||||
Required:
|
||||
@ -112,6 +115,7 @@ section. (How about the end of a section or the end of a field-block?)
|
||||
if self.__token_info == 'mi<mk<body-open_':
|
||||
self.__state = 'not_paragraph'
|
||||
self.__write_obj.write(line)
|
||||
|
||||
def __not_paragraph_func(self, line):
|
||||
"""
|
||||
Required:
|
||||
@ -127,6 +131,7 @@ section. (How about the end of a section or the end of a field-block?)
|
||||
if action:
|
||||
action(line)
|
||||
self.__write_obj.write(line)
|
||||
|
||||
def __paragraph_func(self, line):
|
||||
"""
|
||||
Required:
|
||||
@ -144,6 +149,7 @@ section. (How about the end of a section or the end of a field-block?)
|
||||
action(line)
|
||||
else:
|
||||
self.__write_obj.write(line)
|
||||
|
||||
def __start_para_func(self, line):
|
||||
"""
|
||||
Requires:
|
||||
@ -160,6 +166,7 @@ section. (How about the end of a section or the end of a field-block?)
|
||||
)
|
||||
self.__write_obj.write(self.__start2_marker)
|
||||
self.__state = 'paragraph'
|
||||
|
||||
def __empty_para_func(self, line):
|
||||
"""
|
||||
Requires:
|
||||
@ -176,6 +183,7 @@ section. (How about the end of a section or the end of a field-block?)
|
||||
'mi<tg<empty_____<para\n'
|
||||
)
|
||||
self.__write_obj.write(self.__end_marker) # marker for later parsing
|
||||
|
||||
def __empty_pgbk_func(self, line):
|
||||
"""
|
||||
Requires:
|
||||
@ -188,6 +196,7 @@ section. (How about the end of a section or the end of a field-block?)
|
||||
self.__write_obj.write(
|
||||
'mi<tg<empty_____<page-break\n'
|
||||
)
|
||||
|
||||
def __close_para_func(self, line):
|
||||
"""
|
||||
Requires:
|
||||
@ -205,6 +214,7 @@ section. (How about the end of a section or the end of a field-block?)
|
||||
self.__write_obj.write(self.__end_marker) # marker for later parser
|
||||
self.__write_obj.write(line)
|
||||
self.__state = 'not_paragraph'
|
||||
|
||||
def __bogus_para__def_func(self, line):
|
||||
"""
|
||||
Requires:
|
||||
@ -215,6 +225,7 @@ section. (How about the end of a section or the end of a field-block?)
|
||||
if a \pard occurs in a paragraph, I want to ignore it. (I believe)
|
||||
"""
|
||||
self.__write_obj.write('mi<mk<bogus-pard\n')
|
||||
|
||||
def make_paragraphs(self):
|
||||
"""
|
||||
Requires:
|
||||
@ -229,20 +240,18 @@ section. (How about the end of a section or the end of a field-block?)
|
||||
only other state is 'paragraph'.
|
||||
"""
|
||||
self.__initiate_values()
|
||||
read_obj = open(self.__file, 'r')
|
||||
self.__write_obj = open(self.__write_to, 'w')
|
||||
line_to_read = 1
|
||||
while line_to_read:
|
||||
line_to_read = read_obj.readline()
|
||||
line = line_to_read
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
with open(self.__write_to, 'w') as self.__write_obj:
|
||||
for line in read_obj:
|
||||
self.__token_info = line[:16]
|
||||
action = self.__state_dict.get(self.__state)
|
||||
if action == None:
|
||||
sys.stderr.write('no no matching state in module sections.py\n')
|
||||
if action is None:
|
||||
try:
|
||||
sys.stderr.write('no matching state in module paragraphs.py\n')
|
||||
sys.stderr.write(self.__state + '\n')
|
||||
except:
|
||||
pass
|
||||
action(line)
|
||||
read_obj.close()
|
||||
self.__write_obj.close()
|
||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||
if self.__copy:
|
||||
copy_obj.copy_file(self.__write_to, "paragraphs.data")
|
||||
|
@ -11,16 +11,24 @@
|
||||
# #
|
||||
#########################################################################
|
||||
import sys,os
|
||||
|
||||
from calibre.ebooks.rtf2xml import copy
|
||||
|
||||
class Preamble:
|
||||
"""
|
||||
Fix the reamaing parts of the preamble. This module does very little. It
|
||||
makes sure that no text gets put in the revision of list table. In the
|
||||
future, when I understand how to interprett he revision table and list
|
||||
future, when I understand how to interpret the revision table and list
|
||||
table, I will make these methods more functional.
|
||||
"""
|
||||
def __init__(self, file, bug_handler, platform, default_font, code_page,
|
||||
copy=None, temp_dir=None):
|
||||
def __init__(self, file,
|
||||
bug_handler,
|
||||
platform,
|
||||
default_font,
|
||||
code_page,
|
||||
copy=None,
|
||||
temp_dir=None,
|
||||
):
|
||||
"""
|
||||
Required:
|
||||
file--file to parse
|
||||
@ -44,6 +52,7 @@ class Preamble:
|
||||
self.__write_to = os.path.join(temp_dir,"info_table_info.data")
|
||||
else:
|
||||
self.__write_to = "info_table_info.data"
|
||||
|
||||
def __initiate_values(self):
|
||||
"""
|
||||
Initiate all values.
|
||||
@ -62,12 +71,14 @@ class Preamble:
|
||||
'mi<mk<revtbl-beg' : self.__found_revision_table_func,
|
||||
'mi<mk<body-open_' : self.__found_body_func,
|
||||
}
|
||||
|
||||
def __default_func(self, line):
|
||||
action = self.__default_dict.get(self.__token_info)
|
||||
if action:
|
||||
action(line)
|
||||
else:
|
||||
self.__write_obj.write(line)
|
||||
|
||||
def __found_rtf_head_func(self, line):
|
||||
"""
|
||||
Requires:
|
||||
@ -84,8 +95,10 @@ class Preamble:
|
||||
'<platform>%s\n' % (self.__default_font, self.__code_page,
|
||||
self.__platform)
|
||||
)
|
||||
|
||||
def __found_list_table_func(self, line):
|
||||
self.__state = 'list_table'
|
||||
|
||||
def __list_table_func(self, line):
|
||||
if self.__token_info == 'mi<mk<listabend_':
|
||||
self.__state = 'default'
|
||||
@ -93,8 +106,10 @@ class Preamble:
|
||||
pass
|
||||
else:
|
||||
self.__write_obj.write(line)
|
||||
|
||||
def __found_revision_table_func(self, line):
|
||||
self.__state = 'revision'
|
||||
|
||||
def __revision_table_func(self, line):
|
||||
if self.__token_info == 'mi<mk<revtbl-end':
|
||||
self.__state = 'default'
|
||||
@ -102,11 +117,14 @@ class Preamble:
|
||||
pass
|
||||
else:
|
||||
self.__write_obj.write(line)
|
||||
|
||||
def __found_body_func(self, line):
|
||||
self.__state = 'body'
|
||||
self.__write_obj.write(line)
|
||||
|
||||
def __body_func(self, line):
|
||||
self.__write_obj.write(line)
|
||||
|
||||
def fix_preamble(self):
|
||||
"""
|
||||
Requires:
|
||||
@ -119,20 +137,15 @@ class Preamble:
|
||||
the list table.
|
||||
"""
|
||||
self.__initiate_values()
|
||||
read_obj = open(self.__file, 'r')
|
||||
self.__write_obj = open(self.__write_to, 'w')
|
||||
line_to_read = 1
|
||||
while line_to_read:
|
||||
line_to_read = read_obj.readline()
|
||||
line = line_to_read
|
||||
with open(self.__file, 'r') as read_obj:
|
||||
with open(self.__write_to, 'w') as self.__write_obj:
|
||||
for line in read_obj:
|
||||
self.__token_info = line[:16]
|
||||
action = self.__state_dict.get(self.__state)
|
||||
if action == None:
|
||||
sys.stderr.write('no no matching state in module preamble_rest.py\n')
|
||||
sys.stderr.write(self.__state + '\n')
|
||||
if action is None:
|
||||
sys.stderr.write(
|
||||
'no matching state in module preamble_rest.py\n' + self.__state + '\n')
|
||||
action(line)
|
||||
read_obj.close()
|
||||
self.__write_obj.close()
|
||||
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
|
||||
if self.__copy:
|
||||
copy_obj.copy_file(self.__write_to, "preamble_div.data")
|
||||
|
@ -11,43 +11,44 @@
|
||||
# #
|
||||
#########################################################################
|
||||
import sys, os
|
||||
|
||||
from calibre.ebooks.rtf2xml import copy
|
||||
from calibre.ptempfile import better_mktemp
|
||||
|
||||
class Sections:
|
||||
"""
|
||||
=================
|
||||
Purpose
|
||||
=================
|
||||
Write section tags for a tokenized file. (This module won't be any use to use
|
||||
to you unless you use it as part of the other modules.)
|
||||
---------------
|
||||
logic
|
||||
---------------
|
||||
The tags for the first section breaks have already been written.
|
||||
RTF stores section breaks with the \sect tag. Each time this tag is
|
||||
encountered, add one to the counter.
|
||||
When I encounter the \sectd tag, I want to collect all the appropriate tokens
|
||||
that describe the section. When I reach a \pard, I know I an stop collecting
|
||||
tokens and write the section tags.
|
||||
The exception to this method occurs when sections occur in field blocks, such
|
||||
as the index. Normally, two section break occur within the index and other
|
||||
field-blocks. (If less or more section breaks occurr, this code may not work.)
|
||||
I want the sections to occurr outside of the index. That is, the index
|
||||
should be nested inside one section tag. After the index is complete, a new
|
||||
section should begin.
|
||||
In order to write the sections outside of the field blocks, I have to store
|
||||
all of the field block as a string. When I ecounter the \sect tag, add one to
|
||||
the section counter, but store this number in a list. Likewise, store the
|
||||
information describing the section in another list.
|
||||
When I reach the end of the field block, choose the first item from the
|
||||
numbered list as the section number. Choose the first item in the description
|
||||
list as the values and attributes of the section. Enclose the field string
|
||||
between the section tags.
|
||||
Start a new section outside the field-block strings. Use the second number in
|
||||
the list; use the second item in the description list.
|
||||
CHANGE (2004-04-26) No longer write sections that occurr in field-blocks.
|
||||
Instead, ingore all section information in a field-block.
|
||||
=================
|
||||
Purpose
|
||||
=================
|
||||
Write section tags for a tokenized file. (This module won't be any use to use
|
||||
to you unless you use it as part of the other modules.)
|
||||
---------------
|
||||
logic
|
||||
---------------
|
||||
The tags for the first section breaks have already been written.
|
||||
RTF stores section breaks with the \sect tag. Each time this tag is
|
||||
encountered, add one to the counter.
|
||||
When I encounter the \sectd tag, I want to collect all the appropriate tokens
|
||||
that describe the section. When I reach a \pard, I know I an stop collecting
|
||||
tokens and write the section tags.
|
||||
The exception to this method occurs when sections occur in field blocks, such
|
||||
as the index. Normally, two section break occur within the index and other
|
||||
field-blocks. (If less or more section breaks occurr, this code may not work.)
|
||||
I want the sections to occur outside of the index. That is, the index
|
||||
should be nested inside one section tag. After the index is complete, a new
|
||||
section should begin.
|
||||
In order to write the sections outside of the field blocks, I have to store
|
||||
all of the field block as a string. When I ecounter the \sect tag, add one to
|
||||
the section counter, but store this number in a list. Likewise, store the
|
||||
information describing the section in another list.
|
||||
When I reach the end of the field block, choose the first item from the
|
||||
numbered list as the section number. Choose the first item in the description
|
||||
list as the values and attributes of the section. Enclose the field string
|
||||
between the section tags.
|
||||
Start a new section outside the field-block strings. Use the second number in
|
||||
the list; use the second item in the description list.
|
||||
CHANGE (2004-04-26) No longer write sections that occurr in field-blocks.
|
||||
Instead, ingore all section information in a field-block.
|
||||
"""
|
||||
def __init__(self,
|
||||
in_file,
|
||||
|
@ -6,6 +6,7 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from functools import partial
|
||||
from collections import Counter
|
||||
|
||||
from PyQt4.Qt import QObject, QTimer
|
||||
|
||||
@ -117,13 +118,14 @@ class DeleteAction(InterfaceAction):
|
||||
|
||||
def _get_selected_formats(self, msg, ids):
|
||||
from calibre.gui2.dialogs.select_formats import SelectFormats
|
||||
fmts = set([])
|
||||
c = Counter()
|
||||
db = self.gui.library_view.model().db
|
||||
for x in ids:
|
||||
fmts_ = db.formats(x, index_is_id=True, verify_formats=False)
|
||||
if fmts_:
|
||||
fmts.update(frozenset([x.lower() for x in fmts_.split(',')]))
|
||||
d = SelectFormats(list(sorted(fmts)), msg, parent=self.gui)
|
||||
for x in frozenset([x.lower() for x in fmts_.split(',')]):
|
||||
c[x] += 1
|
||||
d = SelectFormats(c, msg, parent=self.gui)
|
||||
if d.exec_() != d.Accepted:
|
||||
return None
|
||||
return d.selected_formats
|
||||
|
@ -12,11 +12,11 @@ from PyQt4.Qt import (QDialog, QVBoxLayout, QHBoxLayout, QRadioButton, QFrame,
|
||||
|
||||
from calibre import as_unicode
|
||||
from calibre.constants import isosx
|
||||
from calibre.gui2 import error_dialog, question_dialog, open_local_file
|
||||
from calibre.gui2 import error_dialog, question_dialog, open_local_file, gprefs
|
||||
from calibre.gui2.actions import InterfaceAction
|
||||
from calibre.ptempfile import (PersistentTemporaryDirectory,
|
||||
PersistentTemporaryFile)
|
||||
from calibre.utils.config import prefs
|
||||
from calibre.utils.config import prefs, tweaks
|
||||
|
||||
class TweakBook(QDialog):
|
||||
|
||||
@ -32,11 +32,16 @@ class TweakBook(QDialog):
|
||||
index_is_id=True))
|
||||
|
||||
button = self.fmt_choice_buttons[0]
|
||||
button_map = {unicode(x.text()):x for x in self.fmt_choice_buttons}
|
||||
of = prefs['output_format'].upper()
|
||||
for x in self.fmt_choice_buttons:
|
||||
if unicode(x.text()) == of:
|
||||
button = x
|
||||
break
|
||||
df = tweaks.get('default_tweak_format', None)
|
||||
lf = gprefs.get('last_tweak_format', None)
|
||||
if df and df.lower() == 'remember' and lf in button_map:
|
||||
button = button_map[lf]
|
||||
elif df and df.upper() in button_map:
|
||||
button = button_map[df.upper()]
|
||||
elif of in button_map:
|
||||
button = button_map[of]
|
||||
button.setChecked(True)
|
||||
|
||||
self.init_state()
|
||||
@ -148,6 +153,8 @@ class TweakBook(QDialog):
|
||||
|
||||
def explode(self):
|
||||
self.show_msg(_('Exploding, please wait...'))
|
||||
if len(self.fmt_choice_buttons) > 1:
|
||||
gprefs.set('last_tweak_format', self.current_format.upper())
|
||||
QTimer.singleShot(5, self.do_explode)
|
||||
|
||||
def ask_question(self, msg):
|
||||
|
@ -161,8 +161,14 @@ class EditorWidget(QWebView): # {{{
|
||||
self.page().setContentEditable(True)
|
||||
|
||||
def clear_text(self, *args):
|
||||
us = self.page().undoStack()
|
||||
us.beginMacro('clear all text')
|
||||
self.action_select_all.trigger()
|
||||
self.action_cut.trigger()
|
||||
self.action_remove_format.trigger()
|
||||
self.exec_command('delete')
|
||||
us.endMacro()
|
||||
self.set_font_style()
|
||||
self.setFocus(Qt.OtherFocusReason)
|
||||
|
||||
def link_clicked(self, url):
|
||||
open_url(url)
|
||||
@ -262,6 +268,10 @@ class EditorWidget(QWebView): # {{{
|
||||
|
||||
def fset(self, val):
|
||||
self.setHtml(val)
|
||||
self.set_font_style()
|
||||
return property(fget=fget, fset=fset)
|
||||
|
||||
def set_font_style(self):
|
||||
fi = QFontInfo(QApplication.font(self))
|
||||
f = fi.pixelSize() + 1 + int(tweaks['change_book_details_font_size_by'])
|
||||
fam = unicode(fi.family()).strip().replace('"', '')
|
||||
@ -274,8 +284,6 @@ class EditorWidget(QWebView): # {{{
|
||||
body.setAttribute('style', style)
|
||||
self.page().setContentEditable(True)
|
||||
|
||||
return property(fget=fget, fset=fset)
|
||||
|
||||
def keyPressEvent(self, ev):
|
||||
if ev.key() in (Qt.Key_Tab, Qt.Key_Escape, Qt.Key_Backtab):
|
||||
ev.ignore()
|
||||
@ -627,4 +635,6 @@ if __name__ == '__main__':
|
||||
w = Editor()
|
||||
w.resize(800, 600)
|
||||
w.show()
|
||||
w.html = '<b>testing</b>'
|
||||
app.exec_()
|
||||
#print w.html
|
||||
|
@ -109,12 +109,18 @@
|
||||
</item>
|
||||
<item row="0" column="1">
|
||||
<widget class="QDoubleSpinBox" name="opt_margin_left">
|
||||
<property name="specialValueText">
|
||||
<string>No margin</string>
|
||||
</property>
|
||||
<property name="suffix">
|
||||
<string> pt</string>
|
||||
</property>
|
||||
<property name="decimals">
|
||||
<number>1</number>
|
||||
</property>
|
||||
<property name="minimum">
|
||||
<double>-1.000000000000000</double>
|
||||
</property>
|
||||
<property name="maximum">
|
||||
<double>200.000000000000000</double>
|
||||
</property>
|
||||
@ -132,12 +138,18 @@
|
||||
</item>
|
||||
<item row="1" column="1">
|
||||
<widget class="QDoubleSpinBox" name="opt_margin_top">
|
||||
<property name="specialValueText">
|
||||
<string>No margin</string>
|
||||
</property>
|
||||
<property name="suffix">
|
||||
<string> pt</string>
|
||||
</property>
|
||||
<property name="decimals">
|
||||
<number>1</number>
|
||||
</property>
|
||||
<property name="minimum">
|
||||
<double>-1.000000000000000</double>
|
||||
</property>
|
||||
<property name="maximum">
|
||||
<double>200.000000000000000</double>
|
||||
</property>
|
||||
@ -155,12 +167,18 @@
|
||||
</item>
|
||||
<item row="2" column="1">
|
||||
<widget class="QDoubleSpinBox" name="opt_margin_right">
|
||||
<property name="specialValueText">
|
||||
<string>No margin</string>
|
||||
</property>
|
||||
<property name="suffix">
|
||||
<string> pt</string>
|
||||
</property>
|
||||
<property name="decimals">
|
||||
<number>1</number>
|
||||
</property>
|
||||
<property name="minimum">
|
||||
<double>-1.000000000000000</double>
|
||||
</property>
|
||||
<property name="maximum">
|
||||
<double>200.000000000000000</double>
|
||||
</property>
|
||||
@ -178,12 +196,18 @@
|
||||
</item>
|
||||
<item row="3" column="1">
|
||||
<widget class="QDoubleSpinBox" name="opt_margin_bottom">
|
||||
<property name="specialValueText">
|
||||
<string>No margin</string>
|
||||
</property>
|
||||
<property name="suffix">
|
||||
<string> pt</string>
|
||||
</property>
|
||||
<property name="decimals">
|
||||
<number>1</number>
|
||||
</property>
|
||||
<property name="minimum">
|
||||
<double>-1.000000000000000</double>
|
||||
</property>
|
||||
<property name="maximum">
|
||||
<double>200.000000000000000</double>
|
||||
</property>
|
||||
|
@ -349,7 +349,8 @@ class Text(Base):
|
||||
return d.exec_()
|
||||
|
||||
def edit(self):
|
||||
if self.getter() != self.initial_val:
|
||||
if (self.getter() != self.initial_val and (self.getter() or
|
||||
self.initial_val)):
|
||||
d = self._save_dialog(self.parent, _('Values changed'),
|
||||
_('You have changed the values. In order to use this '
|
||||
'editor, you must either discard or apply these '
|
||||
|
@ -182,7 +182,8 @@ class SearchDialog(QDialog, Ui_Dialog):
|
||||
global box_values
|
||||
box_values = copy.deepcopy(self.box_last_values)
|
||||
if general:
|
||||
ans.append(unicode(self.general_combo.currentText()) + ':"' + general + '"')
|
||||
ans.append(unicode(self.general_combo.currentText()) + ':"' +
|
||||
self.mc + general + '"')
|
||||
if ans:
|
||||
return ' and '.join(ans)
|
||||
return ''
|
||||
|
@ -14,9 +14,10 @@ from calibre.gui2 import NONE, file_icon_provider
|
||||
|
||||
class Formats(QAbstractListModel):
|
||||
|
||||
def __init__(self, fmts):
|
||||
def __init__(self, fmt_count):
|
||||
QAbstractListModel.__init__(self)
|
||||
self.fmts = sorted(fmts)
|
||||
self.fmts = sorted(set(fmt_count))
|
||||
self.counts = fmt_count
|
||||
self.fi = file_icon_provider()
|
||||
|
||||
def rowCount(self, parent):
|
||||
@ -25,9 +26,17 @@ class Formats(QAbstractListModel):
|
||||
def data(self, index, role):
|
||||
row = index.row()
|
||||
if role == Qt.DisplayRole:
|
||||
return QVariant(self.fmts[row].upper())
|
||||
fmt = self.fmts[row]
|
||||
count = self.counts[fmt]
|
||||
return QVariant('%s [%d]'%(fmt.upper(), count))
|
||||
if role == Qt.DecorationRole:
|
||||
return QVariant(self.fi.icon_from_ext(self.fmts[row].lower()))
|
||||
if role == Qt.ToolTipRole:
|
||||
fmt = self.fmts[row]
|
||||
count = self.counts[fmt]
|
||||
return QVariant(
|
||||
_('The are %(count)d book(s) with the %(fmt)s format')%dict(
|
||||
count=count, fmt=fmt.upper()))
|
||||
return NONE
|
||||
|
||||
def flags(self, index):
|
||||
@ -38,7 +47,7 @@ class Formats(QAbstractListModel):
|
||||
|
||||
class SelectFormats(QDialog):
|
||||
|
||||
def __init__(self, fmt_list, msg, single=False, parent=None):
|
||||
def __init__(self, fmt_count, msg, single=False, parent=None):
|
||||
QDialog.__init__(self, parent)
|
||||
self._l = QVBoxLayout(self)
|
||||
self.setLayout(self._l)
|
||||
@ -46,7 +55,7 @@ class SelectFormats(QDialog):
|
||||
self._m = QLabel(msg)
|
||||
self._m.setWordWrap(True)
|
||||
self._l.addWidget(self._m)
|
||||
self.formats = Formats(fmt_list)
|
||||
self.formats = Formats(fmt_count)
|
||||
self.fview = QListView(self)
|
||||
self._l.addWidget(self.fview)
|
||||
self.fview.setModel(self.formats)
|
||||
|
@ -157,8 +157,6 @@ class AmazonKindleStore(StorePlugin):
|
||||
title_xpath = './/div[@class="productTitle"]/a/text()'
|
||||
price_xpath = './/div[@class="newPrice"]//span//text()'
|
||||
|
||||
|
||||
|
||||
for data in doc.xpath(data_xpath):
|
||||
if counter <= 0:
|
||||
break
|
||||
|
@ -7,6 +7,7 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import random
|
||||
import re
|
||||
import urllib
|
||||
from contextlib import closing
|
||||
|
||||
@ -52,6 +53,38 @@ class DieselEbooksStore(BasicStoreConfig, StorePlugin):
|
||||
counter = max_results
|
||||
with closing(br.open(url, timeout=timeout)) as f:
|
||||
doc = html.fromstring(f.read())
|
||||
|
||||
if doc.xpath('not(boolean(//select[contains(@id, "selection")]))'):
|
||||
id = ''.join(doc.xpath('//div[@class="price_fat"]//a/@href'))
|
||||
mo = re.search('(?<=id=).+?(?=&)', id)
|
||||
if not mo:
|
||||
yield None
|
||||
id = mo.group()
|
||||
|
||||
cover_url = ''.join(doc.xpath('//div[@class="cover"]/a/@href'))
|
||||
|
||||
title = ''.join(doc.xpath('//div[@class="desc_fat"]//h1/text()'))
|
||||
author = ''.join(doc.xpath('//div[@class="desc_fat"]//span[@itemprop="author"]/text()'))
|
||||
price = ''.join(doc.xpath('//div[@class="price_fat"]//h1/text()'))
|
||||
|
||||
formats = ', '.join(doc.xpath('//div[@class="desc_fat"]//p[contains(text(), "Format")]/text()'))
|
||||
a, b, formats = formats.partition('Format:')
|
||||
|
||||
drm = SearchResult.DRM_LOCKED
|
||||
if 'drm free' in formats.lower():
|
||||
drm = SearchResult.DRM_UNLOCKED
|
||||
|
||||
s = SearchResult()
|
||||
s.cover_url = cover_url
|
||||
s.title = title.strip()
|
||||
s.author = author.strip()
|
||||
s.price = price.strip()
|
||||
s.detail_item = id.strip()
|
||||
s.formats = formats
|
||||
s.drm = drm
|
||||
|
||||
yield s
|
||||
else:
|
||||
for data in doc.xpath('//div[contains(@class, "item")]'):
|
||||
if counter <= 0:
|
||||
break
|
||||
@ -72,10 +105,9 @@ class DieselEbooksStore(BasicStoreConfig, StorePlugin):
|
||||
formats = ', '.join(data.xpath('.//div[@class="book-info"]//text()')).strip()
|
||||
a, b, formats = formats.partition('Format:')
|
||||
drm = SearchResult.DRM_LOCKED
|
||||
if 'drm free' not in formats.lower():
|
||||
if 'drm free' in formats.lower():
|
||||
drm = SearchResult.DRM_UNLOCKED
|
||||
|
||||
|
||||
counter -= 1
|
||||
|
||||
s = SearchResult()
|
||||
|
@ -69,7 +69,7 @@ class KoboStore(BasicStoreConfig, StorePlugin):
|
||||
|
||||
title = ''.join(data.xpath('.//div[@class="SCItemHeader"]/h1/a[1]/text()'))
|
||||
author = ', '.join(data.xpath('.//div[@class="SCItemSummary"]//span//a/text()'))
|
||||
drm = data.xpath('boolean(.//span[@class="SCAvailibilityFormatsText" and contains(text(), "DRM")])')
|
||||
drm = data.xpath('boolean(.//span[@class="SCAvailibilityFormatsText" and not(contains(text(), "DRM-Free"))])')
|
||||
|
||||
counter -= 1
|
||||
|
||||
|
@ -53,7 +53,7 @@ class SmashwordsStore(BasicStoreConfig, StorePlugin):
|
||||
counter = max_results
|
||||
with closing(br.open(url, timeout=timeout)) as f:
|
||||
doc = html.fromstring(f.read())
|
||||
for data in doc.xpath('//div[@id="pageCenterContent2"]//div[@class="bookCoverImg"]'):
|
||||
for data in doc.xpath('//div[@id="pageCenterContent"]//div[@class="bookCoverImg"]'):
|
||||
if counter <= 0:
|
||||
break
|
||||
data = html.fromstring(html.tostring(data))
|
||||
|
@ -24,9 +24,9 @@ from calibre.utils.config import prefs, dynamic
|
||||
from calibre.utils.ipc.server import Server
|
||||
from calibre.library.database2 import LibraryDatabase2
|
||||
from calibre.customize.ui import interface_actions, available_store_plugins
|
||||
from calibre.gui2 import error_dialog, GetMetadata, open_url, \
|
||||
gprefs, max_available_height, config, info_dialog, Dispatcher, \
|
||||
question_dialog
|
||||
from calibre.gui2 import (error_dialog, GetMetadata, open_url,
|
||||
gprefs, max_available_height, config, info_dialog, Dispatcher,
|
||||
question_dialog, warning_dialog)
|
||||
from calibre.gui2.cover_flow import CoverFlowMixin
|
||||
from calibre.gui2.widgets import ProgressIndicator
|
||||
from calibre.gui2.update import UpdateMixin
|
||||
@ -653,6 +653,23 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
|
||||
d.show()
|
||||
self._modeless_dialogs.append(d)
|
||||
return
|
||||
|
||||
if 'calibre.ebooks.conversion.ConversionUserFeedBack:' in job.details:
|
||||
if not minz:
|
||||
import json
|
||||
payload = job.details.rpartition(
|
||||
'calibre.ebooks.conversion.ConversionUserFeedBack:')[-1]
|
||||
payload = json.loads('{' + payload.partition('{')[-1])
|
||||
d = {'info':info_dialog, 'warn':warning_dialog,
|
||||
'error':error_dialog}.get(payload['level'],
|
||||
error_dialog)
|
||||
d = d(self, payload['title'],
|
||||
'<p>%s</p>'%payload['msg'],
|
||||
det_msg=payload['det_msg'])
|
||||
d.setModal(False)
|
||||
d.show()
|
||||
self._modeless_dialogs.append(d)
|
||||
return
|
||||
except:
|
||||
pass
|
||||
if job.killed:
|
||||
|
@ -3,7 +3,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
''' Post installation script for linux '''
|
||||
|
||||
import sys, os, cPickle, textwrap, stat, importlib
|
||||
import sys, os, cPickle, textwrap, stat
|
||||
from subprocess import check_call
|
||||
|
||||
from calibre import __appname__, prints, guess_type
|
||||
@ -177,7 +177,6 @@ class PostInstall:
|
||||
self.mime_resources = []
|
||||
if islinux or isbsd:
|
||||
self.setup_completion()
|
||||
self.install_man_pages()
|
||||
if islinux or isbsd:
|
||||
self.setup_desktop_integration()
|
||||
self.create_uninstaller()
|
||||
@ -343,38 +342,6 @@ class PostInstall:
|
||||
self.task_failed('Setting up completion failed')
|
||||
# }}}
|
||||
|
||||
def install_man_pages(self): # {{{
|
||||
try:
|
||||
from calibre.utils.help2man import create_man_page
|
||||
if isbsd:
|
||||
manpath = os.path.join(self.opts.staging_root, 'man/man1')
|
||||
else:
|
||||
manpath = os.path.join(self.opts.staging_sharedir, 'man/man1')
|
||||
if not os.path.exists(manpath):
|
||||
os.makedirs(manpath)
|
||||
self.info('Installing MAN pages...')
|
||||
for src in entry_points['console_scripts']:
|
||||
prog, right = src.split('=')
|
||||
prog = prog.strip()
|
||||
module = importlib.import_module(right.split(':')[0].strip())
|
||||
parser = getattr(module, 'option_parser', None)
|
||||
if parser is None:
|
||||
continue
|
||||
parser = parser()
|
||||
raw = create_man_page(prog, parser)
|
||||
if isbsd:
|
||||
manfile = os.path.join(manpath, prog+'.1')
|
||||
else:
|
||||
manfile = os.path.join(manpath, prog+'.1'+__appname__+'.bz2')
|
||||
self.info('\tInstalling MAN page for', prog)
|
||||
open(manfile, 'wb').write(raw)
|
||||
self.manifest.append(manfile)
|
||||
except:
|
||||
if self.opts.fatal_errors:
|
||||
raise
|
||||
self.task_failed('Installing MAN pages failed')
|
||||
# }}}
|
||||
|
||||
def setup_desktop_integration(self): # {{{
|
||||
try:
|
||||
self.info('Setting up desktop integration...')
|
||||
|
@ -69,6 +69,22 @@ If you have a hand edited TOC in the input document, you can use the TOC detecti
|
||||
|
||||
Finally, I encourage you to ditch the content TOC and only have a metadata TOC in your ebooks. Metadata TOCs will give the people reading your ebooks a much superior navigation experience (except on the Kindle, where they are essentially the same as a content TOC).
|
||||
|
||||
The covers for my MOBI files have stopped showing up in Kindle for PC/Kindle for Android/etc.
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
This is caused by a bug in the Amazon software. You can work around it by going
|
||||
to Preferences->Output Options->MOBI output and setting the "Enable sharing
|
||||
of book content" option. If you are reconverting a previously converted book,
|
||||
you will also have to enable the option in the conversion dialog for that
|
||||
individual book (as per-book conversion settings are saved and take
|
||||
precedence).
|
||||
|
||||
Note that doing this will mean that the generated MOBI will show up under
|
||||
personal documents instead of Books on the Kindle Fire and Amazon whispersync
|
||||
will not work, but the covers will. It's your choice which functionality is
|
||||
more important to you. I encourage you to contact Amazon and ask them to fix
|
||||
this bug.
|
||||
|
||||
How do I convert a collection of HTML files in a specific order?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
In order to convert a collection of HTML files in a specific order, you have to create a table of contents file. That is, another HTML file that contains links to all the other files in the desired order. Such a file looks like::
|
||||
|
@ -245,7 +245,7 @@ The following functions are available in addition to those described in single-f
|
||||
* ``current_library_name()`` -- return the last name on the path to the current calibre library. This function can be called in template program mode using the template ``{:'current_library_name()'}``.
|
||||
* ``days_between(date1, date2)`` -- return the number of days between ``date1`` and ``date2``. The number is positive if ``date1`` is greater than ``date2``, otherwise negative. If either ``date1`` or ``date2`` are not dates, the function returns the empty string.
|
||||
* ``divide(x, y)`` -- returns x / y. Throws an exception if either x or y are not numbers.
|
||||
* ``eval(string)`` -- evaluates the string as a program, passing the local variables (those ``assign`` ed to). This permits using the template processor to construct complex results from local variables. Because the `{` and `}` characters are special, you must use `[[` for the `{` character and `]]` for the '}' character; they are converted automatically. Note also that prefixes and suffixes (the "|prefix|suffix" syntax) cannot be used in the argument to this function when using template program mode.
|
||||
* ``eval(string)`` -- evaluates the string as a program, passing the local variables (those ``assign`` ed to). This permits using the template processor to construct complex results from local variables. Because the `{` and `}` characters are special, you must use `[[` for the `{` character and `]]` for the `}` character; they are converted automatically. Note also that prefixes and suffixes (the `|prefix|suffix` syntax) cannot be used in the argument to this function when using template program mode.
|
||||
* ``field(name)`` -- returns the metadata field named by ``name``.
|
||||
* ``first_non_empty(value, value, ...)`` -- returns the first value that is not empty. If all values are empty, then the empty value is returned. You can have as many values as you want.
|
||||
* ``format_date(x, date_format)`` -- format_date(val, format_string) -- format the value, which must be a date field, using the format_string, returning a string. The formatting codes are::
|
||||
@ -306,7 +306,7 @@ The following functions are available in addition to those described in single-f
|
||||
* ``substr(str, start, end)`` -- returns the ``start``'th through the ``end``'th characters of ``str``. The first character in ``str`` is the zero'th character. If end is negative, then it indicates that many characters counting from the right. If end is zero, then it indicates the last character. For example, ``substr('12345', 1, 0)`` returns ``'2345'``, and ``substr('12345', 1, -1)`` returns ``'234'``.
|
||||
* ``subtract(x, y)`` -- returns x - y. Throws an exception if either x or y are not numbers.
|
||||
* ``today()`` -- return a date string for today. This value is designed for use in format_date or days_between, but can be manipulated like any other string. The date is in ISO format.
|
||||
* ``template(x)`` -- evaluates x as a template. The evaluation is done in its own context, meaning that variables are not shared between the caller and the template evaluation. Because the `{` and `}` characters are special, you must use `[[` for the `{` character and `]]` for the '}' character; they are converted automatically. For example, ``template('[[title_sort]]') will evaluate the template ``{title_sort}`` and return its value. Note also that prefixes and suffixes (the "|prefix|suffix" syntax) cannot be used in the argument to this function when using template program mode.
|
||||
* ``template(x)`` -- evaluates x as a template. The evaluation is done in its own context, meaning that variables are not shared between the caller and the template evaluation. Because the `{` and `}` characters are special, you must use `[[` for the `{` character and `]]` for the `}` character; they are converted automatically. For example, ``template('[[title_sort]]')`` will evaluate the template ``{title_sort}`` and return its value. Note also that prefixes and suffixes (the `|prefix|suffix` syntax) cannot be used in the argument to this function when using template program mode.
|
||||
|
||||
.. _template_functions_reference:
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user