This commit is contained in:
GRiker 2012-01-05 06:35:23 -07:00
commit b6c715e1e9
20 changed files with 606 additions and 273 deletions

View File

@ -10,11 +10,11 @@ class Alternet(BasicNewsRecipe):
category = 'News, Magazine' category = 'News, Magazine'
description = 'News magazine and online community' description = 'News magazine and online community'
feeds = [ feeds = [
(u'Front Page', u'http://feeds.feedblitz.com/alternet'), (u'Front Page', u'http://feeds.feedblitz.com/alternet'),
(u'Breaking News', u'http://feeds.feedblitz.com/alternet_breaking_news'), (u'Breaking News', u'http://feeds.feedblitz.com/alternet_breaking_news'),
(u'Top Ten Campaigns', u'http://feeds.feedblitz.com/alternet_top_10_campaigns'), (u'Top Ten Campaigns', u'http://feeds.feedblitz.com/alternet_top_10_campaigns'),
(u'Special Coverage Areas', u'http://feeds.feedblitz.com/alternet_coverage') (u'Special Coverage Areas', u'http://feeds.feedblitz.com/alternet_coverage')
] ]
remove_attributes = ['width', 'align','cellspacing'] remove_attributes = ['width', 'align','cellspacing']
remove_javascript = True remove_javascript = True
use_embedded_content = False use_embedded_content = False
@ -36,3 +36,5 @@ class Alternet(BasicNewsRecipe):
self.temp_files[-1].write(html) self.temp_files[-1].write(html)
self.temp_files[-1].close() self.temp_files[-1].close()
return self.temp_files[-1].name return self.temp_files[-1].name
conversion_options = {'linearize_tables': True}

13
recipes/goal.recipe Normal file
View File

@ -0,0 +1,13 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1325677767(BasicNewsRecipe):
    # calibre news recipe for the Italian-language Goal.com RSS feed.

    # Metadata shown to the user.
    title = u'Goal'
    __author__ = 'faber1971'
    description = 'Sports news from Italy'
    language = 'it'

    # Feed selection and limits.
    feeds = [
        (u'Goal', u'http://www.goal.com/it/feeds/news?fmt=rss'),
    ]
    oldest_article = 1
    max_articles_per_feed = 100

    # HTML clean-up: automatic extraction, plus dropping everything that
    # follows the element whose id is "article_content".
    auto_cleanup = True
    remove_tags_after = [{'id': 'article_content'}]

View File

@ -1,79 +1,79 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2011, Attis <attis@attis.one.pl>' __copyright__ = '2011 Attis <attis@attis.one.pl>, 2012 Tomasz Długosz <tomek3d@gmail.com>'
__version__ = 'v. 0.1' __version__ = 'v. 0.1'
import re import re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class KopalniaWiedzy(BasicNewsRecipe): class KopalniaWiedzy(BasicNewsRecipe):
title = u'Kopalnia Wiedzy' title = u'Kopalnia Wiedzy'
publisher = u'Kopalnia Wiedzy' publisher = u'Kopalnia Wiedzy'
description = u'Ciekawostki ze świata nauki i techniki' description = u'Ciekawostki ze świata nauki i techniki'
encoding = 'utf-8' encoding = 'utf-8'
__author__ = 'Attis' __author__ = 'Attis & Tomasz Długosz'
language = 'pl' language = 'pl'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
INDEX = u'http://kopalniawiedzy.pl/' INDEX = u'http://kopalniawiedzy.pl/'
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
remove_tags = [{'name':'p', 'attrs': {'class': 'keywords'} }, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}] remove_tags = [{'name':'p', 'attrs': {'class': 'keywords'}}, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}, {'name':'div', 'attrs': {'class':'article-time-and-cat'}}, {'name':'p', 'attrs': {'class':'tags'}}]
remove_tags_after = dict(attrs={'class':'ad-square'}) remove_tags_after = dict(attrs={'class':'ad-square'})
keep_only_tags = [dict(name="div", attrs={'id':'articleContent'})] keep_only_tags = [dict(name="div", attrs={'class':'article-text text-small'})]
extra_css = '.topimage {margin-top: 30px}' extra_css = '.topimage {margin-top: 30px}'
preprocess_regexps = [ preprocess_regexps = [
(re.compile(u'<a .* rel="lightboxText" .*><img (.*)></a>'), (re.compile(u'<a .* rel="lightboxText" .*><img (.*)></a>'),
lambda match: '<img class="topimage" ' + match.group(1) + '>' ), lambda match: '<img class="topimage" ' + match.group(1) + '>' ),
(re.compile(u'<br /><br />'), (re.compile(u'<br /><br />'),
lambda match: '<br\/>') lambda match: '<br\/>')
] ]
feeds = [ feeds = [
(u'Biologia', u'http://kopalniawiedzy.pl/wiadomosci_biologia.rss'), (u'Biologia', u'http://kopalniawiedzy.pl/wiadomosci_biologia.rss'),
(u'Medycyna', u'http://kopalniawiedzy.pl/wiadomosci_medycyna.rss'), (u'Medycyna', u'http://kopalniawiedzy.pl/wiadomosci_medycyna.rss'),
(u'Psychologia', u'http://kopalniawiedzy.pl/wiadomosci_psychologia.rss'), (u'Psychologia', u'http://kopalniawiedzy.pl/wiadomosci_psychologia.rss'),
(u'Technologie', u'http://kopalniawiedzy.pl/wiadomosci_technologie.rss'), (u'Technologie', u'http://kopalniawiedzy.pl/wiadomosci_technologie.rss'),
(u'Ciekawostki', u'http://kopalniawiedzy.pl/wiadomosci_ciekawostki.rss'), (u'Ciekawostki', u'http://kopalniawiedzy.pl/wiadomosci_ciekawostki.rss'),
(u'Artykuły', u'http://kopalniawiedzy.pl/artykuly.rss') (u'Artykuły', u'http://kopalniawiedzy.pl/artykuly.rss')
] ]
def is_link_wanted(self, url, tag): def is_link_wanted(self, url, tag):
return tag['class'] == 'next' return tag['class'] == 'next'
def remove_beyond(self, tag, next): def remove_beyond(self, tag, next):
while tag is not None and getattr(tag, 'name', None) != 'body': while tag is not None and getattr(tag, 'name', None) != 'body':
after = getattr(tag, next) after = getattr(tag, next)
while after is not None: while after is not None:
ns = getattr(tag, next) ns = getattr(tag, next)
after.extract() after.extract()
after = ns after = ns
tag = tag.parent tag = tag.parent
def append_page(self, soup, appendtag, position): def append_page(self, soup, appendtag, position):
pager = soup.find('a',attrs={'class':'next'}) pager = soup.find('a',attrs={'class':'next'})
if pager: if pager:
nexturl = self.INDEX + pager['href'] nexturl = self.INDEX + pager['href']
soup2 = self.index_to_soup(nexturl) soup2 = self.index_to_soup(nexturl)
texttag = soup2.find('div', attrs={'id':'articleContent'}) texttag = soup2.find('div', attrs={'id':'articleContent'})
tag = texttag.find(attrs={'class':'pages'}) tag = texttag.find(attrs={'class':'pages'})
self.remove_beyond(tag, 'nextSibling') self.remove_beyond(tag, 'nextSibling')
newpos = len(texttag.contents) newpos = len(texttag.contents)
self.append_page(soup2,texttag,newpos) self.append_page(soup2,texttag,newpos)
appendtag.insert(position,texttag) appendtag.insert(position,texttag)
def preprocess_html(self, soup): def preprocess_html(self, soup):
self.append_page(soup, soup.body, 3) self.append_page(soup, soup.body, 3)
for item in soup.findAll('div',attrs={'class':'pages'}): for item in soup.findAll('div',attrs={'class':'pages'}):
item.extract() item.extract()
for item in soup.findAll('p', attrs={'class':'wykop'}): for item in soup.findAll('p', attrs={'class':'wykop'}):
item.extract() item.extract()
return soup return soup

23
recipes/macity.recipe Normal file
View File

@ -0,0 +1,23 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1325766771(BasicNewsRecipe):
    # calibre news recipe for Macity, fetched through a feedsportal.com feed.
    title = u'Macity'
    __author__ = 'faber1971'
    description = 'Apple and hi-tech news'
    language = 'it'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    feeds = [(u'Macity', u'http://www.macitynet.it.feedsportal.com/c/33714/f/599513/index.rss')]

    # Escape table used by the obfuscated feed links: the two-character
    # sequence '0' + <key> stands for <value>.
    _ESCAPES = {
        'A': '0',
        'B': '.',
        'C': '/',
        'D': '?',
        'E': '-',
        'F': '=',
        'G': '&',
        'I': '_',
        'L': 'http://',
        'N': '.com',
        'S': 'www.',
    }

    def get_article_url(self, article):
        """Return the real article URL for a feed entry.

        The link is obtained from ``BasicNewsRecipe.get_article_url``. When
        its last path component is ``story01.htm`` the second-to-last
        component is taken as an encoded copy of the target URL and decoded
        with ``_ESCAPES``; any other link is returned unchanged.

        Decoding is done in a single left-to-right pass. The previous
        implementation applied one ``str.replace`` per escape and, because
        ``a[-0]`` is ``a[0]``, handled ``'0A' -> '0'`` first; the ``0`` it
        produced could then combine with the following letter and be decoded
        a second time (``'0AB'`` became ``'.'`` instead of ``'0B'``).
        """
        import re  # local import: this recipe file does not import re itself

        link = BasicNewsRecipe.get_article_url(self, article)
        if not link:
            # Nothing to decode; hand the empty value back to the caller.
            return link
        parts = link.split('/')
        if parts[-1] == "story01.htm":
            escapes = self._ESCAPES
            link = re.sub(
                r'0([ABCDEFGILNS])',
                lambda match: escapes[match.group(1)],
                parts[-2],
            )
        return link

76
recipes/money_pl.recipe Normal file
View File

@ -0,0 +1,76 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class FocusRecipe(BasicNewsRecipe):
    # calibre news recipe for Money.pl, fetched through feedsportal.com feeds.

    __license__ = 'GPL v3'
    __author__ = u'intromatyk <intromatyk@gmail.com>'
    language = 'pl'
    version = 1

    title = u'Money.pl'
    category = u'News'
    description = u'Informacje finansowe z kraju i ze świata. Aktualne i archiwalne: notowania giełdowe, kursy walut, wskaźniki gospodarcze.'

    remove_empty_feeds = True
    no_stylesheets = True  # was assigned twice in the original; once is enough
    oldest_article = 1
    max_articles_per_feed = 100000
    recursions = 0
    remove_javascript = True
    simultaneous_downloads = 2

    # Raw string so the `\/` and `\.` escapes reach the regex engine as
    # written; the compiled pattern is unchanged.
    # NOTE(review): the `|` is not grouped, so `(?P<url>...)` alternates
    # between "http://(www.money.pl)" and "(rss.feedsportal.com/c)/...html" --
    # confirm that is intended. Nothing in the visible code uses `r`.
    r = re.compile(r'.*(?P<url>http:\/\/(www.money.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*')

    keep_only_tags = [dict(name='div', attrs={'class': 'artykul'})]
    remove_tags = [dict(name='ul', attrs={'class': 'socialStuff'})]

    extra_css = '''
body {font-family: Arial,Helvetica,sans-serif ;}
h1{text-align: left;}
h2{font-size: medium; font-weight: bold;}
p.lead {font-weight: bold; text-align: left;}
.authordate {font-size: small; color: #696969;}
.fot{font-size: x-small; color: #666666;}
'''

    feeds = [
        ('Wiadomosci z kraju', 'http://money.pl.feedsportal.com/c/33900/f/612847/index.rss'),
        ('Wiadomosci ze swiata', 'http://money.pl.feedsportal.com/c/33900/f/612848/index.rss'),
        ('Gospodarka', 'http://money.pl.feedsportal.com/c/33900/f/612849/index.rss'),
        ('Waluty', 'http://money.pl.feedsportal.com/c/33900/f/612850/index.rss'),
        ('Gielda', 'http://money.pl.feedsportal.com/c/33900/f/612851/index.rss'),
        ('Banki', 'http://money.pl.feedsportal.com/c/33900/f/612852/index.rss'),
        ('Fundusze', 'http://money.pl.feedsportal.com/c/33900/f/612853/index.rss'),
        ('Emerytury', 'http://money.pl.feedsportal.com/c/33900/f/612854/index.rss'),
        ('Podatki', 'http://money.pl.feedsportal.com/c/33900/f/612855/index.rss'),
        ('Ubezpieczenia', 'http://money.pl.feedsportal.com/c/33900/f/612856/index.rss'),
        ('Poradniki', 'http://money.pl.feedsportal.com/c/33900/f/612857/index.rss'),
        ('Raporty', 'http://money.pl.feedsportal.com/c/33900/f/612858/index.rss'),
        ('Motoryzacja', 'http://money.pl.feedsportal.com/c/33900/f/612859/index.rss'),
        ('Manager', 'http://money.pl.feedsportal.com/c/33900/f/612860/index.rss'),
        ('Dla firm', 'http://money.pl.feedsportal.com/c/33900/f/612861/index.rss'),
        ('Prawo', 'http://money.pl.feedsportal.com/c/33900/f/612862/index.rss'),
        ('Nieruchomosci', 'http://money.pl.feedsportal.com/c/33900/f/612863/index.rss'),
        ('Praca', 'http://money.pl.feedsportal.com/c/33900/f/612864/index.rss'),
    ]

    def print_version(self, url):
        """Map an article URL to the URL that should actually be downloaded.

        For feedsportal links the part of ``url`` starting 21 characters
        after the position of ``0Cartykul0C`` is appended to the mobile
        article prefix and the feedsportal escape sequences are expanded,
        in the same order as before. For any other URL ``/nc/1`` is replaced
        by ``/do-druku/1``.

        If a feedsportal link does not contain the ``0Cartykul0C`` marker the
        URL is returned unchanged; previously ``find()`` returned -1 and the
        slice ``url[20:]`` produced a meaningless address.
        """
        if 'money.pl.feedsportal.com' not in url:
            return url.replace('/nc/1', '/do-druku/1')

        marker = url.find('0Cartykul0C')
        if marker == -1:
            return url

        u = 'http://www.m.money.pl/wiadomosci/artykul/' + url[marker + 21:]
        # Order matters: later substitutions operate on the output of
        # earlier ones (e.g. the tracking suffix contains '/story01.htm'
        # only after '0C' has become '/').
        substitutions = (
            ('0C', '/'),
            ('A', ''),
            ('0E', '-'),
            ('0P', ';'),
            ('0H', ','),
            ('0B', '.'),
            (',0,', ',-1,'),
            ('0Tutm0Isource0Frss0Gutm0Imedium0Frss0Gutm0Icampaign0Frss/story01.htm', ''),
        )
        for old, new in substitutions:
            u = u.replace(old, new)
        return u

12
recipes/wired_it.recipe Normal file
View File

@ -0,0 +1,12 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1325758162(BasicNewsRecipe):
    # calibre news recipe for the Italian edition of Wired (wired.it RSS).

    # Metadata shown to the user.
    title = u'Wired'
    __author__ = 'faber1971'
    description = 'An American magazine that reports on how new technology affects culture, the economy, and politics'
    language = 'it'

    # Feed selection and limits.
    feeds = [
        (u'Wired', u'http://www.wired.it/rss.xml'),
    ]
    oldest_article = 7
    max_articles_per_feed = 100

    # HTML clean-up: automatic extraction, plus dropping everything that
    # follows the <div class="article_content"> element.
    auto_cleanup = True
    remove_tags_after = [{'name': 'div', 'attrs': {'class': 'article_content'}}]

View File

@ -1410,19 +1410,22 @@ class MOBIFile(object): # {{{
self.mobi_header.extra_data_flags, decompress) for r in xrange(1, self.mobi_header.extra_data_flags, decompress) for r in xrange(1,
min(len(self.records), ntr+1))] min(len(self.records), ntr+1))]
self.image_records, self.binary_records = [], [] self.image_records, self.binary_records = [], []
image_index = 0
for i in xrange(fntbr, len(self.records)): for i in xrange(fntbr, len(self.records)):
if i in self.indexing_record_nums or i in self.huffman_record_nums: if i in self.indexing_record_nums or i in self.huffman_record_nums:
continue continue
image_index += 1
r = self.records[i] r = self.records[i]
fmt = None fmt = None
if i >= fii and r.raw[:4] not in (b'FLIS', b'FCIS', b'SRCS', if i >= fii and r.raw[:4] not in {b'FLIS', b'FCIS', b'SRCS',
b'\xe9\x8e\r\n'): b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP',
b'AUDI', b'VIDE'}:
try: try:
width, height, fmt = identify_data(r.raw) width, height, fmt = identify_data(r.raw)
except: except:
pass pass
if fmt is not None: if fmt is not None:
self.image_records.append(ImageRecord(len(self.image_records)+1, r, fmt)) self.image_records.append(ImageRecord(image_index, r, fmt))
else: else:
self.binary_records.append(BinaryRecord(i, r)) self.binary_records.append(BinaryRecord(i, r))

View File

@ -974,12 +974,13 @@ class MobiReader(object):
continue continue
processed_records.append(i) processed_records.append(i)
data = self.sections[i][0] data = self.sections[i][0]
image_index += 1
if data[:4] in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', if data[:4] in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n',
b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}: b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}:
# A FLIS, FCIS, SRCS or EOF record, ignore # This record is a known non image type, not need to try to
# load the image
continue continue
buf = cStringIO.StringIO(data) buf = cStringIO.StringIO(data)
image_index += 1
try: try:
im = PILImage.open(buf) im = PILImage.open(buf)
im = im.convert('RGB') im = im.convert('RGB')

View File

@ -4,15 +4,24 @@
### ###
Copyright 2011, Kovid Goyal <kovid@kovidgoyal.net> Copyright 2011, Kovid Goyal <kovid@kovidgoyal.net>
Released under the GPLv3 License Released under the GPLv3 License
Based on code originally written by Peter Sorotkin (epubcfi.js) Based on code originally written by Peter Sorotkin
(http://code.google.com/p/epub-revision/source/browse/trunk/src/samples/cfi/epubcfi.js)
Improvements with respect to that code:
1. Works on all browsers (WebKit, Firefox and IE >= 8)
2. Works if the point is after the last text character in an element
3. Works for elements that are scrollable (i.e. have their own scrollbars)
To check whether this script is compatible with the current browser, call
window.cfi.is_compatible(); it throws an exception if the browser is not compatible.
### ###
#
log = (error) -> log = (error) -> # {{{
if error if error
if window?.console?.log if window?.console?.log
window.console.log(error) window.console.log(error)
else if process?.stdout?.write else if process?.stdout?.write
process.stdout.write(error + '\n') process.stdout.write(error + '\n')
# }}}
# CFI escaping {{{ # CFI escaping {{{
escape_for_cfi = (raw) -> escape_for_cfi = (raw) ->
@ -51,12 +60,111 @@ fstr = (d) -> # {{{
ans ans
# }}} # }}}
get_current_time = (target) -> # {{{
    # Format the playback position of `target` with fstr(); targets that
    # have no currentTime property are reported as position 0.
    seconds = if target.currentTime is undefined then 0 else target.currentTime
    fstr(seconds)
# }}}
viewport_to_document = (x, y, doc) -> # {{{
    # Convert the point (x, y), given relative to the viewport of `doc`,
    # into coordinates relative to the top-level document (window.document).
    # Returns [x, y].
    #
    # While `doc` lives inside a frame, the point is shifted by the frame
    # element's bounding rectangle, which expresses it relative to the
    # parent's viewport. Once the top-level document is reached its scroll
    # offset is added exactly once.
    #
    # The previous version added the scroll offset twice on the top-level
    # path and, on the frame path, recursed with only the frame's corner,
    # discarding the incoming x and y.
    until doc == window.document
        frame = doc.defaultView?.frameElement
        # Not reachable from the top-level window; stop climbing.
        break unless frame
        rect = frame.getBoundingClientRect()
        x += rect.left
        y += rect.top
        doc = frame.ownerDocument
    win = doc.defaultView
    return [x + win.scrollX, y + win.scrollY]
# }}}
# Equivalent for caretRangeFromPoint for non WebKit browsers {{{
range_has_point = (range, x, y) ->
    # True when (x, y) lies inside, or on the edge of, any of the
    # rectangles returned by range.getClientRects(); false otherwise.
    for box in range.getClientRects()
        inside_x = box.left <= x and x <= box.right
        inside_y = box.top <= y and y <= box.bottom
        return true if inside_x and inside_y
    false
offset_in_text_node = (node, range, x, y) ->
    # Bisect the text of `node` to find the character offset whose rendered
    # box contains (x, y). `range` is a scratch Range that is repositioned
    # on every probe. Returns the offset found, or the lower bound of the
    # remaining interval when no sub-range contains the point.
    [low, high] = [0, node.nodeValue.length]
    until low == high
        mid = Math.floor((low + high) / 2)
        # Probe the single character at the midpoint first.
        range.setStart(node, mid)
        range.setEnd(node, mid + 1)
        return mid if range_has_point(range, x, y)
        # Then the upper half ...
        range.setStart(node, mid + 1)
        range.setEnd(node, high)
        if range_has_point(range, x, y)
            low = mid + 1
            continue
        # ... and finally the lower half.
        range.setStart(node, low)
        range.setEnd(node, mid)
        if range_has_point(range, x, y)
            high = mid
            continue
        break
    return low
find_offset_for_point = (x, y, node, cdoc) ->
    # Locate the text position under the point (x, y) among the direct
    # children of `node`. `cdoc` is the document used to create the scratch
    # Range. Returns [text_node, offset].
    #
    # Each non-empty child with nodeType 3, 4, 5 or 6 is tested. If none
    # contains the point, the point is taken to lie after the last bit of
    # text and the end of the last such child is returned. Throws when
    # `node` has no such child at all.
    #
    # Only text-like children are remembered as the fallback: the previous
    # version remembered every child, so an element or comment as the last
    # child made `nodeValue.length` fail on null.
    range = cdoc.createRange()
    child = node.firstChild
    last_text = null
    while child
        if child.nodeType in [3, 4, 5, 6] and child.nodeValue?.length
            range.setStart(child, 0)
            range.setEnd(child, child.nodeValue.length)
            if range_has_point(range, x, y)
                return [child, offset_in_text_node(child, range, x, y)]
            last_text = child
        child = child.nextSibling

    if not last_text
        # Message kept as-is for callers; it now also covers nodes whose
        # children contain no text.
        throw "#{node} has no children"
    # The point must be after the last bit of text
    return [last_text, last_text.nodeValue.length]
# }}}
class CanonicalFragmentIdentifier class CanonicalFragmentIdentifier
# This class is a namespace to expose CFI functions via the window.cfi # This class is a namespace to expose CFI functions via the window.cfi
# object # object
constructor: () -> constructor: () -> # {{{
this.CREATE_RANGE_ERR = "Your browser does not support the createRange function. Update it to a newer version."
this.IE_ERR = "Your browser is too old. You need Internet Explorer version 8 or newer."
# }}}
is_compatible: () -> # {{{
    # Throw CREATE_RANGE_ERR if document.createRange is missing, or IE_ERR
    # if the browser is detected as Internet Explorer older than version 8;
    # otherwise return nothing.
    throw this.CREATE_RANGE_ERR unless window.document.createRange
    # Check if Internet Explorer >= 8 as getClientRects returns physical
    # rather than logical pixels on older IE
    probe = document.createElement('div')
    ver = 3
    # Raise the version in the conditional comment until it no longer
    # matches, i.e. until no <i> element is produced.
    loop
        probe.innerHTML = "<!--[if gt IE #{ ++ver }]><i></i><![endif]-->"
        break if probe.getElementsByTagName('i').length == 0
    if 4 < ver < 8
        # We have IE < 8
        throw this.IE_ERR
# }}}
set_current_time: (target, val) -> # {{{
    # Set target.currentTime to `val`. Targets without a currentTime
    # property are ignored. If readyState is neither 4 nor "complete", the
    # assignment is deferred to the target's "canplay" event.
    return if target.currentTime == undefined
    ready = target.readyState == 4 or target.readyState == "complete"
    if ready
        target.currentTime = val
    else
        on_canplay = -> target.currentTime = val
        target.addEventListener("canplay", on_canplay, false)
#}}}
encode: (doc, node, offset, tail) -> # {{{ encode: (doc, node, offset, tail) -> # {{{
cfi = tail or "" cfi = tail or ""
@ -64,7 +172,7 @@ class CanonicalFragmentIdentifier
# Handle the offset, if any # Handle the offset, if any
switch node.nodeType switch node.nodeType
when 1 # Element node when 1 # Element node
if typeoff(offset) == 'number' if typeof(offset) == 'number'
node = node.childNodes.item(offset) node = node.childNodes.item(offset)
when 3, 4, 5, 6 # Text/entity/CDATA node when 3, 4, 5, 6 # Text/entity/CDATA node
offset or= 0 offset or= 0
@ -89,12 +197,12 @@ class CanonicalFragmentIdentifier
cfi = "!" + cfi cfi = "!" + cfi
continue continue
break break
# Increase index by the length of all previous sibling text nodes # Find position of node in parent
index = 0 index = 0
child = p.firstChild child = p.firstChild
while true while true
index |= 1 index |= 1 # Increment index by 1 if it is even
if child.nodeType in [1, 7] if child.nodeType == 1
index++ index++
if child == node if child == node
break break
@ -117,8 +225,8 @@ class CanonicalFragmentIdentifier
error = null error = null
node = doc node = doc
until cfi.length <= 0 or error until cfi.length < 1 or error
if ( (r = cfi.match(simple_node_regex)) is not null ) # Path step if (r = cfi.match(simple_node_regex)) # Path step
target = parseInt(r[1]) target = parseInt(r[1])
assertion = r[2] assertion = r[2]
if assertion if assertion
@ -136,11 +244,18 @@ class CanonicalFragmentIdentifier
error = "No matching child found for CFI: " + cfi error = "No matching child found for CFI: " + cfi
break break
index |= 1 # Increment index by 1 if it is even index |= 1 # Increment index by 1 if it is even
if child.nodeType in [1, 7] # We have an element or a PI if child.nodeType == 1
index++ index++
if ( index == target ) if ( index == target )
cfi = cfi.substr(r[0].length) cfi = cfi.substr(r[0].length)
node = child node = child
if assertion and node.id != assertion
# The found child does not match the id assertion,
# trust the id assertion if an element with that id
# exists
child = doc.getElementById(assertion)
if child
node = child
break break
child = child.nextSibling child = child.nextSibling
@ -198,7 +313,9 @@ class CanonicalFragmentIdentifier
next = false next = false
while true while true
nn = node.nextSibling nn = node.nextSibling
if nn.nodeType in [3, 4, 5, 6] # Text node, entity, cdata if not nn
break
if nn.nodeType in [3, 4, 5, 6] and nn.nodeValue?.length # Text node, entity, cdata
next = nn next = nn
break break
if not next if not next
@ -253,7 +370,7 @@ class CanonicalFragmentIdentifier
(if target.parentNode then target.parentNode else target).normalize() (if target.parentNode then target.parentNode else target).normalize()
if name in ['audio', 'video'] if name in ['audio', 'video']
tail = "~" + fstr target.currentTime tail = "~" + get_current_time(target)
if name in ['img', 'video'] if name in ['img', 'video']
px = ((x + cwin.scrollX - target.offsetLeft)*100)/target.offsetWidth px = ((x + cwin.scrollX - target.offsetLeft)*100)/target.offsetWidth
@ -265,9 +382,12 @@ class CanonicalFragmentIdentifier
if range if range
target = range.startContainer target = range.startContainer
offset = range.startOffset offset = range.startOffset
else
throw "Failed to find range from point (#{ x }, #{ y })"
else if cdoc.createRange
[target, offset] = find_offset_for_point(x, y, target, cdoc)
else else
# TODO: implement a span bisection algorithm for UAs throw this.CREATE_RANGE_ERR
# without caretRangeFromPoint (Gecko, IE)
this.encode(doc, target, offset, tail) this.encode(doc, target, offset, tail)
# }}} # }}}
@ -285,52 +405,102 @@ class CanonicalFragmentIdentifier
nwin = ndoc.defaultView nwin = ndoc.defaultView
x = null x = null
y = null y = null
range = null
if typeof(r.offset) == "number" if typeof(r.offset) == "number"
# Character offset # Character offset
if not ndoc.createRange
throw this.CREATE_RANGE_ERR
range = ndoc.createRange() range = ndoc.createRange()
if r.forward if r.forward
try_list = [{start:0, end:0, a:0.5}, {start:0, end:1, a:1}, {start:-1, end:0, a:0}] try_list = [{start:0, end:0, a:0.5}, {start:0, end:1, a:1}, {start:-1, end:0, a:0}]
else else
try_list = [{start:0, end:0, a:0.5}, {start:-1, end:0, a:0}, {start:0, end:1, a:1}] try_list = [{start:0, end:0, a:0.5}, {start:-1, end:0, a:0}, {start:0, end:1, a:1}]
k = 0
a = null a = null
rects = null rects = null
node_len = node.nodeValue.length node_len = node.nodeValue.length
until rects or rects.length or k >= try_list.length offset = r.offset
t = try_list[k++] for i in [0, 1]
start_offset = r.offset + t.start # Try reducing the offset by 1 if we get no match as if it refers to the position after the
end_offset = r.offset + t.end # last character we wont get a match with getClientRects
a = t.a offset = r.offset - i
if start_offset < 0 or end_offset >= node_len if offset < 0
continue offset = 0
range.setStart(node, start_offset) k = 0
range.setEnd(node, end_offset) until rects?.length or k >= try_list.length
rects = range.getClientRects() t = try_list[k++]
start_offset = offset + t.start
end_offset = offset + t.end
a = t.a
if start_offset < 0 or end_offset >= node_len
continue
range.setStart(node, start_offset)
range.setEnd(node, end_offset)
rects = range.getClientRects()
if rects?.length
break
if not rects or not rects.length
if not rects?.length
log("Could not find caret position: rects: #{ rects } offset: #{ r.offset }") log("Could not find caret position: rects: #{ rects } offset: #{ r.offset }")
return null return null
rect = rects[0]
x = (a*rect.left + (1-a)*rect.right)
y = (rect.top + rect.bottom)/2
else else
x = node.offsetLeft - nwin.scrollX [x, y] = [r.x, r.y]
y = node.offsetTop - nwin.scrollY
if typeof(r.x) == "number" and node.offsetWidth
x += (r.x*node.offsetWidth)/100
y += (r.y*node.offsetHeight)/100
until ndoc == doc {x:x, y:y, node:r.node, time:r.time, range:range, a:a}
node = nwin.frameElement
# }}}
scroll_to: (cfi, callback=false, doc=window?.document) -> # {{{
point = this.point(cfi, doc)
if not point
log("No point found for cfi: #{ cfi }")
return
if typeof point.time == 'number'
this.set_current_time(point.node, point.time)
if point.range != null
r = point.range
node = r.startContainer
ndoc = node.ownerDocument ndoc = node.ownerDocument
nwin = ndoc.defaultView nwin = ndoc.defaultView
x += node.offsetLeft - nwin.scrollX span = ndoc.createElement('span')
y += node.offsetTop - nwin.scrollY span.setAttribute('style', 'border-width: 0; padding: 0; margin: 0')
r.surroundContents(span)
span.scrollIntoView()
fn = ->
rect = span.getBoundingClientRect()
x = (point.a*rect.left + (1-point.a)*rect.right)
y = (rect.top + rect.bottom)/2
[x, y] = viewport_to_document(x, y, ndoc)
tn = if span.firstChild then span.firstChild.nodeValue else ''
tn = ndoc.createTextNode(tn)
p = span.parentNode
p.insertBefore(tn, span)
p.removeChild(span)
p.normalize()
if callback
callback(x, y)
else
node = point.node
nwin = node.ownerDocument.defaultView
node.scrollIntoView()
{x:x, y:y, node:r.node, time:r.time} fn = ->
rect = node.getBoundingClientRect()
[x, y] = viewport_to_document(rect.left, rect.top, node.ownerDocument)
if typeof(point.x) == 'number' and node.offsetWidth
x += (r.x*node.offsetWidth)/100
if typeof(point.y) == 'number' and node.offsetHeight
y += (r.y*node.offsetHeight)/100
scrollTo(x, y)
if callback
callback(x, y)
setTimeout(fn, 10)
null
# }}} # }}}
if window? if window?

View File

@ -6,19 +6,53 @@
Released under the GPLv3 License Released under the GPLv3 License
### ###
viewport_top = (node) -> log = (error) ->
$(node).offset().top - window.pageYOffset if error
if window?.console?.log
window.console.log(error)
else if process?.stdout?.write
process.stdout.write(error + '\n')
viewport_left = (node) -> show_cfi = () ->
$(node).offset().left - window.pageXOffset if window.current_cfi
fn = (x, y) ->
ms = document.getElementById("marker").style
ms.display = 'block'
ms.top = y - 30 + 'px'
ms.left = x - 1 + 'px'
window.cfi.scroll_to(window.current_cfi, fn)
null
mark_and_reload = (evt) ->
# Remove image in case the click was on the image itself, we want the cfi to
# be on the underlying element
ms = document.getElementById("marker")
ms.parentNode.removeChild(ms)
fn = () ->
window.current_cfi = window.cfi.at(evt.clientX, evt.clientY)
if window.current_cfi
epubcfi = "#epubcfi(#{ window.current_cfi })"
newloc = window.location.href.replace(/#.*$/, '') + epubcfi
window.location.replace(newloc)
document.getElementById('current-cfi').innerHTML = window.current_cfi
window.location.reload()
setTimeout(fn, 1)
null
window.onload = -> window.onload = ->
h1 = document.getElementsByTagName('h1')[0] try
x = h1.scrollLeft + 150 window.cfi.is_compatible()
y = viewport_top(h1) + h1.offsetHeight/2 catch error
e = document.elementFromPoint x, y alert(error)
if e.getAttribute('id') != 'first-h1'
alert 'Failed to find top h1'
return return
alert window.cfi.at x, y document.onclick = mark_and_reload
r = location.hash.match(/#epubcfi\((.+)\)$/)
if r
window.current_cfi = r[1]
document.getElementById('current-cfi').innerHTML = window.current_cfi
setTimeout(show_cfi, 100)
null

View File

@ -0,0 +1,65 @@
<!DOCTYPE html>
<html>
<head>
<title>Testing CFI functionality</title>
<script type="text/javascript" src="cfi.coffee"></script>
<script type="text/javascript" src="cfi-test.coffee"></script>
<style type="text/css">
body { font-family: sans-serif }
h2 {
border-top: solid 2px black;
margin-top: 4ex;
}
#container {
max-width: 30em;
margin-right: auto;
margin-left: 2em;
position:relative;
}
#current-cfi {
font-family: monospace;
border: solid 1px blue;
padding: 1em;
}
#overflow {
max-height: 100px;
overflow: scroll;
border: solid 1px black;
}
</style>
</head>
<body>
<div id="container">
<h1 id="first-h1">Testing EPUB CFI</h1>
<div id="current-cfi">Current CFI:&nbsp;None</div>
<h2>A div with scrollbars</h2>
<div id="overflow"> But I must explain to you how all this mistaken
idea of denouncing pleasure and praising pain was born and I
will give you a complete account of the system, and expound the
actual teachings of the great explorer of the truth, the
master-builder of human happiness. No one rejects, dislikes, or
avoids pleasure itself, because it is pleasure, but because
those who do not know how to pursue pleasure rationally
encounter consequences that are extremely painful. Nor again is
there anyone who <b>loves</b> or pursues or desires to obtain pain of
itself, because it is pain, but because occasionally
circumstances occur in which toil and pain can procure him some
great pleasure. To take a trivial example, which of us ever
undertakes laborious physical exercise, except to obtain some
advantage from it? But who has any right to find fault with a
man who chooses to enjoy a pleasure that has no annoying
consequences, or one who avoids a pain that produces no
resultant pleasure? On the other hand, we denounce with
righteous indignation and dislike men who are so beguiled and
demoralized by the charms of pleasure of the moment, so blinded
by desire, that they cannot foresee
</div>
</div>
<img id="marker" style="position: absolute; display:none; z-index:10" src="marker.png" alt="Marker" />
</body>
</html>

Binary file not shown.

After

Width:  |  Height:  |  Size: 751 B

View File

@ -1,14 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<title>Testing CFI functionality</title>
<script type="text/javascript" src="../cfi.coffee"></script>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="cfi-test.coffee"></script>
</head>
<body>
<h1 id="first-h1" style="border: solid 1px red">Testing CFI functionality</h1>
</body>
</html>

View File

@ -16,10 +16,9 @@ except ImportError:
if False: init_calibre, serve if False: init_calibre, serve
from calibre.utils.coffeescript import serve from calibre.utils.coffeescript import serve
def run_devel_server(): def run_devel_server():
os.chdir(os.path.dirname(os.path.abspath(__file__))) os.chdir(os.path.dirname(os.path.abspath(__file__)))
serve() serve(resources={'cfi.coffee':'../cfi.coffee'})
if __name__ == '__main__': if __name__ == '__main__':
run_devel_server() run_devel_server()

View File

@ -80,7 +80,7 @@ class PML_HTMLizer(object):
'b': ('<span style="font-weight: bold;">', '</span>'), 'b': ('<span style="font-weight: bold;">', '</span>'),
'l': ('<span style="font-size: 150%;">', '</span>'), 'l': ('<span style="font-size: 150%;">', '</span>'),
'k': ('<span style="font-size: 75%; font-variant: small-caps;">', '</span>'), 'k': ('<span style="font-size: 75%; font-variant: small-caps;">', '</span>'),
'FN': ('<br /><br style="page-break-after: always;" /><div id="fn-%s"><p>', '</p><<small><a href="#rfn-%s">return</a></small></div>'), 'FN': ('<br /><br style="page-break-after: always;" /><div id="fn-%s"><p>', '</p><small><a href="#rfn-%s">return</a></small></div>'),
'SB': ('<br /><br style="page-break-after: always;" /><div id="sb-%s"><p>', '</p><small><a href="#rsb-%s">return</a></small></div>'), 'SB': ('<br /><br style="page-break-after: always;" /><div id="sb-%s"><p>', '</p><small><a href="#rsb-%s">return</a></small></div>'),
} }

View File

@ -14,7 +14,7 @@ from calibre.ebooks.metadata.opf2 import OPF
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
from calibre.constants import preferred_encoding, filesystem_encoding, DEBUG from calibre.constants import preferred_encoding, filesystem_encoding, DEBUG
from calibre.utils.config import prefs from calibre.utils.config import prefs
from calibre import prints from calibre import prints, force_unicode, as_unicode
single_shot = partial(QTimer.singleShot, 75) single_shot = partial(QTimer.singleShot, 75)
@ -66,7 +66,8 @@ class RecursiveFind(QThread): # {{{
if self.canceled: if self.canceled:
return return
self.update.emit( self.update.emit(
_('Searching in')+' '+dirpath[0]) _('Searching in')+' '+force_unicode(dirpath[0],
filesystem_encoding))
self.books += list(self.db.find_books_in_directory(dirpath[0], self.books += list(self.db.find_books_in_directory(dirpath[0],
self.single_book_per_directory)) self.single_book_per_directory))
@ -82,10 +83,7 @@ class RecursiveFind(QThread): # {{{
except Exception as err: except Exception as err:
import traceback import traceback
traceback.print_exc() traceback.print_exc()
try: msg = as_unicode(err)
msg = unicode(err)
except:
msg = repr(err)
self.found.emit(msg) self.found.emit(msg)
return return

View File

@ -12,14 +12,13 @@ from PyQt4.Qt import (QSize, QSizePolicy, QUrl, SIGNAL, Qt, QTimer,
QPainter, QPalette, QBrush, QFontDatabase, QDialog, QPainter, QPalette, QBrush, QFontDatabase, QDialog,
QColor, QPoint, QImage, QRegion, QVariant, QIcon, QColor, QPoint, QImage, QRegion, QVariant, QIcon,
QFont, pyqtSignature, QAction, QByteArray, QMenu, QFont, pyqtSignature, QAction, QByteArray, QMenu,
pyqtSignal) pyqtSignal, QSwipeGesture)
from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings
from calibre.utils.config import Config, StringConfig from calibre.utils.config import Config, StringConfig
from calibre.utils.localization import get_language from calibre.utils.localization import get_language
from calibre.gui2.viewer.config_ui import Ui_Dialog from calibre.gui2.viewer.config_ui import Ui_Dialog
from calibre.gui2.viewer.flip import SlideFlip from calibre.gui2.viewer.flip import SlideFlip
from calibre.gui2.viewer.gestures import Gestures
from calibre.gui2.shortcuts import Shortcuts, ShortcutConfig from calibre.gui2.shortcuts import Shortcuts, ShortcutConfig
from calibre.constants import iswindows from calibre.constants import iswindows
from calibre import prints, guess_type from calibre import prints, guess_type
@ -514,7 +513,6 @@ class DocumentView(QWebView): # {{{
def __init__(self, *args): def __init__(self, *args):
QWebView.__init__(self, *args) QWebView.__init__(self, *args)
self.flipper = SlideFlip(self) self.flipper = SlideFlip(self)
self.gestures = Gestures()
self.is_auto_repeat_event = False self.is_auto_repeat_event = False
self.debug_javascript = False self.debug_javascript = False
self.shortcuts = Shortcuts(SHORTCUTS, 'shortcuts/viewer') self.shortcuts = Shortcuts(SHORTCUTS, 'shortcuts/viewer')
@ -582,6 +580,7 @@ class DocumentView(QWebView): # {{{
else: else:
m.addAction(name, a[key], self.shortcuts.get_sequences(key)[0]) m.addAction(name, a[key], self.shortcuts.get_sequences(key)[0])
self.goto_location_action.setMenu(self.goto_location_menu) self.goto_location_action.setMenu(self.goto_location_menu)
self.grabGesture(Qt.SwipeGesture)
def goto_next_section(self, *args): def goto_next_section(self, *args):
if self.manager is not None: if self.manager is not None:
@ -1047,28 +1046,24 @@ class DocumentView(QWebView): # {{{
self.manager.viewport_resized(self.scroll_fraction) self.manager.viewport_resized(self.scroll_fraction)
def event(self, ev): def event(self, ev):
typ = ev.type() if ev.type() == ev.Gesture:
if typ == ev.TouchBegin: swipe = ev.gesture(Qt.SwipeGesture)
try: if swipe is not None:
self.gestures.start_gesture('touch', ev) self.handle_swipe(swipe)
except:
import traceback
traceback.print_exc()
elif typ == ev.TouchEnd:
try:
gesture = self.gestures.end_gesture('touch', ev, self.rect())
except:
import traceback
traceback.print_exc()
if gesture is not None:
ev.accept()
if gesture == 'lineleft':
self.next_page()
elif gesture == 'lineright':
self.previous_page()
return True return True
return QWebView.event(self, ev) return QWebView.event(self, ev)
def handle_swipe(self, swipe):
    '''
    Turn a completed swipe gesture into a navigation action.

    Nothing happens until the gesture reports Qt.GestureFinished. A swipe
    with a horizontal component takes precedence over its vertical
    component:

        Left  -> previous_page()
        Right -> next_page()
        Up    -> goto_previous_section()
        Down  -> goto_next_section()

    :param swipe: The swipe gesture object, it must provide state(),
                  horizontalDirection() and verticalDirection()
    '''
    if swipe.state() != Qt.GestureFinished:
        # Gesture still in progress (or cancelled), nothing to do yet
        return

    horizontal = swipe.horizontalDirection()
    if horizontal == QSwipeGesture.Left:
        self.previous_page()
    elif horizontal == QSwipeGesture.Right:
        self.next_page()
    else:
        # Up/Down have to be read from the vertical direction. Comparing
        # the horizontal direction against Down meant that a downward
        # swipe never triggered goto_next_section().
        vertical = swipe.verticalDirection()
        if vertical == QSwipeGesture.Up:
            self.goto_previous_section()
        elif vertical == QSwipeGesture.Down:
            self.goto_next_section()
def mouseReleaseEvent(self, ev): def mouseReleaseEvent(self, ev):
opos = self.document.ypos opos = self.document.ypos
ret = QWebView.mouseReleaseEvent(self, ev) ret = QWebView.mouseReleaseEvent(self, ev)

View File

@ -1,61 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import time
class Gestures(object):

    '''
    Minimal recognizer for quick horizontal "line" gestures.

    The start point of a gesture is remembered per gesture type by
    start_gesture(). end_gesture() compares it with the end point and
    reports 'lineleft' or 'lineright' when the movement qualifies.
    '''

    def __init__(self):
        # Maps gesture type -> boundary point recorded by start_gesture()
        self.in_progress = {}

    def get_boundary_point(self, event):
        '''
        Describe where and when `event` happened.

        Events that have a touchPoints() method are described by their
        primary touch point, falling back to the first touch point when
        none is primary. All other events are described by their
        globalPos() and pos().

        :return: The tuple (timestamp, global position, local position,
                 touch point id). The id is None for non touch events.
        '''
        timestamp = time.time()
        id_ = None
        if hasattr(event, 'touchPoints'):
            tps = list(event.touchPoints())
            # The loop variable must not be called the same as the
            # timestamp, otherwise the returned "time" ends up being a
            # touch point object and no duration can be computed from it.
            tp = next((x for x in tps if x.isPrimary()), None)
            if tp is None:
                tp = tps[0]
            gp, p = tp.screenPos(), tp.pos()
            id_ = tp.id()
        else:
            gp, p = event.globalPos(), event.pos()
        return (timestamp, gp, p, id_)

    def start_gesture(self, typ, event):
        'Record `event` as the start of a gesture of type `typ`'
        self.in_progress[typ] = self.get_boundary_point(event)

    def is_in_progress(self, typ):
        'Return True iff a gesture of type `typ` was started and not yet ended'
        return typ in self.in_progress

    def end_gesture(self, typ, event, widget_rect):
        '''
        Finish the gesture of type `typ` using `event` as its end point.

        :param widget_rect: Rectangle of the widget, only its width() is used
        :return: 'lineleft' or 'lineright' for a quick horizontal movement
                 (less than 1.1 seconds, at least a fifth of the widget
                 width horizontally and a vertical drift below a fifth of
                 the horizontal travel). None in all other cases, including
                 when no gesture of this type was started or when the start
                 and end touch point ids differ.
        '''
        # The gesture is over whatever the outcome, so forget its start
        # point. Otherwise a stale start would be matched against a later,
        # unrelated end event.
        start = self.in_progress.pop(typ, None)
        if start is None:
            return
        end = self.get_boundary_point(event)
        if start[3] != end[3]:
            return
        timespan = end[0] - start[0]
        start_pos, end_pos = start[1], end[1]
        xspan = end_pos.x() - start_pos.x()
        yspan = end_pos.y() - start_pos.y()

        width = widget_rect.width()

        if timespan < 1.1 and abs(xspan) >= width/5. and \
                abs(yspan) < abs(xspan)/5.:
            # Quick horizontal gesture
            return 'line'+('left' if xspan < 0 else 'right')

        return None

View File

@ -11,7 +11,11 @@ from cookielib import CookieJar
from mechanize import Browser as B from mechanize import Browser as B
class Browser(B): class Browser(B):
'A cloneable mechanize browser' '''
A cloneable mechanize browser. Useful for multithreading. The idea is that
each thread has a browser clone. Every clone uses the same thread safe
cookie jar. All clones share the same browser configuration.
'''
def __init__(self): def __init__(self):
self._clone_actions = {} self._clone_actions = {}

View File

@ -11,16 +11,32 @@ __docformat__ = 'restructuredtext en'
Utilities to help with developing coffeescript based apps Utilities to help with developing coffeescript based apps
''' '''
import time, SimpleHTTPServer, SocketServer, os, subprocess import time, SimpleHTTPServer, SocketServer, os, subprocess
from io import BytesIO
class Handler(SimpleHTTPServer.SimpleHTTPRequestHandler): class Handler(SimpleHTTPServer.SimpleHTTPRequestHandler):
generated_files = set() special_resources = {}
compiled_cs = {}
def send_head(self):
    '''
    Send the response headers for the current request.

    Requests for ``.coffee`` files are answered with the output of
    compile_coffeescript() served as javascript. The request path, with
    one leading slash removed, is looked up in special_resources to find
    the file to compile. All other requests are passed on unchanged to
    SimpleHTTPRequestHandler.send_head().

    :return: A file like object containing the response body
    '''
    requested = self.path
    if not requested.endswith('.coffee'):
        return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)

    if requested.startswith('/'):
        requested = requested[1:]
    source = self.special_resources.get(requested, requested)
    compiled, modified = self.compile_coffeescript(source)

    self.send_response(200)
    self.send_header("Content-type", b'text/javascript')
    self.send_header("Content-Length", bytes(len(compiled)))
    self.send_header("Last-Modified",
            self.date_time_string(int(modified)))
    self.end_headers()
    return BytesIO(compiled)
def translate_path(self, path): def translate_path(self, path):
if path.endswith('jquery.js'): path = self.special_resources.get(path, path)
if path.endswith('/jquery.js'):
return P('content_server/jquery.js') return P('content_server/jquery.js')
if path.endswith('.coffee'):
return self.compile_coffeescript(path[1:])
return SimpleHTTPServer.SimpleHTTPRequestHandler.translate_path(self, return SimpleHTTPServer.SimpleHTTPRequestHandler.translate_path(self,
path) path)
@ -31,36 +47,33 @@ class Handler(SimpleHTTPServer.SimpleHTTPRequestHandler):
except: except:
time.sleep(0.01) time.sleep(0.01)
sstat = os.stat(src) sstat = os.stat(src)
return (not os.access(dest, os.R_OK) or sstat.st_mtime > return sstat.st_mtime > dest
os.stat(dest).st_mtime)
def compile_coffeescript(self, src): def compile_coffeescript(self, src):
dest = os.path.splitext(src)[0] + '.js' raw, mtime = self.compiled_cs.get(src, (None, 0))
self.generated_files.add(dest) if self.newer(src, mtime):
if self.newer(src, dest): mtime = time.time()
with open(dest, 'wb') as f: try:
try: raw = subprocess.check_output(['coffee', '-c', '-p', src])
subprocess.check_call(['coffee', '-c', '-p', src], stdout=f) except:
except: print('Compilation of %s failed'%src)
print('Compilation of %s failed'%src) cs = '''
f.seek(0) // Compilation of coffeescript failed
f.truncate() alert("Compilation of %s failed");
f.write('// Compilation of coffeescript failed') '''%src
f.write('alert("Compilation of %s failed");'%src) raw = cs.encode('utf-8')
return dest self.compiled_cs[src] = (raw, mtime)
return raw, mtime
def serve(port=8000): class HTTPD(SocketServer.TCPServer):
httpd = SocketServer.TCPServer(('localhost', port), Handler) allow_reuse_address = True
def serve(resources={}, port=8000):
Handler.special_resources = resources
httpd = HTTPD(('0.0.0.0', port), Handler)
print('serving at localhost:%d'%port) print('serving at localhost:%d'%port)
try: try:
try: httpd.serve_forever()
httpd.serve_forever() except KeyboardInterrupt:
except KeyboardInterrupt: raise SystemExit(0)
raise SystemExit(0)
finally:
for x in Handler.generated_files:
try:
os.remove(x)
except:
pass