mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge from trunk
This commit is contained in:
commit
6bdb3232cf
89
resources/recipes/modoros.recipe
Normal file
89
resources/recipes/modoros.recipe
Normal file
@ -0,0 +1,89 @@
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.constants import config_dir, CONFIG_DIR_MODE
|
||||
import os, os.path, urllib
|
||||
from hashlib import md5
|
||||
|
||||
class ModorosBlogHu(BasicNewsRecipe):
    '''
    Fetch the Hungarian blog http://modoros.blog.hu/ via its RSS feed.

    Articles already seen on a previous run are dropped by persisting a
    per-feed set of content hashes under calibre's config directory
    (see parse_feeds below).
    '''
    __author__             = 'Zsolt Botykai'
    title                  = u'Modoros Blog'
    description            = u"Modoros.blog.hu"
    oldest_article         = 10000
    max_articles_per_feed  = 10000
    reverse_article_order  = True
    language               = 'hu'
    remove_javascript      = True  # was assigned twice in the original; once is enough
    remove_empty_feeds     = True
    no_stylesheets         = True
    use_embedded_content   = False
    feeds                  = [(u'Modoros Blog', u'http://modoros.blog.hu/rss')]

    # Strip the share block (and everything after it), <noscript> fallbacks,
    # off-screen tracking images and empty paragraphs before parsing.
    preprocess_regexps = [
        (re.compile(r'<!--megosztas -->.*?</body>', re.DOTALL|re.IGNORECASE),
            lambda match: '</body>'),
        (re.compile(r'<p align="left"'), lambda m: '<p'),
        (re.compile(r'<noscript.+?noscript>', re.DOTALL|re.IGNORECASE), lambda m: ''),
        (re.compile(r'<img style="position: absolute;top:-10px.+?>', re.DOTALL|re.IGNORECASE), lambda m: ''),
        # NOTE(review): the second alternative in the group below appears to be
        # a scrape-mangled '&nbsp;' -- confirm against the live markup.
        (re.compile(r'<p>( | )*?</p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
    ]

    extra_css = '''
                    body { background-color: white; color: black }
                '''

    remove_tags = [
        dict(name='div', attrs={'id': ['csucs']}),
        dict(name='img', attrs={'style': ['position: absolute;top:-10px;left:-10px;']}),
        dict(name='div', attrs={'class': ['tovabb-is-van',
                                          'page-break',
                                          'clear']}),
        dict(name='span', attrs={'class': ['hozzaszolas-szamlalo']}),
    ]

    masthead_url = 'http://modoros.blog.hu/media/skins/modoros-neon/img/modorosblog-felirat.png'

    def get_cover_url(self):
        # The blog has no dedicated cover image; reuse the masthead.
        return 'http://modoros.blog.hu/media/skins/modoros-neon/img/modorosblog-felirat.png'

    # As seen here: http://www.mobileread.com/forums/showpost.php?p=1295505&postcount=10
    def parse_feeds(self):
        '''
        Parse the feeds, then remove every article whose hash was already
        recorded on a previous run.

        Hashes (md5 of content+summary, prefixed by the article URL when
        available) are stored one per line in
        <config_dir>/recipes/recipe_storage/<title>/<url-quoted feed title>.
        '''
        recipe_dir = os.path.join(config_dir, 'recipes')
        hash_dir = os.path.join(recipe_dir, 'recipe_storage')
        feed_dir = os.path.join(hash_dir, self.title.encode('utf-8').replace('/', ':'))
        if not os.path.isdir(feed_dir):
            os.makedirs(feed_dir, mode=CONFIG_DIR_MODE)

        feeds = BasicNewsRecipe.parse_feeds(self)

        for feed in feeds:
            # URL-quote the feed title so it is a safe filename component.
            feed_hash = urllib.quote(feed.title.encode('utf-8'), safe='')
            feed_fn = os.path.join(feed_dir, feed_hash)

            past_items = set()
            if os.path.exists(feed_fn):
                # open() instead of the deprecated file() builtin.
                with open(feed_fn) as f:
                    for h in f:
                        past_items.add(h.strip())

            cur_items = set()
            # Iterate over a copy: articles are removed from the live list.
            for article in feed.articles[:]:
                item_hash = md5()
                if article.content: item_hash.update(article.content.encode('utf-8'))
                if article.summary: item_hash.update(article.summary.encode('utf-8'))
                item_hash = item_hash.hexdigest()
                if article.url:
                    item_hash = article.url + ':' + item_hash
                cur_items.add(item_hash)
                if item_hash in past_items:
                    feed.articles.remove(article)

            with open(feed_fn, 'w') as f:
                for h in cur_items:
                    f.write(h + '\n')

        # Honour remove_empty_feeds now that stale articles are gone
        # (condition hoisted out of the per-feed test).
        if self.remove_empty_feeds:
            for f in [f for f in feeds if len(f) == 0]:
                feeds.remove(f)

        return feeds
|
||||
|
109
resources/recipes/office_space.recipe
Normal file
109
resources/recipes/office_space.recipe
Normal file
@ -0,0 +1,109 @@
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.constants import config_dir, CONFIG_DIR_MODE
|
||||
import os, os.path, urllib
|
||||
from hashlib import md5
|
||||
|
||||
class OfficeSpaceBlogHu(BasicNewsRecipe):
    '''
    Fetch the Hungarian blog http://officespace.blog.hu/ via its RSS feed.

    Articles already seen on a previous run are dropped by persisting a
    per-feed set of content hashes under calibre's config directory
    (see parse_feeds below).
    '''
    __author__             = 'Zsolt Botykai'
    # The original assigned title/feeds twice; only the last assignment was
    # effective, so keep those values.
    title                  = u'Irodai patkényok'
    description            = u"officespace.blog.hu"
    oldest_article         = 10000
    max_articles_per_feed  = 10000
    reverse_article_order  = True
    language               = 'hu'
    remove_javascript      = True  # was assigned twice in the original; once is enough
    remove_empty_feeds     = True
    no_stylesheets         = True
    use_embedded_content   = False
    feeds                  = [(u'Office Space', u'http://officespace.blog.hu/rss')]

    masthead_url = 'http://m.blog.hu/of/officespace/ipfejlec7.jpg'

    keep_only_tags = [
        dict(name='div', attrs={'id': ['mainWrapper']})
    ]

    # 1.: I like justified lines more
    # 2.: remove empty paragraphs
    # 3.: drop header and sidebar
    # 4.: drop comments counter
    # 5.: drop everything after article-tags
    # 6-8.: drop audit images
    preprocess_regexps = [
        (re.compile(r'<p align="left"'), lambda m: '<p'),
        # NOTE(review): the second alternative in the group below appears to be
        # a scrape-mangled '&nbsp;' -- confirm against the live markup.
        (re.compile(r'<p>( | )*?</p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
        (re.compile(r'<body[^>]+>.*?<div id="mainIn"', re.DOTALL|re.IGNORECASE), lambda match: '<body><div id="mainIn"'),
        (re.compile(r'<h3 class="comments">.*?</h3>', re.DOTALL|re.IGNORECASE), lambda match: ''),
        (re.compile(r'<div class="related">.*?</body>', re.DOTALL|re.IGNORECASE), lambda match: '<body>'),
        (re.compile(r'<img style="position: absolute;" src="[^"]+pixel\?uc.*?>', re.DOTALL|re.IGNORECASE), lambda match: ''),
        (re.compile(r'<noscript.+?noscript>', re.DOTALL|re.IGNORECASE), lambda m: ''),
        (re.compile(r'<img style="position: absolute;top:-10px.+?>', re.DOTALL|re.IGNORECASE), lambda m: ''),
    ]

    extra_css = '''
                    body { background-color: white; color: black }
                '''

    def get_cover_url(self):
        # The blog has no dedicated cover image; reuse the masthead.
        return 'http://m.blog.hu/of/officespace/ipfejlec7.jpg'

    def preprocess_html(self, soup):
        '''
        Replace tag links with plain comma-suffixed text, then unwrap every
        remaining link, keeping only its anchor text.
        '''
        for tagz in soup.findAll('h3', attrs={'class': 'tags'}):
            for taglink in tagz.findAll('a'):
                if taglink.string is not None:
                    taglink.replaceWith(taglink.string + ',')

        for alink in soup.findAll('a'):
            if alink.string is not None:
                alink.replaceWith(alink.string)

        return soup

    # As seen here: http://www.mobileread.com/forums/showpost.php?p=1295505&postcount=10
    def parse_feeds(self):
        '''
        Parse the feeds, then remove every article whose hash was already
        recorded on a previous run.

        Hashes (md5 of content+summary, prefixed by the article URL when
        available) are stored one per line in
        <config_dir>/recipes/recipe_storage/<title>/<url-quoted feed title>.
        '''
        recipe_dir = os.path.join(config_dir, 'recipes')
        hash_dir = os.path.join(recipe_dir, 'recipe_storage')
        feed_dir = os.path.join(hash_dir, self.title.encode('utf-8').replace('/', ':'))
        if not os.path.isdir(feed_dir):
            os.makedirs(feed_dir, mode=CONFIG_DIR_MODE)

        feeds = BasicNewsRecipe.parse_feeds(self)

        for feed in feeds:
            # URL-quote the feed title so it is a safe filename component.
            feed_hash = urllib.quote(feed.title.encode('utf-8'), safe='')
            feed_fn = os.path.join(feed_dir, feed_hash)

            past_items = set()
            if os.path.exists(feed_fn):
                # open() instead of the deprecated file() builtin.
                with open(feed_fn) as f:
                    for h in f:
                        past_items.add(h.strip())

            cur_items = set()
            # Iterate over a copy: articles are removed from the live list.
            for article in feed.articles[:]:
                item_hash = md5()
                if article.content: item_hash.update(article.content.encode('utf-8'))
                if article.summary: item_hash.update(article.summary.encode('utf-8'))
                item_hash = item_hash.hexdigest()
                if article.url:
                    item_hash = article.url + ':' + item_hash
                cur_items.add(item_hash)
                if item_hash in past_items:
                    feed.articles.remove(article)

            with open(feed_fn, 'w') as f:
                for h in cur_items:
                    f.write(h + '\n')

        # Honour remove_empty_feeds now that stale articles are gone
        # (condition hoisted out of the per-feed test).
        if self.remove_empty_feeds:
            for f in [f for f in feeds if len(f) == 0]:
                feeds.remove(f)

        return feeds
|
||||
|
@ -115,6 +115,8 @@ class KOBO(USBMS):
|
||||
playlist_map[lpath]= "Im_Reading"
|
||||
elif readstatus == 2:
|
||||
playlist_map[lpath]= "Read"
|
||||
elif readstatus == 3:
|
||||
playlist_map[lpath]= "Closed"
|
||||
|
||||
path = self.normalize_path(path)
|
||||
# print "Normalized FileName: " + path
|
||||
@ -599,11 +601,47 @@ class KOBO(USBMS):
|
||||
try:
|
||||
cursor.execute('update content set ReadStatus=2,FirstTimeReading=\'true\' where BookID is Null and ContentID = ?', t)
|
||||
except:
|
||||
debug_print('Database Exception: Unable set book as Rinished')
|
||||
debug_print('Database Exception: Unable set book as Finished')
|
||||
raise
|
||||
else:
|
||||
connection.commit()
|
||||
# debug_print('Database: Commit set ReadStatus as Finished')
|
||||
if category == 'Closed':
|
||||
# Reset Im_Reading list in the database
|
||||
if oncard == 'carda':
|
||||
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 3 and ContentID like \'file:///mnt/sd/%\''
|
||||
elif oncard != 'carda' and oncard != 'cardb':
|
||||
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 3 and ContentID not like \'file:///mnt/sd/%\''
|
||||
|
||||
try:
|
||||
cursor.execute (query)
|
||||
except:
|
||||
debug_print('Database Exception: Unable to reset Closed list')
|
||||
raise
|
||||
else:
|
||||
# debug_print('Commit: Reset Closed list')
|
||||
connection.commit()
|
||||
|
||||
for book in books:
|
||||
# debug_print('Title:', book.title, 'lpath:', book.path)
|
||||
book.device_collections = ['Closed']
|
||||
|
||||
extension = os.path.splitext(book.path)[1]
|
||||
ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path)
|
||||
|
||||
ContentID = self.contentid_from_path(book.path, ContentType)
|
||||
# datelastread = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime())
|
||||
|
||||
t = (ContentID,)
|
||||
|
||||
try:
|
||||
cursor.execute('update content set ReadStatus=3,FirstTimeReading=\'true\' where BookID is Null and ContentID = ?', t)
|
||||
except:
|
||||
debug_print('Database Exception: Unable set book as Closed')
|
||||
raise
|
||||
else:
|
||||
connection.commit()
|
||||
# debug_print('Database: Commit set ReadStatus as Closed')
|
||||
else: # No collections
|
||||
# Since no collections exist the ReadStatus needs to be reset to 0 (Unread)
|
||||
print "Reseting ReadStatus to 0"
|
||||
|
Loading…
x
Reference in New Issue
Block a user