mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Sync to trunk.
This commit is contained in:
commit
28d5eff2f8
@ -18,13 +18,12 @@ import re
|
||||
import sys
|
||||
import glob
|
||||
from itertools import repeat
|
||||
from math import ceil
|
||||
|
||||
from calibre.devices.interface import DevicePlugin
|
||||
from calibre.devices.errors import DeviceError, FreeSpaceError
|
||||
from calibre.devices.usbms.deviceconfig import DeviceConfig
|
||||
from calibre import iswindows, islinux, isosx, __appname__
|
||||
from calibre.utils.filenames import ascii_filename as sanitize
|
||||
from calibre.utils.filenames import ascii_filename as sanitize, shorten_components_to
|
||||
|
||||
class Device(DeviceConfig, DevicePlugin):
|
||||
|
||||
@ -669,71 +668,47 @@ class Device(DeviceConfig, DevicePlugin):
|
||||
return path
|
||||
|
||||
def create_upload_path(self, path, mdata, fname):
|
||||
resizable = []
|
||||
path = os.path.abspath(path)
|
||||
newpath = path
|
||||
if self.SUPPORTS_SUB_DIRS and self.settings().use_subdirs:
|
||||
extra_components = []
|
||||
|
||||
if self.SUPPORTS_SUB_DIRS and self.settings().use_subdirs:
|
||||
if 'tags' in mdata.keys():
|
||||
for tag in mdata['tags']:
|
||||
if tag.startswith(_('News')):
|
||||
newpath = os.path.join(newpath, 'news')
|
||||
extra_components.append('news')
|
||||
c = sanitize(mdata.get('title', ''))
|
||||
if c:
|
||||
newpath = os.path.join(newpath, c)
|
||||
resizable.append(c)
|
||||
extra_components.append(c)
|
||||
c = sanitize(mdata.get('timestamp', ''))
|
||||
if c:
|
||||
newpath = os.path.join(newpath, c)
|
||||
resizable.append(c)
|
||||
extra_components.append(c)
|
||||
break
|
||||
elif tag.startswith('/'):
|
||||
for c in tag.split('/'):
|
||||
c = sanitize(c)
|
||||
if not c: continue
|
||||
newpath = os.path.join(newpath, c)
|
||||
resizable.append(c)
|
||||
extra_components.append(c)
|
||||
break
|
||||
|
||||
if newpath == path:
|
||||
if not extra_components:
|
||||
c = sanitize(mdata.get('authors', _('Unknown')))
|
||||
if c:
|
||||
newpath = os.path.join(newpath, c)
|
||||
resizable.append(c)
|
||||
extra_components.append(c)
|
||||
c = sanitize(mdata.get('title', _('Unknown')))
|
||||
if c:
|
||||
extra_components.append(c)
|
||||
newpath = os.path.join(newpath, c)
|
||||
resizable.append(c)
|
||||
|
||||
newpath = os.path.abspath(newpath)
|
||||
fname = sanitize(fname)
|
||||
resizable.append(fname)
|
||||
extra_components.append(fname)
|
||||
extra_components = [str(x) for x in extra_components]
|
||||
components = shorten_components_to(250 - len(path), extra_components)
|
||||
filepath = os.path.join(path, *components)
|
||||
filedir = os.path.dirname(filepath)
|
||||
|
||||
filepath = os.path.join(newpath, fname)
|
||||
|
||||
if len(filepath) > 245:
|
||||
extra = len(filepath) - 245
|
||||
delta = int(ceil(extra/float(len(resizable))))
|
||||
for x in resizable:
|
||||
if delta > len(x):
|
||||
r = x[0] if x is resizable[-1] else ''
|
||||
else:
|
||||
if x is resizable[-1]:
|
||||
b, e = os.path.splitext(x)
|
||||
r = b[:-delta]+e
|
||||
if r.startswith('.'): r = x[0]+r
|
||||
else:
|
||||
r = x[:-delta]
|
||||
r = r.strip()
|
||||
if not r:
|
||||
r = x.strip()[0] if x.strip() else 'x'
|
||||
if x is resizable[-1]:
|
||||
filepath = filepath.replace(os.sep+x, os.sep+r)
|
||||
else:
|
||||
filepath = filepath.replace(os.sep+x+os.sep, os.sep+r+os.sep)
|
||||
filepath = filepath.replace(os.sep+os.sep, os.sep).strip()
|
||||
newpath = os.path.dirname(filepath)
|
||||
|
||||
if not os.path.exists(newpath):
|
||||
os.makedirs(newpath)
|
||||
if not os.path.exists(filedir):
|
||||
os.makedirs(filedir)
|
||||
|
||||
return filepath
|
||||
|
@ -45,7 +45,7 @@ class DBAdder(Thread):
|
||||
self.critical = {}
|
||||
self.number_of_books_added = 0
|
||||
self.duplicates = []
|
||||
self.names, self.path, self.infos = [], [], []
|
||||
self.names, self.paths, self.infos = [], [], []
|
||||
Thread.__init__(self)
|
||||
self.daemon = True
|
||||
self.input_queue = Queue()
|
||||
|
BIN
src/calibre/gui2/images/news/beta.png
Normal file
BIN
src/calibre/gui2/images/news/beta.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 670 B |
BIN
src/calibre/gui2/images/news/beta_en.png
Normal file
BIN
src/calibre/gui2/images/news/beta_en.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 670 B |
BIN
src/calibre/gui2/images/news/glasjavnosti.png
Normal file
BIN
src/calibre/gui2/images/news/glasjavnosti.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 731 B |
@ -1445,36 +1445,40 @@ class LibraryDatabase2(LibraryDatabase):
|
||||
self.notify('add', [id])
|
||||
|
||||
def move_library_to(self, newloc, progress=lambda x: x):
|
||||
books = self.conn.get('SELECT id, path, title FROM books')
|
||||
if not os.path.exists(newloc):
|
||||
os.makedirs(newloc)
|
||||
items = os.listdir(self.library_path)
|
||||
old_dirs = set([])
|
||||
for i, book in enumerate(books):
|
||||
path = book[1]
|
||||
if not path:
|
||||
continue
|
||||
dir = path.split('/')[0]
|
||||
srcdir = os.path.join(self.library_path, dir)
|
||||
tdir = os.path.join(newloc, dir)
|
||||
if os.path.exists(tdir):
|
||||
shutil.rmtree(tdir)
|
||||
if os.path.exists(srcdir):
|
||||
shutil.copytree(srcdir, tdir)
|
||||
old_dirs.add(srcdir)
|
||||
progress(book[2])
|
||||
for i, x in enumerate(items):
|
||||
src = os.path.join(self.library_path, x)
|
||||
dest = os.path.join(newloc, x)
|
||||
if os.path.isdir(src):
|
||||
if os.path.exists(dest):
|
||||
shutil.rmtree(dest)
|
||||
shutil.copytree(src, dest)
|
||||
old_dirs.add(src)
|
||||
else:
|
||||
if os.path.exists(dest):
|
||||
os.remove(dest)
|
||||
shutil.copyfile(src, dest)
|
||||
if not isinstance(x, unicode):
|
||||
x = x.decode(filesystem_encoding, 'replace')
|
||||
progress(x)
|
||||
|
||||
dbpath = os.path.join(newloc, os.path.basename(self.dbpath))
|
||||
shutil.copyfile(self.dbpath, dbpath)
|
||||
opath = self.dbpath
|
||||
self.conn.close()
|
||||
self.library_path, self.dbpath = newloc, dbpath
|
||||
self.connect()
|
||||
try:
|
||||
os.unlink(opath)
|
||||
for dir in old_dirs:
|
||||
shutil.rmtree(dir)
|
||||
except:
|
||||
pass
|
||||
for dir in old_dirs:
|
||||
try:
|
||||
shutil.rmtree(dir)
|
||||
except:
|
||||
pass
|
||||
|
||||
def __iter__(self):
|
||||
for record in self.data._data:
|
||||
@ -1639,9 +1643,9 @@ books_series_link feeds
|
||||
def import_book_directory(self, dirpath, callback=None):
|
||||
dirpath = os.path.abspath(dirpath)
|
||||
formats = self.find_books_in_directory(dirpath, True)
|
||||
formats = list(formats)[0]
|
||||
if not formats:
|
||||
return
|
||||
formats = list(iter(formats))
|
||||
mi = metadata_from_formats(formats)
|
||||
if mi.title is None:
|
||||
return
|
||||
|
@ -31,7 +31,7 @@ from calibre.library.database2 import LibraryDatabase2, FIELD_MAP
|
||||
from calibre.utils.config import config_dir
|
||||
from calibre.utils.mdns import publish as publish_zeroconf, \
|
||||
stop_server as stop_zeroconf
|
||||
from calibre.ebooks.metadata import fmt_sidx
|
||||
from calibre.ebooks.metadata import fmt_sidx, title_sort
|
||||
|
||||
build_time = datetime.strptime(build_time, '%d %m %Y %H%M%S')
|
||||
server_resources['jquery.js'] = jquery
|
||||
@ -125,6 +125,41 @@ class LibraryServer(object):
|
||||
</feed>
|
||||
'''))
|
||||
|
||||
STANZA_MAIN = MarkupTemplate(textwrap.dedent('''\
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:py="http://genshi.edgewall.org/">
|
||||
<title>calibre Library</title>
|
||||
<id>$id</id>
|
||||
<updated>${updated.strftime('%Y-%m-%dT%H:%M:%S+00:00')}</updated>
|
||||
<link rel="search" title="Search" type="application/atom+xml" href="/?search={searchTerms}"/>
|
||||
<author>
|
||||
<name>calibre</name>
|
||||
<uri>http://calibre.kovidgoyal.net</uri>
|
||||
</author>
|
||||
<subtitle>
|
||||
${subtitle}
|
||||
</subtitle>
|
||||
<entry>
|
||||
<title>By Author</title>
|
||||
<id>urn:uuid:fc000fa0-8c23-11de-a31d-0002a5d5c51b</id>
|
||||
<updated>${updated.strftime('%Y-%m-%dT%H:%M:%S+00:00')}</updated>
|
||||
<link type="application/atom+xml" href="/?sortby=byauthor" />
|
||||
</entry>
|
||||
<entry>
|
||||
<title>By Title</title>
|
||||
<id>urn:uuid:1df4fe40-8c24-11de-b4c6-0002a5d5c51b</id>
|
||||
<updated>${updated.strftime('%Y-%m-%dT%H:%M:%S+00:00')}</updated>
|
||||
<link type="application/atom+xml" href="/?sortby=bytitle" />
|
||||
</entry>
|
||||
<entry>
|
||||
<title>By Newest</title>
|
||||
<id>urn:uuid:3c6d4940-8c24-11de-a4d7-0002a5d5c51b</id>
|
||||
<updated>${updated.strftime('%Y-%m-%dT%H:%M:%S+00:00')}</updated>
|
||||
<link type="application/atom+xml" href="/?sortby=bynewest" />
|
||||
</entry>
|
||||
</feed>
|
||||
'''))
|
||||
|
||||
|
||||
def __init__(self, db, opts, embedded=False, show_tracebacks=True):
|
||||
self.db = db
|
||||
@ -295,11 +330,25 @@ class LibraryServer(object):
|
||||
|
||||
|
||||
@expose
|
||||
def stanza(self, search=None):
|
||||
def stanza(self, search=None, sortby=None):
|
||||
'Feeds to read calibre books on a ipod with stanza.'
|
||||
books = []
|
||||
updated = self.db.last_modified()
|
||||
cherrypy.response.headers['Last-Modified'] = self.last_modified(updated)
|
||||
cherrypy.response.headers['Content-Type'] = 'text/xml'
|
||||
if not sortby and not search:
|
||||
return self.STANZA_MAIN.generate(subtitle='', data=books, FM=FIELD_MAP,
|
||||
updated=updated, id='urn:calibre:main').render('xml')
|
||||
ids = self.db.data.parse(search) if search and search.strip() else self.db.data.universal_set()
|
||||
for record in reversed(list(iter(self.db))):
|
||||
record_list = list(iter(self.db))
|
||||
if sortby == "byauthor":
|
||||
record_list.sort(lambda x, y: cmp(x[FIELD_MAP['author_sort']], y[FIELD_MAP['author_sort']]))
|
||||
elif sortby == "bytitle":
|
||||
record_list.sort(lambda x, y: cmp(title_sort(x[FIELD_MAP['title']]),
|
||||
title_sort(y[FIELD_MAP['title']])))
|
||||
else:
|
||||
record_list = reversed(record_list)
|
||||
for record in record_list:
|
||||
if record[0] not in ids: continue
|
||||
r = record[FIELD_MAP['formats']]
|
||||
r = r.upper() if r else ''
|
||||
@ -335,10 +384,6 @@ class LibraryServer(object):
|
||||
timestamp=strftime('%Y-%m-%dT%H:%M:%S+00:00', record[5]),
|
||||
).render('xml').decode('utf8'))
|
||||
|
||||
updated = self.db.last_modified()
|
||||
cherrypy.response.headers['Last-Modified'] = self.last_modified(updated)
|
||||
cherrypy.response.headers['Content-Type'] = 'text/xml'
|
||||
|
||||
return self.STANZA.generate(subtitle='', data=books, FM=FIELD_MAP,
|
||||
updated=updated, id='urn:calibre:main').render('xml')
|
||||
|
||||
@ -389,7 +434,7 @@ class LibraryServer(object):
|
||||
'The / URL'
|
||||
want_opds = cherrypy.request.headers.get('Stanza-Device-Name', 919) != \
|
||||
919 or cherrypy.request.headers.get('Want-OPDS-Catalog', 919) != 919
|
||||
return self.stanza(search=kwargs.get('search', None)) if want_opds else self.static('index.html')
|
||||
return self.stanza(search=kwargs.get('search', None), sortby=kwargs.get('sortby',None)) if want_opds else self.static('index.html')
|
||||
|
||||
|
||||
@expose
|
||||
|
@ -54,6 +54,7 @@ recipe_modules = ['recipe_' + r for r in (
|
||||
'fastcompany', 'accountancyage', 'laprensa_hn', 'latribuna',
|
||||
'eltiempo_hn', 'slate', 'tnxm', 'bbcvietnamese', 'vnexpress',
|
||||
'volksrant', 'theeconomictimes_india', 'ourdailybread',
|
||||
'monitor', 'republika', 'beta', 'beta_en', 'glasjavnosti',
|
||||
)]
|
||||
|
||||
|
||||
|
50
src/calibre/web/feeds/recipes/recipe_beta.py
Normal file
50
src/calibre/web/feeds/recipes/recipe_beta.py
Normal file
@ -0,0 +1,50 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
beta.rs
|
||||
'''
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
|
||||
class Danas(BasicNewsRecipe):
    """News recipe for the Serbian-language RSS feeds of the BETA agency (beta.rs)."""

    # --- Descriptive metadata -------------------------------------------
    title = 'BETA'
    __author__ = 'Darko Miletic'
    description = 'Novinska Agencija'
    publisher = 'Beta'
    category = 'news, politics, Serbia'

    # --- Fetch settings ---------------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = False
    use_embedded_content = True

    # --- Language / presentation -----------------------------------------
    language = _('Serbian')
    lang = 'sr-Latn-RS'
    direction = 'ltr'
    extra_css = (
        '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} '
        '@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} '
        'body{font-family: serif1, serif} '
        '.article_description{font-family: sans1, sans-serif}'
    )

    # Metadata forwarded to the conversion step; values reuse the class
    # attributes defined above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': lang,
        'pretty_print': True,
    }

    # Swap U+0110 (Latin capital D with stroke) for U+00D0 (Latin capital Eth).
    # NOTE(review): presumably a workaround for a glyph missing on the target
    # reader font -- confirm.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda m: u'\u00D0')]

    feeds = [
        (u'Vesti dana', u'http://www.beta.rs/rssvd.asp'),
        (u'Ekonomija', u'http://www.beta.rs/rssek.asp'),
        (u'Sport', u'http://www.beta.rs/rsssp.asp'),
    ]

    def preprocess_html(self, soup):
        """Tag the document with its language and charset, then adeify images.

        Sets ``lang`` on the ``<html>`` element, inserts Content-Language and
        Content-Type ``<meta>`` tags at the start of ``<head>`` (in that
        order) and returns the result of ``self.adeify_images(soup)``.
        """
        soup.html['lang'] = self.lang
        meta_attrs = (
            [("http-equiv", "Content-Language"), ("content", self.lang)],
            [("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")],
        )
        for position, attrs in enumerate(meta_attrs):
            soup.head.insert(position, Tag(soup, 'meta', attrs))
        return self.adeify_images(soup)
|
37
src/calibre/web/feeds/recipes/recipe_beta_en.py
Normal file
37
src/calibre/web/feeds/recipes/recipe_beta_en.py
Normal file
@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
beta.rs
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
|
||||
class Danas(BasicNewsRecipe):
    """News recipe for the English-language RSS feed of the BETA agency (beta.rs)."""

    # --- Descriptive metadata -------------------------------------------
    title = 'BETA - English'
    __author__ = 'Darko Miletic'
    description = 'Serbian news agency'
    publisher = 'Beta'
    category = 'news, politics, Serbia'

    # --- Fetch settings ---------------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = False
    use_embedded_content = True

    # --- Language ---------------------------------------------------------
    language = _('English')
    lang = 'en'

    # Metadata forwarded to the conversion step; values reuse the class
    # attributes defined above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': lang,
        'pretty_print': True,
    }

    feeds = [(u'News', u'http://www.beta.rs/rssen.asp')]

    def preprocess_html(self, soup):
        """Return the page after passing it through ``self.adeify_images``."""
        return self.adeify_images(soup)
|
80
src/calibre/web/feeds/recipes/recipe_glasjavnosti.py
Normal file
80
src/calibre/web/feeds/recipes/recipe_glasjavnosti.py
Normal file
@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.glas-javnosti.rs
|
||||
'''
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class GlasJavnosti(BasicNewsRecipe):
    """News recipe for the Serbian daily Glas Javnosti (www.glas-javnosti.rs)."""

    # --- Descriptive metadata -------------------------------------------
    title = 'Glas Javnosti'
    __author__ = 'Darko Miletic'
    description = 'Glas javnosti - Mi ne ulepsavamo stvarnost'
    publisher = 'Glas Javnosti'
    category = 'news, politics, Serbia'

    # --- Fetch settings ---------------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = False
    use_embedded_content = False

    # --- Language / presentation -----------------------------------------
    language = _('Serbian')
    lang = 'sr-Latn-RS'
    direction = 'ltr'
    extra_css = (
        '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} '
        '@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} '
        'body{font-family: serif1, serif} '
        '.article_description{font-family: sans1, sans-serif}'
    )

    # Metadata forwarded to the conversion step; values reuse the class
    # attributes defined above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': lang,
        'pretty_print': True,
    }

    # Swap U+0110 (Latin capital D with stroke) for U+00D0 (Latin capital Eth).
    # NOTE(review): presumably a workaround for a glyph missing on the target
    # reader font -- confirm.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda m: u'\u00D0')]

    # Page regions that are kept ...
    keep_only_tags = [
        dict(name='div', attrs={'id': 'above-content'}),
        dict(name='div', attrs={'class': 'node'}),
    ]
    # ... and elements that are dropped.
    remove_tags = [
        dict(name=['object', 'link']),
        dict(name='div', attrs={'class': ['links', 'meta']}),
        dict(name='div', attrs={'id': 'block-block-12'}),
    ]

    # (section title, feed URL) pairs, one per site section.
    feeds = [
        (u'Politika', u'http://www.glas-javnosti.rs/aktuelni-clanci/2'),
        (u'Tema', u'http://www.glas-javnosti.rs/aktuelni-clanci/48'),
        (u'Drustvo', u'http://www.glas-javnosti.rs/aktuelni-clanci/17'),
        (u'Ekonomija', u'http://www.glas-javnosti.rs/aktuelni-clanci/16'),
        (u'Dosije', u'http://www.glas-javnosti.rs/aktuelni-clanci/65'),
        (u'Svet', u'http://www.glas-javnosti.rs/aktuelni-clanci/18'),
        (u'Hronika', u'http://www.glas-javnosti.rs/aktuelni-clanci/19'),
        (u'Kultura', u'http://www.glas-javnosti.rs/aktuelni-clanci/6'),
        (u'Ljudi i Dogadjaji', u'http://www.glas-javnosti.rs/aktuelni-clanci/37'),
        (u'Putovanja', u'http://www.glas-javnosti.rs/aktuelni-clanci/113'),
        (u'Feljton', u'http://www.glas-javnosti.rs/aktuelni-clanci/49'),
        (u'Sport', u'http://www.glas-javnosti.rs/aktuelni-clanci/1'),
        (u'Lov i Ribolov', u'http://www.glas-javnosti.rs/aktuelni-clanci/591'),
        (u'Nedelja', u'http://www.glas-javnosti.rs/aktuelni-clanci/1862'),
        (u'Glasno', u'http://www.glas-javnosti.rs/aktuelni-clanci/590'),
        (u'Tehnologija', u'http://www.glas-javnosti.rs/aktuelni-clanci/609'),
        (u'Reflektor', u'http://www.glas-javnosti.rs/aktuelni-clanci/717'),
        (u'Saznanja', u'http://www.glas-javnosti.rs/aktuelni-clanci/1694'),
        (u'Beograd', u'http://www.glas-javnosti.rs/aktuelni-clanci/40'),
        (u'Srbija', u'http://www.glas-javnosti.rs/aktuelni-clanci/114'),
        (u'Zapadna Srbija', u'http://www.glas-javnosti.rs/aktuelni-clanci/41'),
        (u'Istocna i Juzna Srbija', u'http://www.glas-javnosti.rs/aktuelni-clanci/42'),
        (u'Sumadija i Pomoravlje', u'http://www.glas-javnosti.rs/aktuelni-clanci/43'),
        (u'Vojvodina', u'http://www.glas-javnosti.rs/aktuelni-clanci/44'),
        (u'Republika Srpska', u'http://www.glas-javnosti.rs/aktuelni-clanci/45'),
        (u'Slobodno Vreme', u'http://www.glas-javnosti.rs/aktuelni-clanci/61'),
        (u'Konjske Snage', u'http://www.glas-javnosti.rs/aktuelni-clanci/46'),
    ]

    def preprocess_html(self, soup):
        """Strip every inline ``style`` attribute, then adeify images.

        Returns the result of ``self.adeify_images(soup)``.
        """
        for styled in soup.findAll(style=True):
            del styled['style']
        return self.adeify_images(soup)
|
@ -8,17 +8,16 @@ www.guardian.co.uk
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
|
||||
class Guardian(BasicNewsRecipe):
|
||||
|
||||
title = u'The Guardian'
|
||||
__author__ = 'Seabound'
|
||||
__author__ = 'Seabound and Sujata Raman'
|
||||
language = _('English')
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 20
|
||||
remove_javascript = True
|
||||
|
||||
|
||||
timefmt = ' [%a, %d %b %Y]'
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':["content","article_header","main-article-info",]}),
|
||||
@ -30,20 +29,20 @@ class Guardian(BasicNewsRecipe):
|
||||
dict(name='ul', attrs={'id':["content-actions"]}),
|
||||
]
|
||||
use_embedded_content = False
|
||||
|
||||
|
||||
no_stylesheets = True
|
||||
extra_css = '''
|
||||
.article-attributes{font-size: x-small; font-family:Arial,Helvetica,sans-serif;}
|
||||
.h1{font-size: large ;font-family:georgia,serif; font-weight:bold;}
|
||||
.stand-first-alone{color:#666666; font-size:small; font-family:Arial,Helvetica,sans-serif;}
|
||||
.caption{color:#666666; font-size:x-small; font-family:Arial,Helvetica,sans-serif;}
|
||||
#article-wrapper{font-size:small; font-family:Arial,Helvetica,sans-serif;}
|
||||
#article-wrapper{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
|
||||
.main-article-info{font-family:Arial,Helvetica,sans-serif;}
|
||||
#full-contents{font-size:small; font-family:Arial,Helvetica,sans-serif;}
|
||||
#match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;}
|
||||
#full-contents{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
|
||||
#match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
|
||||
'''
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
feeds = [
|
||||
('Front Page', 'http://www.guardian.co.uk/rss'),
|
||||
@ -57,21 +56,30 @@ class Guardian(BasicNewsRecipe):
|
||||
('Comment','http://www.guardian.co.uk/commentisfree/rss'),
|
||||
]
|
||||
|
||||
|
||||
def get_article_url(self, article):
|
||||
url = article.get('guid', None)
|
||||
if '/video/' in url or '/flyer/' in url or '/quiz/' in url or \
|
||||
'/gallery/' in url or 'ivebeenthere' in url or \
|
||||
'pickthescore' in url or 'audioslideshow' in url :
|
||||
url = None
|
||||
return url
|
||||
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
||||
for item in soup.findAll(style=True):
|
||||
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
|
||||
for item in soup.findAll(face=True):
|
||||
|
||||
for item in soup.findAll(face=True):
|
||||
del item['face']
|
||||
for tag in soup.findAll(name=['ul','li']):
|
||||
tag.name = 'div'
|
||||
|
||||
|
||||
return soup
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
85
src/calibre/web/feeds/recipes/recipe_monitor.py
Normal file
85
src/calibre/web/feeds/recipes/recipe_monitor.py
Normal file
@ -0,0 +1,85 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
|
||||
'''
|
||||
monitorcg.com
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
|
||||
class MonitorCG(BasicNewsRecipe):
    """News recipe for the Montenegrin weekly Monitor (monitorcg.com).

    The article list is scraped from the site rather than read from RSS:
    see :meth:`parse_index`.
    """

    # --- Descriptive metadata -------------------------------------------
    title = 'Monitor online'
    __author__ = 'Darko Miletic'
    description = 'News from Montenegro'
    publisher = 'MONITOR d.o.o. Podgorica'
    category = 'news, politics, Montenegro'

    # --- Fetch settings ---------------------------------------------------
    oldest_article = 15
    max_articles_per_feed = 150
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False

    # --- Language / presentation -----------------------------------------
    language = _('Montenegrin')
    lang = 'sr-Latn-Me'
    # Site root; relative links found on the pages are appended to it.
    INDEX = 'http://www.monitorcg.com'

    extra_css = (
        '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} '
        '@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} '
        'body{font-family: serif1, serif} '
        '.article_description{font-family: sans1, sans-serif}'
    )

    # Metadata forwarded to the conversion step; values reuse the class
    # attributes defined above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': lang,
        'pretty_print': True,
    }

    # Swap U+0110 (Latin capital D with stroke) for U+00D0 (Latin capital Eth).
    # NOTE(review): presumably a workaround for a glyph missing on the target
    # reader font -- confirm.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda m: u'\u00D0')]

    keep_only_tags = [dict(name='div', attrs={'id': 'ja-current-content'})]

    remove_tags = [
        dict(name=['object', 'link', 'embed']),
        dict(attrs={'class': ['buttonheading', 'article-section']}),
    ]

    def preprocess_html(self, soup):
        """Tag the document with its language and charset, then adeify images.

        Sets ``xml:lang`` and ``lang`` on the ``<html>`` element, inserts
        Content-Language and Content-Type ``<meta>`` tags at the start of
        ``<head>`` (in that order) and returns ``self.adeify_images(soup)``.
        """
        for attr_name in ('xml:lang', 'lang'):
            soup.html[attr_name] = self.lang
        meta_attrs = (
            [("http-equiv", "Content-Language"), ("content", self.lang)],
            [("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")],
        )
        for position, attrs in enumerate(meta_attrs):
            soup.head.insert(position, Tag(soup, 'meta', attrs))
        return self.adeify_images(soup)

    def parse_index(self):
        """Build the feed list by scraping the current issue page.

        Looks for the ``div.ja-catslwi`` element on the front page, takes the
        target of its ``onclick`` handler as the issue URL, and collects every
        link inside that page's ``div.article-content`` as an article. The
        first image in that div, if any, becomes ``self.cover_url``.

        Returns a list of ``(feed title, list of article dicts)`` tuples; the
        list is empty when the front page has no ``div.ja-catslwi``.
        """
        # NOTE(review): block nesting was reconstructed from a flattened
        # source; everything below the guard is assumed to belong to the
        # original ``if cover_item:`` body -- confirm against the real file.
        totalfeeds = []
        front_page = self.index_to_soup(self.INDEX)
        cover_item = front_page.find('div', attrs={'class': 'ja-catslwi'})
        if not cover_item:
            return totalfeeds

        # The onclick handler has the form ...location.href='<relative url>'
        issue_path = cover_item['onclick'].partition("location.href=")[2]
        issue_url = self.INDEX + issue_path.strip("'")

        for feedtitle, feedurl in [(u'Svi clanci', issue_url)]:
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle or feedurl))
            articles = []
            issue_page = self.index_to_soup(feedurl)
            content = issue_page.find('div', attrs={'class': 'article-content'})
            if content:
                cover_img = content.find('img')
                if cover_img:
                    self.cover_url = self.INDEX + cover_img['src']
                articles.extend(
                    {
                        'url': self.INDEX + link['href'],
                        'title': self.tag_to_string(link),
                        'date': '',
                        'description': '',
                    }
                    for link in content.findAll('a')
                )
            totalfeeds.append((feedtitle, articles))
        return totalfeeds
|
||||
|
||||
|
80
src/calibre/web/feeds/recipes/recipe_republika.py
Normal file
80
src/calibre/web/feeds/recipes/recipe_republika.py
Normal file
@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
|
||||
'''
|
||||
republika.co.yu
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Republika(BasicNewsRecipe):
    """News recipe for the Serbian magazine Republika (republika.co.yu).

    The article list is scraped from the site's front page rather than read
    from RSS: see :meth:`parse_index`.
    """

    # --- Descriptive metadata -------------------------------------------
    title = 'Republika'
    __author__ = 'Darko Miletic'
    description = 'Glasilo gradjanskog samooslobadjanja. Protiv stihije straha, mrznje i nasilja'
    publisher = ' Zadruga Res Publica'
    category = 'news, politics, Serbia'

    # --- Language ---------------------------------------------------------
    language = _('Serbian')
    lang = 'sr-Latn-RS'

    # --- Fetch settings ---------------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'cp1250'
    use_embedded_content = False
    # Front page; it is the single "feed" that parse_index scrapes.
    INDEX = u'http://www.republika.co.yu/'

    extra_css = (
        ' @font-face {font-family: "serif1"; src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} '
        'body{font-family: serif1, serif} '
        '.article_description{font-family: serif1, serif} '
        '.naslov{font-size: x-large; font-weight: bold} '
        '.autor{font-size: small; font-weight: bold} '
    )

    # Metadata forwarded to the conversion step; values reuse the class
    # attributes defined above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': lang,
        'pretty_print': True,
    }

    # Swap U+0110 (Latin capital D with stroke) for U+00D0 (Latin capital Eth).
    # NOTE(review): presumably a workaround for a glyph missing on the target
    # reader font -- confirm.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda m: u'\u00D0')]

    keep_only_tags = [
        dict(attrs={'class': 'naslov'}),
        dict(attrs={'class': 'text1'}),
    ]

    remove_tags = [dict(name=['object', 'link', 'iframe', 'base', 'img'])]

    feeds = [(u'Svi clanci', INDEX)]

    def preprocess_html(self, soup):
        """Flatten table markup into plain ``<div>`` elements.

        Every table-related tag in ``<body>`` is renamed to ``div`` and its
        layout/presentation attributes are removed. Returns ``soup``.
        """
        table_tags = ['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']
        layout_attrs = (
            'style', 'font', 'valign',
            'colspan', 'width', 'height',
            'rowspan', 'summary', 'align',
            'cellspacing', 'cellpadding',
            'frames', 'rules', 'border',
        )
        # NOTE(review): nesting reconstructed from a flattened source; the
        # attribute loop is assumed to run once per matched tag -- confirm.
        for element in soup.body.findAll(name=table_tags):
            element.name = 'div'
            for attr_name in layout_attrs:
                if element.has_key(attr_name):
                    del element[attr_name]
        return soup

    def parse_index(self):
        """Build the feed list by scraping headline links from each feed page.

        For every ``(title, url)`` pair from ``self.get_feeds()`` the page is
        fetched and each ``<a class="naslovLink">`` becomes one article whose
        URL is the link's ``href`` exactly as found on the page.

        Returns a list of ``(feed title, list of article dicts)`` tuples.
        """
        totalfeeds = []
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle or feedurl))
            page = self.index_to_soup(feedurl)
            articles = [
                {
                    'url': link['href'],
                    'title': self.tag_to_string(link),
                    'date': '',
                    'description': '',
                }
                for link in page.findAll('a', attrs={'class': 'naslovLink'})
            ]
            totalfeeds.append((feedtitle, articles))
        return totalfeeds
|
||||
|
Loading…
x
Reference in New Issue
Block a user