diff --git a/src/calibre/devices/usbms/device.py b/src/calibre/devices/usbms/device.py index 980378b928..007d058941 100644 --- a/src/calibre/devices/usbms/device.py +++ b/src/calibre/devices/usbms/device.py @@ -18,13 +18,12 @@ import re import sys import glob from itertools import repeat -from math import ceil from calibre.devices.interface import DevicePlugin from calibre.devices.errors import DeviceError, FreeSpaceError from calibre.devices.usbms.deviceconfig import DeviceConfig from calibre import iswindows, islinux, isosx, __appname__ -from calibre.utils.filenames import ascii_filename as sanitize +from calibre.utils.filenames import ascii_filename as sanitize, shorten_components_to class Device(DeviceConfig, DevicePlugin): @@ -669,71 +668,47 @@ class Device(DeviceConfig, DevicePlugin): return path def create_upload_path(self, path, mdata, fname): - resizable = [] + path = os.path.abspath(path) newpath = path - if self.SUPPORTS_SUB_DIRS and self.settings().use_subdirs: + extra_components = [] + if self.SUPPORTS_SUB_DIRS and self.settings().use_subdirs: if 'tags' in mdata.keys(): for tag in mdata['tags']: if tag.startswith(_('News')): - newpath = os.path.join(newpath, 'news') + extra_components.append('news') c = sanitize(mdata.get('title', '')) if c: - newpath = os.path.join(newpath, c) - resizable.append(c) + extra_components.append(c) c = sanitize(mdata.get('timestamp', '')) if c: - newpath = os.path.join(newpath, c) - resizable.append(c) + extra_components.append(c) break elif tag.startswith('/'): for c in tag.split('/'): c = sanitize(c) if not c: continue - newpath = os.path.join(newpath, c) - resizable.append(c) + extra_components.append(c) break - if newpath == path: + if not extra_components: c = sanitize(mdata.get('authors', _('Unknown'))) if c: - newpath = os.path.join(newpath, c) - resizable.append(c) + extra_components.append(c) c = sanitize(mdata.get('title', _('Unknown'))) if c: + extra_components.append(c) newpath = os.path.join(newpath, c) - resizable.append(c) - newpath = os.path.abspath(newpath) fname = sanitize(fname) - resizable.append(fname) + extra_components.append(fname) + extra_components = [str(x) for x in extra_components] + components = shorten_components_to(250 - len(path), extra_components) + filepath = os.path.join(path, *components) + filedir = os.path.dirname(filepath) - filepath = os.path.join(newpath, fname) - if len(filepath) > 245: - extra = len(filepath) - 245 - delta = int(ceil(extra/float(len(resizable)))) - for x in resizable: - if delta > len(x): - r = x[0] if x is resizable[-1] else '' - else: - if x is resizable[-1]: - b, e = os.path.splitext(x) - r = b[:-delta]+e - if r.startswith('.'): r = x[0]+r - else: - r = x[:-delta] - r = r.strip() - if not r: - r = x.strip()[0] if x.strip() else 'x' - if x is resizable[-1]: - filepath = filepath.replace(os.sep+x, os.sep+r) - else: - filepath = filepath.replace(os.sep+x+os.sep, os.sep+r+os.sep) - filepath = filepath.replace(os.sep+os.sep, os.sep).strip() - newpath = os.path.dirname(filepath) - - if not os.path.exists(newpath): - os.makedirs(newpath) + if not os.path.exists(filedir): + os.makedirs(filedir) return filepath diff --git a/src/calibre/gui2/add.py b/src/calibre/gui2/add.py index 1f5670b0d3..56ed9b661a 100644 --- a/src/calibre/gui2/add.py +++ b/src/calibre/gui2/add.py @@ -45,7 +45,7 @@ class DBAdder(Thread): self.critical = {} self.number_of_books_added = 0 self.duplicates = [] - self.names, self.path, self.infos = [], [], [] + self.names, self.paths, self.infos = [], [], [] Thread.__init__(self) self.daemon = True self.input_queue = Queue() diff --git a/src/calibre/gui2/images/news/beta.png b/src/calibre/gui2/images/news/beta.png new file mode 100644 index 0000000000..5bc0d841c4 Binary files /dev/null and b/src/calibre/gui2/images/news/beta.png differ diff --git a/src/calibre/gui2/images/news/beta_en.png b/src/calibre/gui2/images/news/beta_en.png new file mode 100644 index 0000000000..5bc0d841c4 Binary files /dev/null and b/src/calibre/gui2/images/news/beta_en.png differ diff --git a/src/calibre/gui2/images/news/glasjavnosti.png b/src/calibre/gui2/images/news/glasjavnosti.png new file mode 100644 index 0000000000..4bf1051aa3 Binary files /dev/null and b/src/calibre/gui2/images/news/glasjavnosti.png differ diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index f1a9ea18dd..daec400101 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -1445,36 +1445,40 @@ class LibraryDatabase2(LibraryDatabase): self.notify('add', [id]) def move_library_to(self, newloc, progress=lambda x: x): - books = self.conn.get('SELECT id, path, title FROM books') if not os.path.exists(newloc): os.makedirs(newloc) + items = os.listdir(self.library_path) old_dirs = set([]) - for i, book in enumerate(books): - path = book[1] - if not path: - continue - dir = path.split('/')[0] - srcdir = os.path.join(self.library_path, dir) - tdir = os.path.join(newloc, dir) - if os.path.exists(tdir): - shutil.rmtree(tdir) - if os.path.exists(srcdir): - shutil.copytree(srcdir, tdir) - old_dirs.add(srcdir) - progress(book[2]) + for i, x in enumerate(items): + src = os.path.join(self.library_path, x) + dest = os.path.join(newloc, x) + if os.path.isdir(src): + if os.path.exists(dest): + shutil.rmtree(dest) + shutil.copytree(src, dest) + old_dirs.add(src) + else: + if os.path.exists(dest): + os.remove(dest) + shutil.copyfile(src, dest) + if not isinstance(x, unicode): + x = x.decode(filesystem_encoding, 'replace') + progress(x) dbpath = os.path.join(newloc, os.path.basename(self.dbpath)) - shutil.copyfile(self.dbpath, dbpath) opath = self.dbpath self.conn.close() self.library_path, self.dbpath = newloc, dbpath self.connect() try: os.unlink(opath) - for dir in old_dirs: - shutil.rmtree(dir) except: pass + for dir in old_dirs: + try: + shutil.rmtree(dir) + except: + pass def __iter__(self): for record in self.data._data: @@ -1639,9 +1643,9 @@ books_series_link feeds def import_book_directory(self, dirpath, callback=None): dirpath = os.path.abspath(dirpath) formats = self.find_books_in_directory(dirpath, True) + formats = list(formats)[0] if not formats: return - formats = list(iter(formats)) mi = metadata_from_formats(formats) if mi.title is None: return diff --git a/src/calibre/library/server.py b/src/calibre/library/server.py index 98ae93d070..2032c3f2c4 100644 --- a/src/calibre/library/server.py +++ b/src/calibre/library/server.py @@ -31,7 +31,7 @@ from calibre.library.database2 import LibraryDatabase2, FIELD_MAP from calibre.utils.config import config_dir from calibre.utils.mdns import publish as publish_zeroconf, \ stop_server as stop_zeroconf -from calibre.ebooks.metadata import fmt_sidx +from calibre.ebooks.metadata import fmt_sidx, title_sort build_time = datetime.strptime(build_time, '%d %m %Y %H%M%S') server_resources['jquery.js'] = jquery @@ -125,6 +125,41 @@ class LibraryServer(object): ''')) + STANZA_MAIN = MarkupTemplate(textwrap.dedent('''\ + + + calibre Library + $id + ${updated.strftime('%Y-%m-%dT%H:%M:%S+00:00')} + + + calibre + http://calibre.kovidgoyal.net + + + ${subtitle} + + + By Author + urn:uuid:fc000fa0-8c23-11de-a31d-0002a5d5c51b + ${updated.strftime('%Y-%m-%dT%H:%M:%S+00:00')} + + + + By Title + urn:uuid:1df4fe40-8c24-11de-b4c6-0002a5d5c51b + ${updated.strftime('%Y-%m-%dT%H:%M:%S+00:00')} + + + + By Newest + urn:uuid:3c6d4940-8c24-11de-a4d7-0002a5d5c51b + ${updated.strftime('%Y-%m-%dT%H:%M:%S+00:00')} + + + + ''')) + def __init__(self, db, opts, embedded=False, show_tracebacks=True): self.db = db @@ -295,11 +330,25 @@ class LibraryServer(object): @expose - def stanza(self, search=None): + def stanza(self, search=None, sortby=None): 'Feeds to read calibre books on a ipod with stanza.' books = [] + updated = self.db.last_modified() + cherrypy.response.headers['Last-Modified'] = self.last_modified(updated) + cherrypy.response.headers['Content-Type'] = 'text/xml' + if not sortby and not search: + return self.STANZA_MAIN.generate(subtitle='', data=books, FM=FIELD_MAP, + updated=updated, id='urn:calibre:main').render('xml') ids = self.db.data.parse(search) if search and search.strip() else self.db.data.universal_set() - for record in reversed(list(iter(self.db))): + record_list = list(iter(self.db)) + if sortby == "byauthor": + record_list.sort(lambda x, y: cmp(x[FIELD_MAP['author_sort']], y[FIELD_MAP['author_sort']])) + elif sortby == "bytitle": + record_list.sort(lambda x, y: cmp(title_sort(x[FIELD_MAP['title']]), + title_sort(y[FIELD_MAP['title']]))) + else: + record_list = reversed(record_list) + for record in record_list: if record[0] not in ids: continue r = record[FIELD_MAP['formats']] r = r.upper() if r else '' @@ -335,10 +384,6 @@ class LibraryServer(object): timestamp=strftime('%Y-%m-%dT%H:%M:%S+00:00', record[5]), ).render('xml').decode('utf8')) - updated = self.db.last_modified() - cherrypy.response.headers['Last-Modified'] = self.last_modified(updated) - cherrypy.response.headers['Content-Type'] = 'text/xml' - return self.STANZA.generate(subtitle='', data=books, FM=FIELD_MAP, updated=updated, id='urn:calibre:main').render('xml') @@ -389,7 +434,7 @@ class LibraryServer(object): 'The / URL' want_opds = cherrypy.request.headers.get('Stanza-Device-Name', 919) != \ 919 or cherrypy.request.headers.get('Want-OPDS-Catalog', 919) != 919 - return self.stanza(search=kwargs.get('search', None)) if want_opds else self.static('index.html') + return self.stanza(search=kwargs.get('search', None), sortby=kwargs.get('sortby',None)) if want_opds else self.static('index.html') @expose diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index d6ba724256..f0c6dd6bd7 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -54,6 +54,7 @@ recipe_modules = ['recipe_' + r for r in ( 'fastcompany', 'accountancyage', 'laprensa_hn', 'latribuna', 'eltiempo_hn', 'slate', 'tnxm', 'bbcvietnamese', 'vnexpress', 'volksrant', 'theeconomictimes_india', 'ourdailybread', + 'monitor', 'republika', 'beta', 'beta_en', 'glasjavnosti', )] diff --git a/src/calibre/web/feeds/recipes/recipe_beta.py b/src/calibre/web/feeds/recipes/recipe_beta.py new file mode 100644 index 0000000000..a647c43ab2 --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_beta.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +beta.rs +''' +import re +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag + +class Danas(BasicNewsRecipe): + title = 'BETA' + __author__ = 'Darko Miletic' + description = 'Novinska Agencija' + publisher = 'Beta' + category = 'news, politics, Serbia' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = False + use_embedded_content = True + language = _('Serbian') + lang = 'sr-Latn-RS' + direction = 'ltr' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : lang + , 'pretty_print' : True + } + + + preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] + + feeds = [ + (u'Vesti dana', u'http://www.beta.rs/rssvd.asp') + ,(u'Ekonomija' , u'http://www.beta.rs/rssek.asp') + ,(u'Sport' , u'http://www.beta.rs/rsssp.asp') + ] + + def preprocess_html(self, soup): + soup.html['lang'] = self.lang + mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) + mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]) + soup.head.insert(0,mlang) + soup.head.insert(1,mcharset) + return self.adeify_images(soup) \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_beta_en.py b/src/calibre/web/feeds/recipes/recipe_beta_en.py new file mode 100644 index 0000000000..8ace641233 --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_beta_en.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +beta.rs +''' + +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag + +class Danas(BasicNewsRecipe): + title = 'BETA - English' + __author__ = 'Darko Miletic' + description = 'Serbian news agency' + publisher = 'Beta' + category = 'news, politics, Serbia' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = False + use_embedded_content = True + language = _('English') + lang = 'en' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : lang + , 'pretty_print' : True + } + + + feeds = [(u'News', u'http://www.beta.rs/rssen.asp')] + + def preprocess_html(self, soup): + return self.adeify_images(soup) \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_glasjavnosti.py b/src/calibre/web/feeds/recipes/recipe_glasjavnosti.py new file mode 100644 index 0000000000..cf21372366 --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_glasjavnosti.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +www.glas-javnosti.rs +''' +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class GlasJavnosti(BasicNewsRecipe): + title = 'Glas Javnosti' + __author__ = 'Darko Miletic' + description = 'Glas javnosti - Mi ne ulepsavamo stvarnost' + publisher = 'Glas Javnosti' + category = 'news, politics, Serbia' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = False + use_embedded_content = False + language = _('Serbian') + lang = 'sr-Latn-RS' + direction = 'ltr' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : lang + , 'pretty_print' : True + } + + + preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] + + keep_only_tags = [ + dict(name='div', attrs={'id':'above-content'}) + ,dict(name='div', attrs={'class':'node' }) + ] + remove_tags = [ + dict(name=['object','link']) + ,dict(name='div',attrs={'class':['links','meta']}) + ,dict(name='div',attrs={'id':'block-block-12'}) + ] + + feeds = [ + (u'Politika', u'http://www.glas-javnosti.rs/aktuelni-clanci/2') + ,(u'Tema', u'http://www.glas-javnosti.rs/aktuelni-clanci/48') + ,(u'Drustvo', u'http://www.glas-javnosti.rs/aktuelni-clanci/17') + ,(u'Ekonomija', u'http://www.glas-javnosti.rs/aktuelni-clanci/16') + ,(u'Dosije', u'http://www.glas-javnosti.rs/aktuelni-clanci/65') + ,(u'Svet', u'http://www.glas-javnosti.rs/aktuelni-clanci/18') + ,(u'Hronika', u'http://www.glas-javnosti.rs/aktuelni-clanci/19') + ,(u'Kultura', u'http://www.glas-javnosti.rs/aktuelni-clanci/6') + ,(u'Ljudi i Dogadjaji', u'http://www.glas-javnosti.rs/aktuelni-clanci/37') + ,(u'Putovanja', u'http://www.glas-javnosti.rs/aktuelni-clanci/113') + ,(u'Feljton', u'http://www.glas-javnosti.rs/aktuelni-clanci/49') + ,(u'Sport', u'http://www.glas-javnosti.rs/aktuelni-clanci/1') + ,(u'Lov i Ribolov', u'http://www.glas-javnosti.rs/aktuelni-clanci/591') + ,(u'Nedelja', u'http://www.glas-javnosti.rs/aktuelni-clanci/1862') + ,(u'Glasno', u'http://www.glas-javnosti.rs/aktuelni-clanci/590') + ,(u'Tehnologija', u'http://www.glas-javnosti.rs/aktuelni-clanci/609') + ,(u'Reflektor', u'http://www.glas-javnosti.rs/aktuelni-clanci/717') + ,(u'Saznanja', u'http://www.glas-javnosti.rs/aktuelni-clanci/1694') + ,(u'Beograd', u'http://www.glas-javnosti.rs/aktuelni-clanci/40') + ,(u'Srbija', u'http://www.glas-javnosti.rs/aktuelni-clanci/114') + ,(u'Zapadna Srbija', u'http://www.glas-javnosti.rs/aktuelni-clanci/41') + ,(u'Istocna i Juzna Srbija', u'http://www.glas-javnosti.rs/aktuelni-clanci/42') + ,(u'Sumadija i Pomoravlje', u'http://www.glas-javnosti.rs/aktuelni-clanci/43') + ,(u'Vojvodina', u'http://www.glas-javnosti.rs/aktuelni-clanci/44') + ,(u'Republika Srpska', u'http://www.glas-javnosti.rs/aktuelni-clanci/45') + ,(u'Slobodno Vreme', u'http://www.glas-javnosti.rs/aktuelni-clanci/61') + ,(u'Konjske Snage', u'http://www.glas-javnosti.rs/aktuelni-clanci/46') + ] + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return self.adeify_images(soup) \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_guardian.py b/src/calibre/web/feeds/recipes/recipe_guardian.py index f8543c7d59..58e1c3e706 100644 --- a/src/calibre/web/feeds/recipes/recipe_guardian.py +++ b/src/calibre/web/feeds/recipes/recipe_guardian.py @@ -8,17 +8,16 @@ www.guardian.co.uk ''' from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag class Guardian(BasicNewsRecipe): title = u'The Guardian' - __author__ = 'Seabound' + __author__ = 'Seabound and Sujata Raman' language = _('English') oldest_article = 7 max_articles_per_feed = 20 remove_javascript = True - + timefmt = ' [%a, %d %b %Y]' keep_only_tags = [ dict(name='div', attrs={'id':["content","article_header","main-article-info",]}), @@ -30,20 +29,20 @@ class Guardian(BasicNewsRecipe): dict(name='ul', attrs={'id':["content-actions"]}), ] use_embedded_content = False - + no_stylesheets = True extra_css = ''' .article-attributes{font-size: x-small; font-family:Arial,Helvetica,sans-serif;} .h1{font-size: large ;font-family:georgia,serif; font-weight:bold;} .stand-first-alone{color:#666666; font-size:small; font-family:Arial,Helvetica,sans-serif;} .caption{color:#666666; font-size:x-small; font-family:Arial,Helvetica,sans-serif;} - #article-wrapper{font-size:small; font-family:Arial,Helvetica,sans-serif;} + #article-wrapper{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;} .main-article-info{font-family:Arial,Helvetica,sans-serif;} - #full-contents{font-size:small; font-family:Arial,Helvetica,sans-serif;} - #match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;} + #full-contents{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;} + #match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;} ''' - - + + feeds = [ ('Front Page', 'http://www.guardian.co.uk/rss'), @@ -57,21 +56,30 @@ class Guardian(BasicNewsRecipe): ('Comment','http://www.guardian.co.uk/commentisfree/rss'), ] - + def get_article_url(self, article): + url = article.get('guid', None) + if '/video/' in url or '/flyer/' in url or '/quiz/' in url or \ + '/gallery/' in url or 'ivebeenthere' in url or \ + 'pickthescore' in url or 'audioslideshow' in url : + url = None + return url + + + def preprocess_html(self, soup): - - for item in soup.findAll(style=True): + + for item in soup.findAll(style=True): del item['style'] - - for item in soup.findAll(face=True): + + for item in soup.findAll(face=True): del item['face'] for tag in soup.findAll(name=['ul','li']): tag.name = 'div' - + return soup - - - - + + + + diff --git a/src/calibre/web/feeds/recipes/recipe_monitor.py b/src/calibre/web/feeds/recipes/recipe_monitor.py new file mode 100644 index 0000000000..b2a6bd20a0 --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_monitor.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' + +''' +monitorcg.com +''' + +import re +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag + +class MonitorCG(BasicNewsRecipe): + title = 'Monitor online' + __author__ = 'Darko Miletic' + description = 'News from Montenegro' + publisher = 'MONITOR d.o.o. Podgorica' + category = 'news, politics, Montenegro' + oldest_article = 15 + max_articles_per_feed = 150 + no_stylesheets = True + encoding = 'utf-8' + use_embedded_content = False + language = _('Montenegrin') + lang ='sr-Latn-Me' + INDEX = 'http://www.monitorcg.com' + + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : lang + , 'pretty_print' : True + } + + preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] + + keep_only_tags = [dict(name='div', attrs={'id':'ja-current-content'})] + + remove_tags = [ dict(name=['object','link','embed']) + , dict(attrs={'class':['buttonheading','article-section']})] + + def preprocess_html(self, soup): + soup.html['xml:lang'] = self.lang + soup.html['lang'] = self.lang + mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) + mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]) + soup.head.insert(0,mlang) + soup.head.insert(1,mcharset) + return self.adeify_images(soup) + + def parse_index(self): + totalfeeds = [] + soup = self.index_to_soup(self.INDEX) + cover_item = soup.find('div',attrs={'class':'ja-catslwi'}) + if cover_item: + dt = cover_item['onclick'].partition("location.href=")[2] + curl = self.INDEX + dt.strip("'") + lfeeds = [(u'Svi clanci', curl)] + for feedobj in lfeeds: + feedtitle, feedurl = feedobj + self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl)) + articles = [] + soup = self.index_to_soup(feedurl) + contitem = soup.find('div',attrs={'class':'article-content'}) + if contitem: + img = contitem.find('img') + if img: + self.cover_url = self.INDEX + img['src'] + for item in contitem.findAll('a'): + url = self.INDEX + item['href'] + title = self.tag_to_string(item) + articles.append({ + 'title' :title + ,'date' :'' + ,'url' :url + ,'description':'' + }) + totalfeeds.append((feedtitle, articles)) + return totalfeeds + + \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_republika.py b/src/calibre/web/feeds/recipes/recipe_republika.py new file mode 100644 index 0000000000..65577c9119 --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_republika.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' + +''' +republika.co.yu +''' + +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class Republika(BasicNewsRecipe): + title = 'Republika' + __author__ = 'Darko Miletic' + description = 'Glasilo gradjanskog samooslobadjanja. Protiv stihije straha, mrznje i nasilja' + publisher = ' Zadruga Res Publica' + category = 'news, politics, Serbia' + language = _('Serbian') + lang = 'sr-Latn-RS' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + encoding = 'cp1250' + use_embedded_content = False + INDEX = u'http://www.republika.co.yu/' + extra_css = ' @font-face {font-family: "serif1"; src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} .naslov{font-size: x-large; font-weight: bold} .autor{font-size: small; font-weight: bold} ' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : lang + , 'pretty_print' : True + } + + preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] + + keep_only_tags = [ dict(attrs={'class':'naslov'}) + , dict(attrs={'class':'text1'}) + ] + + remove_tags = [dict(name=['object','link','iframe','base','img'])] + + feeds = [(u'Svi clanci', INDEX)] + + def preprocess_html(self, soup): + attribs = [ 'style','font','valign' + ,'colspan','width','height' + ,'rowspan','summary','align' + ,'cellspacing','cellpadding' + ,'frames','rules','border' + ] + for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']): + item.name = 'div' + for attrib in attribs: + if item.has_key(attrib): + del item[attrib] + return soup + + def parse_index(self): + totalfeeds = [] + lfeeds = self.get_feeds() + for feedobj in lfeeds: + feedtitle, feedurl = feedobj + self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl)) + articles = [] + soup = self.index_to_soup(feedurl) + for item in soup.findAll('a', attrs={'class':'naslovLink'}): + url = item['href'] + title = self.tag_to_string(item) + articles.append({ + 'title' :title + ,'date' :'' + ,'url' :url + ,'description':'' + }) + totalfeeds.append((feedtitle, articles)) + return totalfeeds +