diff --git a/resources/recipes/danas.recipe b/resources/recipes/danas.recipe index 1e0e319334..38957b3d63 100644 --- a/resources/recipes/danas.recipe +++ b/resources/recipes/danas.recipe @@ -51,8 +51,14 @@ class Danas(BasicNewsRecipe): preprocess_regexps = [ (re.compile(u'\u0110'), lambda match: u'\u00D0') - ,(re.compile(u'\u201c'), lambda match: '"') - ,(re.compile(u'\u201e'), lambda match: '"') + ,(re.compile(u'\u2018'), lambda match: '‘') # left single quotation mark + ,(re.compile(u'\u2019'), lambda match: '’') # right single quotation mark + ,(re.compile(u'\u201a'), lambda match: '‘') # single low-9 quotation mark + ,(re.compile(u'\u201b'), lambda match: '’') # single high-reversed-9 quotation mark + ,(re.compile(u'\u201c'), lambda match: '“') # left double quotation mark + ,(re.compile(u'\u201d'), lambda match: '”') # right double quotation mark + ,(re.compile(u'\u201e'), lambda match: '“') # double low-9 quotation mark + ,(re.compile(u'\u201f'), lambda match: '”') # double high-reversed-9 quotation mark ] keep_only_tags = [dict(name='div', attrs={'id':'left'})] @@ -89,7 +95,9 @@ class Danas(BasicNewsRecipe): ,(u'Zvaka u pepeljari' , u'http://www.danas.rs/rss/rss.asp?column_id=56') ,(u'Vostani Serbie' , u'http://www.danas.rs/rss/rss.asp?column_id=57') ,(u'Med&Jad-a' , u'http://www.danas.rs/rss/rss.asp?column_id=58') - ,(u'Svetlosti pozornice' , u'http://www.danas.rs/rss/rss.asp?column_id=59') + ,(u'Svetlosti pozornice' , u'http://www.danas.rs/rss/rss.asp?column_id=59') + ,(u'Dva cvancika' , u'http://www.danas.rs/rss/rss.asp?column_id=65') + ,(u'Iz kornera' , u'http://www.danas.rs/rss/rss.asp?column_id=64') ] def preprocess_html(self, soup): diff --git a/resources/recipes/scientific_american.recipe b/resources/recipes/scientific_american.recipe index 3970684788..8896121092 100644 --- a/resources/recipes/scientific_american.recipe +++ b/resources/recipes/scientific_american.recipe @@ -12,96 +12,53 @@ from calibre.web.feeds.news import BasicNewsRecipe class ScientificAmerican(BasicNewsRecipe): title = u'Scientific American' description = u'Popular science. Monthly magazine.' - __author__ = 'Kovid Goyal and Sujata Raman' + __author__ = 'Kovid Goyal' language = 'en' remove_javascript = True - oldest_article = 30 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - extra_css = ''' - p{font-weight: normal; font-size:small} - li{font-weight: normal; font-size:small} - .headline p{font-size:x-small; font-family:Arial,Helvetica,sans-serif;} - h2{font-size:x-small;} - h3{font-size:x-small;font-family:Arial,Helvetica,sans-serif;} - ''' - remove_tags_before = dict(name='div', attrs={'class':'headline'}) + encoding = 'utf-8' - remove_tags_after = dict(id=['article']) - remove_tags = [ - dict(id=['sharetools', 'reddit']), - #dict(name='script'), - {'class':['float_left', 'atools']}, - {"class": re.compile(r'also-in-this')}, - dict(name='a',title = ["Get the Rest of the Article","Subscribe","Buy this Issue"]), - dict(name = 'img',alt = ["Graphic - Get the Rest of the Article"]), - dict(name='div', attrs={'class':['commentbox']}), - dict(name='h2', attrs={'class':['discuss_h2']}), - ] - - html2lrf_options = ['--base-font-size', '8'] - recursions = 1 - match_regexps = [r'article.cfm.id=\S+page=(2|3|4|5|6|7|8|9|10|11|12|13|14|15)'] + def print_version(self, url): + return url + '&print=true' def parse_index(self): soup = self.index_to_soup('http://www.scientificamerican.com/sciammag/') - monthtag = soup.find('div',attrs={'id':'magazine-main_col2'}) - month = self.tag_to_string(monthtag.contents[1]) - - - self.timefmt = ' [%s]'%(self.tag_to_string(month)) + month = self.tag_to_string(soup.find('p',attrs={'id':'articleDek'})) + self.timefmt = ' [%s]'%(' '.join(month.strip().split()[:2])) img = soup.find('img', alt='Scientific American Magazine', src=True) if img is not None: self.cover_url = img['src'] - features, feeds = [], [] - for p in soup.find(id='magazine-main_col2').findAll('p') : - a = p.find('a', href=True) - - if a is None: continue - desc = '' - s = p.find('span', attrs={'class':"sub"}) - desc = self.tag_to_string(s) - - article = { - 'url' : a['href'], - 'title' : self.tag_to_string(a), - 'date' : '', - 'description' : desc, - } - features.append(article) - feeds.append(('Features', features)) - - section = [] - title = None - - for x in soup.find(id='magazine-main_col1').findAll(['div', 'a']): - - if x.name == 'div': - - if section: - feeds.append((title, section)) - - title = self.tag_to_string(x) - section = [] - else: - - if 'article.cfm' in x['href']: - article = { - 'url' : x['href'], - 'title' : self.tag_to_string(x), - 'date': '', - 'description': '', - } - - section.append(article) - - if section: - feeds.append((title, section)) + feeds = [] + for div in soup.findAll('div', attrs={'class':['primaryCol', + 'secondaryCol']}): + current_section = None + for tag in div.findAll(['h2', 'ul']): + if tag.name == 'h2': + current_section = self.tag_to_string(tag).strip() + self.log('\tFound section:', current_section) + elif current_section is not None and tag.name == 'ul': + articles = [] + for li in tag.findAll('li'): + t = li.findAll('a', + attrs={'class':lambda x: x != 'thumb'}, + href=lambda x: x and 'article.cfm' in x) + if not t: + continue + t = t[-1] + title = self.tag_to_string(t) + url = t['href'] + desc = '' + p = li.find(attrs={'class':'dek'}) + if p is not None: + desc = self.tag_to_string(p) + articles.append({'title':title, 'url':url, + 'description':desc, 'date':''}) + self.log('\t\tFound article:', title, '\n\t\tat', url) + if articles: + feeds.append((current_section, articles)) + current_section = None return feeds - def postprocess_html(self, soup, first_fetch): if soup is not None: for span in soup.findAll('span', attrs={'class':'pagination'}): diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index c9c0827759..dd08c745b1 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -34,7 +34,7 @@ class ANDROID(USBMS): 0x227]}, # Samsung - 0x04e8 : { 0x681d : [0x0222, 0x0400], + 0x04e8 : { 0x681d : [0x0222, 0x0224, 0x0400], 0x681c : [0x0222, 0x0224, 0x0400], 0x6640 : [0x0100], }, diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py index 7212bd33c6..7b83421097 100644 --- a/src/calibre/ebooks/oeb/transforms/flatcss.py +++ b/src/calibre/ebooks/oeb/transforms/flatcss.py @@ -219,7 +219,10 @@ class CSSFlattener(object): fnums = self.context.source.fnums if size[0] in ('+', '-'): # Oh, the warcrimes - esize = 3 + force_int(size) + try: + esize = 3 + force_int(size) + except: + esize = 3 if esize < 1: esize = 1 if esize > 7: diff --git a/src/calibre/gui2/add.py b/src/calibre/gui2/add.py index 9f246aeb93..1d7b5075b4 100644 --- a/src/calibre/gui2/add.py +++ b/src/calibre/gui2/add.py @@ -381,11 +381,7 @@ class Adder(QObject): # {{{ # }}} -############################################################################### -############################## END ADDER ###################################### -############################################################################### - -class Saver(QObject): +class Saver(QObject): # {{{ def __init__(self, parent, db, callback, rows, path, opts, spare_server=None): @@ -446,4 +442,5 @@ class Saver(QObject): self.pd.set_msg(_('Saved')+' '+title) if not ok: self.failures.add((title, tb)) +# }}} diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index 714579ec77..1e52350e46 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -48,6 +48,7 @@ class MetadataBackup(Thread): # {{{ time.sleep(2) if not self.dump_func([id_]): prints('Failed to backup metadata for id:', id_, 'again, giving up') + time.sleep(0.2) # Limit to five per second # }}} @@ -96,8 +97,12 @@ class CoverCache(Thread): # {{{ import traceback traceback.print_exc() continue - with self.lock: - self.cache[id_] = img + try: + with self.lock: + self.cache[id_] = img + except: + # Happens during interpreter shutdown + break def set_cache(self, ids): with self.lock: diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index a34ef9cf89..4775e13818 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -455,6 +455,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): self.add_format(id, format, stream, index_is_id=True, path=tpath, notify=False) self.conn.execute('UPDATE books SET path=? WHERE id=?', (path, id)) + self.dirtied([id], commit=False) + self.conn.commit() self.data.set(id, self.FIELD_MAP['path'], path, row_is_id=True) # Delete not needed directories if current_path and os.path.exists(spath): @@ -1922,7 +1924,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): mi.timestamp = utcnow() if mi.pubdate is None: mi.pubdate = utcnow() - self.set_metadata(id, mi) + self.set_metadata(id, mi, ignore_errors=True) if cover is not None: try: self.set_cover(id, cover) diff --git a/src/calibre/library/server/opds.py b/src/calibre/library/server/opds.py index bd5b2f36b3..f1aeb583db 100644 --- a/src/calibre/library/server/opds.py +++ b/src/calibre/library/server/opds.py @@ -160,7 +160,7 @@ def ACQUISITION_ENTRY(item, version, db, updated, CFM, CKEYS): for key in CKEYS: mi = db.get_metadata(item[CFM['id']['rec_index']], index_is_id=True) name, val = mi.format_field(key) - if not val: + if val: datatype = CFM[key]['datatype'] if datatype == 'text' and CFM[key]['is_multiple']: extra.append('%s: %s
'%(name, format_tag_string(val, ',', diff --git a/src/calibre/utils/ipc/server.py b/src/calibre/utils/ipc/server.py index 1d14f6a128..380e2e074b 100644 --- a/src/calibre/utils/ipc/server.py +++ b/src/calibre/utils/ipc/server.py @@ -195,8 +195,7 @@ class Server(Thread): job.result = cPickle.load(open(worker.rfile, 'rb')) os.remove(worker.rfile) except: - import traceback - traceback.print_exc() + pass job.duration = time.time() - job.start_time self.changed_jobs_queue.put(job)