diff --git a/resources/recipes/danas.recipe b/resources/recipes/danas.recipe index 1e0e319334..38957b3d63 100644 --- a/resources/recipes/danas.recipe +++ b/resources/recipes/danas.recipe @@ -51,8 +51,14 @@ class Danas(BasicNewsRecipe): preprocess_regexps = [ (re.compile(u'\u0110'), lambda match: u'\u00D0') - ,(re.compile(u'\u201c'), lambda match: '"') - ,(re.compile(u'\u201e'), lambda match: '"') + ,(re.compile(u'\u2018'), lambda match: '‘') # left single quotation mark + ,(re.compile(u'\u2019'), lambda match: '’') # right single quotation mark + ,(re.compile(u'\u201a'), lambda match: '‘') # single low-9 quotation mark + ,(re.compile(u'\u201b'), lambda match: '’') # single high-reversed-9 quotation mark + ,(re.compile(u'\u201c'), lambda match: '“') # left double quotation mark + ,(re.compile(u'\u201d'), lambda match: '”') # right double quotation mark + ,(re.compile(u'\u201e'), lambda match: '“') # double low-9 quotation mark + ,(re.compile(u'\u201f'), lambda match: '”') # double high-reversed-9 quotation mark ] keep_only_tags = [dict(name='div', attrs={'id':'left'})] @@ -89,7 +95,9 @@ class Danas(BasicNewsRecipe): ,(u'Zvaka u pepeljari' , u'http://www.danas.rs/rss/rss.asp?column_id=56') ,(u'Vostani Serbie' , u'http://www.danas.rs/rss/rss.asp?column_id=57') ,(u'Med&Jad-a' , u'http://www.danas.rs/rss/rss.asp?column_id=58') - ,(u'Svetlosti pozornice' , u'http://www.danas.rs/rss/rss.asp?column_id=59') + ,(u'Svetlosti pozornice' , u'http://www.danas.rs/rss/rss.asp?column_id=59') + ,(u'Dva cvancika' , u'http://www.danas.rs/rss/rss.asp?column_id=65') + ,(u'Iz kornera' , u'http://www.danas.rs/rss/rss.asp?column_id=64') ] def preprocess_html(self, soup): diff --git a/resources/recipes/scientific_american.recipe b/resources/recipes/scientific_american.recipe index 3970684788..8896121092 100644 --- a/resources/recipes/scientific_american.recipe +++ b/resources/recipes/scientific_american.recipe @@ -12,96 +12,53 @@ from calibre.web.feeds.news import BasicNewsRecipe class ScientificAmerican(BasicNewsRecipe): title = u'Scientific American' description = u'Popular science. Monthly magazine.' - __author__ = 'Kovid Goyal and Sujata Raman' + __author__ = 'Kovid Goyal' language = 'en' remove_javascript = True - oldest_article = 30 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - extra_css = ''' - p{font-weight: normal; font-size:small} - li{font-weight: normal; font-size:small} - .headline p{font-size:x-small; font-family:Arial,Helvetica,sans-serif;} - h2{font-size:x-small;} - h3{font-size:x-small;font-family:Arial,Helvetica,sans-serif;} - ''' - remove_tags_before = dict(name='div', attrs={'class':'headline'}) + encoding = 'utf-8' - remove_tags_after = dict(id=['article']) - remove_tags = [ - dict(id=['sharetools', 'reddit']), - #dict(name='script'), - {'class':['float_left', 'atools']}, - {"class": re.compile(r'also-in-this')}, - dict(name='a',title = ["Get the Rest of the Article","Subscribe","Buy this Issue"]), - dict(name = 'img',alt = ["Graphic - Get the Rest of the Article"]), - dict(name='div', attrs={'class':['commentbox']}), - dict(name='h2', attrs={'class':['discuss_h2']}), - ] - - html2lrf_options = ['--base-font-size', '8'] - recursions = 1 - match_regexps = [r'article.cfm.id=\S+page=(2|3|4|5|6|7|8|9|10|11|12|13|14|15)'] + def print_version(self, url): + return url + '&print=true' def parse_index(self): soup = self.index_to_soup('http://www.scientificamerican.com/sciammag/') - monthtag = soup.find('div',attrs={'id':'magazine-main_col2'}) - month = self.tag_to_string(monthtag.contents[1]) - - - self.timefmt = ' [%s]'%(self.tag_to_string(month)) + month = self.tag_to_string(soup.find('p',attrs={'id':'articleDek'})) + self.timefmt = ' [%s]'%(' '.join(month.strip().split()[:2])) img = soup.find('img', alt='Scientific American Magazine', src=True) if img is not None: self.cover_url = img['src'] - features, feeds = [], [] - for p in soup.find(id='magazine-main_col2').findAll('p') : - a = p.find('a', href=True) - - if a is None: continue - desc = '' - s = p.find('span', attrs={'class':"sub"}) - desc = self.tag_to_string(s) - - article = { - 'url' : a['href'], - 'title' : self.tag_to_string(a), - 'date' : '', - 'description' : desc, - } - features.append(article) - feeds.append(('Features', features)) - - section = [] - title = None - - for x in soup.find(id='magazine-main_col1').findAll(['div', 'a']): - - if x.name == 'div': - - if section: - feeds.append((title, section)) - - title = self.tag_to_string(x) - section = [] - else: - - if 'article.cfm' in x['href']: - article = { - 'url' : x['href'], - 'title' : self.tag_to_string(x), - 'date': '', - 'description': '', - } - - section.append(article) - - if section: - feeds.append((title, section)) + feeds = [] + for div in soup.findAll('div', attrs={'class':['primaryCol', + 'secondaryCol']}): + current_section = None + for tag in div.findAll(['h2', 'ul']): + if tag.name == 'h2': + current_section = self.tag_to_string(tag).strip() + self.log('\tFound section:', current_section) + elif current_section is not None and tag.name == 'ul': + articles = [] + for li in tag.findAll('li'): + t = li.findAll('a', + attrs={'class':lambda x: x != 'thumb'}, + href=lambda x: x and 'article.cfm' in x) + if not t: + continue + t = t[-1] + title = self.tag_to_string(t) + url = t['href'] + desc = '' + p = li.find(attrs={'class':'dek'}) + if p is not None: + desc = self.tag_to_string(p) + articles.append({'title':title, 'url':url, + 'description':desc, 'date':''}) + self.log('\t\tFound article:', title, '\n\t\tat', url) + if articles: + feeds.append((current_section, articles)) + current_section = None return feeds - def postprocess_html(self, soup, first_fetch): if soup is not None: for span in soup.findAll('span', attrs={'class':'pagination'}): diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index c9c0827759..dd08c745b1 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -34,7 +34,7 @@ class ANDROID(USBMS): 0x227]}, # Samsung - 0x04e8 : { 0x681d : [0x0222, 0x0400], + 0x04e8 : { 0x681d : [0x0222, 0x0224, 0x0400], 0x681c : [0x0222, 0x0224, 0x0400], 0x6640 : [0x0100], }, diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py index 7212bd33c6..7b83421097 100644 --- a/src/calibre/ebooks/oeb/transforms/flatcss.py +++ b/src/calibre/ebooks/oeb/transforms/flatcss.py @@ -219,7 +219,10 @@ class CSSFlattener(object): fnums = self.context.source.fnums if size[0] in ('+', '-'): # Oh, the warcrimes - esize = 3 + force_int(size) + try: + esize = 3 + force_int(size) + except: + esize = 3 if esize < 1: esize = 1 if esize > 7: diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index f62c4ce074..a9160f976f 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -456,7 +456,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): path=tpath, notify=False) self.conn.execute('UPDATE books SET path=? WHERE id=?', (path, id)) self.dirtied([id], commit=False) - self.commit() + self.conn.commit() self.data.set(id, self.FIELD_MAP['path'], path, row_is_id=True) # Delete not needed directories if current_path and os.path.exists(spath): diff --git a/src/calibre/utils/ipc/server.py b/src/calibre/utils/ipc/server.py index 1d14f6a128..380e2e074b 100644 --- a/src/calibre/utils/ipc/server.py +++ b/src/calibre/utils/ipc/server.py @@ -195,8 +195,7 @@ class Server(Thread): job.result = cPickle.load(open(worker.rfile, 'rb')) os.remove(worker.rfile) except: - import traceback - traceback.print_exc() + pass job.duration = time.time() - job.start_time self.changed_jobs_queue.put(job)