From 1a5b92d6d915775428c38b6f99768c6def9bf012 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 18 Sep 2010 20:17:30 -0600
Subject: [PATCH 1/7] Popular Science by Tony Stegall

---
 resources/images/news/popscience.png | Bin 0 -> 737 bytes
 resources/recipes/popscience.recipe  |  59 +++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)
 create mode 100644 resources/images/news/popscience.png
 create mode 100644 resources/recipes/popscience.recipe

diff --git a/resources/images/news/popscience.png b/resources/images/news/popscience.png
new file mode 100644
index 0000000000000000000000000000000000000000..ff33483b10421b209f599cdec809544db83988e7
GIT binary patch
literal 737
zcmeAS@N?(olHy`uVBq!ia0vp^f*{Pn1|+R>-G2cowj^(N7l!{JxM1({$v_d#0*}aI
zAngIhZYQ(tK!Rljj_E)ete>H;(O;c`fr-o0#WBRQAzgQ^}I3sDr
zEs}bFIvJBwqaPUcE+q
zV_I6|?mA{M9-)bsPx{?cF*TW?b4hbr!70&y>`8k(&R^ALSkjdC{oLLFhrH{D3|BB5
z++3S2wAxB`X6MG1oAo9tny1q&@~&|Ix*cct+12Xkr1dM_v-vukMc+6dzaS%8aB<~-
z=Ec3i^}QNBjW4-xew9kfkg$1qYW;6M&)2@rRhM{I2d_LeUt4?Qz5d(f$?xZtJl$R_
zp2C&PnR-(tx17;{-JfYe-yEll+Y&;Tzl{0B8`A%!{a>lP4cGLk-+K5BuW$Vs&S3Fg
zT!6XO<1zQSS6My6Z@NtOH*fT?TKb;xfT(T9{rZV+(#L;v%+Ol=!usZ%v&UTa{Iba|
z=6$0cTsM!&|4!7|_{V)Q5%-p#FZ6$B&$E}avc3PbNrOeP)aqX%sp|x9+Zopj21+HEtq-c=jMqw+y!ipc6y(Dxk6agNLl^gnQ!yIcW&&+*;e!DAoGgf-;b?p
zVA#*#rWWaIlH~$Q9;zj-5hW>!C8<`)MX5lF!N|bSK-a)h*U%!w$iT|b)XKzM*TBrm
rz(8f@sU{Q+x%nxXX_Y7%jI0cS$Pl98PswvoB4+S(^>bP0l+XkK_>3?|

literal 0
HcmV?d00001

diff --git a/resources/recipes/popscience.recipe b/resources/recipes/popscience.recipe
new file mode 100644
index 0000000000..a1ea91a6ae
--- /dev/null
+++ b/resources/recipes/popscience.recipe
@@ -0,0 +1,59 @@
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1282101454(BasicNewsRecipe):
+    title          = 'Popular Science'
+    language       = 'en'
+    __author__     = 'TonytheBookworm'
+    description    = 'Popular Science'
+    publisher      = 'Popular Science'
+    category       = 'gadgets,science'
+    oldest_article = 7 # change this if you want more current articles. I like to go a week in
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    remove_javascript = True
+
+    masthead_url = 'http://www.raytheon.com/newsroom/rtnwcm/groups/Public/documents/masthead/rtn08_popscidec_masthead.jpg'
+
+    remove_tags = [dict(name='div', attrs={'id':['toolbar','main_supplements']}),
+                   dict(name='span', attrs={'class':['comments']}),
+                   dict(name='div', attrs={'class':['relatedinfo related-right','node_navigation','content2']}),
+                   dict(name='ul', attrs={'class':['item-list clear-block']})]
+
+    feeds = [
+             ('Gadgets', 'http://www.popsci.com/full-feed/gadgets'),
+             ('Cars', 'http://www.popsci.com/full-feed/cars'),
+             ('Science', 'http://www.popsci.com/full-feed/science'),
+             ('Technology', 'http://www.popsci.com/full-feed/technology'),
+             ('DIY', 'http://www.popsci.com/full-feed/diy'),
+            ]
+
+    # The following will get rid of the Gallery: links when found
+    def preprocess_html(self, soup):
+        print 'SOUP IS: ', soup
+        weblinks = soup.findAll(['head', 'h2'])
+        if weblinks is not None:
+            for link in weblinks:
+                if re.search('(Gallery)(:)', str(link)):
+                    link.parent.extract()
+        return soup
+    #-----------------------------------------------------------------
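
A quick illustration of the preprocess_html hook in the recipe above, as a
sketch for this note only, not part of the patch. BasicNewsRecipe hands
preprocess_html each article page as a BeautifulSoup tree, and
link.parent.extract() drops the whole enclosing block, not just the matched
heading. The promo markup here is invented:

    import re
    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    # Hypothetical page: a "Gallery:" teaser block next to a real article
    html = ('<div class="promo"><h2>Gallery: Gadgets</h2><a href="/g">view</a></div>'
            '<div class="story"><h2>Real article</h2></div>')
    soup = BeautifulSoup(html)
    for link in soup.findAll(['head', 'h2']):
        if re.search('(Gallery)(:)', str(link)):
            link.parent.extract()   # removes the entire promo div
    # str(soup) now contains only the "Real article" div
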
From 3b9e43e79ec323d2d7d4fd0b7b9900cec0d971ff Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 18 Sep 2010 21:00:28 -0600
Subject: [PATCH 2/7] EPUB metadata: Don't read timestamp value from epubs as
 I am sick of closing bugs about adding books and having the Date not be
 today.

Does not affect reading of metadata from OPF, so it should still be possible
to restore the date when adding from a previously saved-to-disk folder (as
long as the OPF was saved).
---
 src/calibre/ebooks/metadata/epub.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/calibre/ebooks/metadata/epub.py b/src/calibre/ebooks/metadata/epub.py
index 041a1ee603..df9a394258 100644
--- a/src/calibre/ebooks/metadata/epub.py
+++ b/src/calibre/ebooks/metadata/epub.py
@@ -176,6 +176,7 @@ def get_metadata(stream, extract_cover=True):
         except:
             import traceback
             traceback.print_exc()
+    mi.timestamp = None
     return mi
 
 def get_quick_metadata(stream):
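The practical effect of the one-liner above, sketched for illustration (the
file name is hypothetical): with mi.timestamp forced to None, the add-books
code falls back to the current time instead of whatever date happened to be
stored inside the EPUB itself.

    from calibre.ebooks.metadata.epub import get_metadata

    stream = open('some_book.epub', 'rb')   # hypothetical file
    mi = get_metadata(stream)
    # mi.timestamp is now always None for EPUBs, so the adder uses "now";
    # dates stored in a saved metadata.opf are still honoured when adding
    # from a previously saved-to-disk folder.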

From dec27fbaa1e9544675d6d10bda566d83fd7a85f2 Mon Sep 17 00:00:00 2001
From: ldolse
Date: Sun, 19 Sep 2010 13:02:02 +0800
Subject: [PATCH 3/7] new dehyphenation algorithm, using the document as a
 dictionary

---
 src/calibre/ebooks/conversion/preprocess.py | 58 +++++++++++++++++++--
 src/calibre/ebooks/conversion/utils.py      | 13 +++--
 2 files changed, 62 insertions(+), 9 deletions(-)

diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index 03a0047927..a1e28b2554 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -106,6 +106,50 @@ def line_length(format, raw, percent):
 
     return lengths[index]
 
+class Dehyphenator(object):
+    '''
+    Analyzes words to determine whether hyphens should be retained/removed. Uses the document
+    itself as a dictionary. This method handles all languages along with uncommon, made-up, and
+    scientific words. The primary disadvantage is that words appearing only once in the document
+    retain hyphens.
+    '''
+
+    def dehyphenate(self, match):
+        firsthalf = match.group('firstpart')
+        secondhalf = match.group('secondpart')
+        hyphenated = str(firsthalf) + "-" + str(secondhalf)
+        dehyphenated = str(firsthalf) + str(secondhalf)
+        # Add common suffixes to the regex below to increase the likelihood of a match -
+        # don't add suffixes which are also complete words, such as 'able' or 'sex'
+        removesuffixes = re.compile(r"((ed)?ly|(')?s|a?(t|s)ion(s|al(ly)?)?|ings?|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic|(e|a)nce|ment(s)?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex)$", re.IGNORECASE)
+        lookupword = removesuffixes.sub('', dehyphenated)
+        # remove prefixes if the prefix was not already the point of hyphenation
+        prefixes = re.compile(r'^(un|in|ex)$', re.IGNORECASE)
+        removeprefix = re.compile(r'^(un|in|ex)', re.IGNORECASE)
+        if prefixes.match(firsthalf) is None:
+            lookupword = removeprefix.sub('', lookupword)
+        booklookup = re.compile(u'%s' % lookupword, re.IGNORECASE)
+        #print "lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated)
+        match = booklookup.search(self.html)
+        if match:
+            #print "returned dehyphenated word: " + str(dehyphenated)
+            return dehyphenated
+        else:
+            #print "returned hyphenated word: " + str(hyphenated)
+            return hyphenated
+
+    def __call__(self, html, format, length=1):
+        self.html = html
+        if format == 'html':
+            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^“"\s>]+)-\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?P<secondpart>[\w\d]+)' % length)
+        elif format == 'pdf':
+            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^“"\s>]+)-\s*(<p>|\s*</[iub]>\s*<p>\s*<[iub]>)\s*(?P<secondpart>[\w\d]+)'% length)
+        elif format == 'individual_words':
+            intextmatch = re.compile('>[^<]*\b(?P<firstpart>[^"\s>]+)-(?P<secondpart>\w+)\b[^<]*<')
+        html = intextmatch.sub(self.dehyphenate, html)
+        return html
+
 
 class CSSPreProcessor(object):
@@ -324,8 +368,6 @@ class HTMLPreProcessor(object):
-            # unwrap hyphenation - don't delete the hyphen (often doesn't split words)
-            end_rules.append((re.compile(u'[-](\s*<p>)+\s*(?=[[a-z\d])'), lambda match: ''))
             # unwrap em/en dashes
             end_rules.append((re.compile(u'(?<=[–—])\s*<p>\s*(?=[[a-z\d])'), lambda match: ''))
             # unwrap/delete soft hyphens
             end_rules.append((re.compile(u'[­](\s*<p>)+\s*(?=[[a-z\d])'), lambda match: ''))
             # unwrap/delete soft hyphens with formatting
@@ -350,7 +393,7 @@ class HTMLPreProcessor(object):
             # print "The pdf line length returned is " + str(length)
             end_rules.append(
                 # Un wrap using punctuation
-                (re.compile(r'(?<=.{%i}([a-z,:)\-IA]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
+                (re.compile(r'(?<=.{%i}([a-z,:)\IA]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
             )
 
         for rule in self.PREPROCESS + start_rules:
@@ -380,6 +423,11 @@ class HTMLPreProcessor(object):
         for rule in rules + end_rules:
             html = rule[0].sub(rule[1], html)
 
+        if is_pdftohtml:
+            # Dehyphenate
+            dehyphenator = Dehyphenator()
+            html = dehyphenator(html,'pdf', length)
+
         #dump(html, 'post-preprocess')
 
         # Handle broken XHTML w/ SVG (ugh)
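Before the utils.py half of this patch, a worked example of how dehyphenate
derives its lookup word (the word is invented for illustration; the two
regexes are the ones from the class above):

    import re

    removesuffixes = re.compile(r"((ed)?ly|(')?s|a?(t|s)ion(s|al(ly)?)?|ings?|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic|(e|a)nce|ment(s)?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex)$", re.IGNORECASE)
    removeprefix = re.compile(r'^(un|in|ex)', re.IGNORECASE)

    # Suppose "insti-" + "tution" was seen at a line break:
    dehyphenated = 'institution'
    lookupword = removesuffixes.sub('', dehyphenated)  # 'institu' ('tion' stripped)
    lookupword = removeprefix.sub('', lookupword)      # 'stitu'   ('in' stripped)
    # 'stitu' is then searched for in the whole document; any hit (for example
    # inside 'institutions' elsewhere) accepts the joined form 'institution'.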

diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index 37fd169cb1..f9178ead0b 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -6,7 +6,7 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
 import re
-from calibre.ebooks.conversion.preprocess import line_length
+from calibre.ebooks.conversion.preprocess import line_length, Dehyphenator
 from calibre.utils.logging import default_log
 
 class PreProcessor(object):
@@ -132,7 +132,6 @@ class PreProcessor(object):
         # Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly
         html = re.sub(r"\s*</p>", "</p>\n", html)
         html = re.sub(r"\s*<p>\s*", "\n<p>", html)
-        #self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n")
         # detect chapters/sections to match xpath or splitting logic
         heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE)
         self.html_preprocess_sections = len(heading.findall(html))
@@ -174,10 +173,16 @@ class PreProcessor(object):
         length = line_length(format, html, getattr(self.extra_opts,
             'html_unwrap_factor', 0.4))
         self.log("*** Median line length is " + str(length) + ", calculated with " + format + " format ***")
+        max_length = length * 1.4
+        min_max = str("(?<=.{"+str(length)+"})(?<!.{"+str(max_length)+"})")
         # Unwrap and/or delete soft-hyphens, hyphens
         html = re.sub(u'­\s*(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*', '', html)
-        html = re.sub(u'(?<=[-\u2013\u2014])\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])', '', html)
+        html = re.sub(u'%s(?<=[\u2013\u2014])\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])' % min_max, '', html)
+        # Dehyphenate
+        dehyphenator = Dehyphenator()
+        html = dehyphenator(html,'html', length)
         # Unwrap lines using punctation and line length
         unwrap = re.compile(r"(?<=.{%i}([a-z,;):\IA]|(?<!\&\w{4});))\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE)
         html = unwrap.sub(' ', html)
From 053d60331fcfb9f82e141ebc11a625b1acd3e1a4 Mon Sep 17 00:00:00 2001
From: ldolse
Date: Sun, 19 Sep 2010 23:07:07 +0800
Subject: [PATCH 4/7] regex optimizations

---
 src/calibre/ebooks/conversion/preprocess.py |  2 +-
 src/calibre/ebooks/conversion/utils.py      | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index 16bfb42d1f..7f13cefcaa 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -121,7 +121,7 @@ class Dehyphenator(object):
         dehyphenated = str(firsthalf) + str(secondhalf)
         # Add common suffixes to the regex below to increase the likelihood of a match -
         # don't add suffixes which are also complete words, such as 'able' or 'sex'
-        removesuffixes = re.compile(r"((ed)?ly|(')?s|a?(t|s)ion(s|al(ly)?)?|ings?|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic|(e|a)nce|ment(s)?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex)$", re.IGNORECASE)
+        removesuffixes = re.compile(r"((ed)?ly|('e)?s|a?(t|s)ion(s|al(ly)?)?|ings?|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic|(e|a)nce|ment(s)?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex)$", re.IGNORECASE)
         lookupword = removesuffixes.sub('', dehyphenated)
         # remove prefixes if the prefix was not already the point of hyphenation
         prefixes = re.compile(r'^(un|in|ex)$', re.IGNORECASE)
diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index f9178ead0b..6a5eaa4a34 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -114,7 +114,7 @@ class PreProcessor(object):
         html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html)
         # Get rid of empty span, bold, & italics tags
         html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>\s*){0,2}\s*</span>\s*", " ", html)
-        html = re.sub(r"\s*<[ibu]>\s*(<[ibu]>\s*</[ibu]>\s*){0,2}\s*</[ibu]>", " ", html)
+        html = re.sub(r"\s*<[ibu][^>]*>\s*(<[ibu][^>]*>\s*</[ibu]>\s*){0,2}\s*</[ibu]>", " ", html)
         html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>\s*){0,2}\s*</span>\s*", " ", html)
 
         # If more than 40% of the lines are empty paragraphs then delete them to clean up spacing
@@ -139,16 +139,16 @@ class PreProcessor(object):
         #
         # Start with most typical chapter headings, get more aggressive until one works
         if self.html_preprocess_sections < 10:
-            chapdetect = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu]>){0,2}\s*.?(Introduction|Synopsis|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*(</[ibu]>){0,2})\s*(</span>)?s*(</[ibu]>){0,2}\s*(</span>)?\s*(</(p|/?br)>)\s*\s*(\s*<p[^>]*>\s*</p>){0,2}\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.IGNORECASE|re.VERBOSE)
+            chapdetect = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?(Introduction|Synopsis|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*(</[ibu]>){0,2})\s*(</span>)?s*(</[ibu]>){0,2}\s*(</span>)?\s*(</(p|/?br)>)\s*\s*(\s*<p[^>]*>\s*</p>){0,2}\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.IGNORECASE|re.VERBOSE)
             html = chapdetect.sub(self.chapter_head, html)
         if self.html_preprocess_sections < 10:
             self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying numeric chapters")
-            chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu]>){0,2}\s*.?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE)
+            chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE)
             html = chapdetect2.sub(self.chapter_head, html)
         if self.html_preprocess_sections < 10:
             self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying with uppercase words")
-            chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu]>){0,2}\s*.?([A-Z#\-\s]+)\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE)
+            chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?([A-Z#\-\s]+)\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE)
             html = chapdetect2.sub(self.chapter_head, html)
 
         ###### Unwrap lines ######
@@ -191,7 +191,7 @@ class PreProcessor(object):
         # If still no sections after unwrapping mark split points on lines with no punctuation
         if self.html_preprocess_sections < 10:
             self.log("Looking for more split points based on punctuation, currently have " + str(self.html_preprocess_sections))
-            chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*.?([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE)
+            chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE)
             html = chapdetect3.sub(self.chapter_break, html)
         # search for places where a first or second level heading is immediately followed by another
         # top level heading. demote the second heading to h3 to prevent splitting between chapter

From 980388f2bde3d4cb4b07673cb9e79c951aabd867 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 19 Sep 2010 09:48:39 -0600
Subject: [PATCH 5/7] Le Journal de Montreal by Luciano Furtado. Fixes #405
 (New news feed)

---
 resources/recipes/le_journal.recipe | 43 +++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 resources/recipes/le_journal.recipe

diff --git a/resources/recipes/le_journal.recipe b/resources/recipes/le_journal.recipe
new file mode 100644
index 0000000000..24a7d52164
--- /dev/null
+++ b/resources/recipes/le_journal.recipe
@@ -0,0 +1,43 @@
+__author__ = ' (lrfurtado@yahoo.com.br)'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class LeJournalDeMontrealRecipe(BasicNewsRecipe):
+
+    title       = u'Le Journal de Montreal'
+    description = u'Le Journal de Montreal'
+    __author__  = 'Luciano Furtado'
+    language    = 'fr'
+
+    oldest_article = 7
+    use_embedded_content = 0
+    max_articles_per_feed = 15
+
+    remove_tags = [
+        dict(name='ul', attrs={'id':'mainNav'}),
+        dict(name='div', attrs={'id':'boxPolitique'}),
+        dict(name='div', attrs={'id':'boxScoop'}),
+        dict(name='div', attrs={'id':'DossierSpec'}),
+        dict(name='div', attrs={'id':'channelBoxes'}),
+        dict(name='div', attrs={'id':'sectionBoxes'}),
+        dict(name='div', attrs={'id':'header'}),
+        dict(name='div', attrs={'id':'footer'}),
+        dict(name='div', attrs={'id':'navbarCanoe_container'}),
+        dict(name='div', attrs={'id':'popularCanoe'}),
+        dict(name='div', attrs={'id':'textAds'}),
+        dict(name='div', attrs={'id':'24heures'}),
+        dict(name='div', attrs={'class':'bottomBox clear'}),
+        dict(name='div', attrs={'class':'articleControls thin'}),
+        ]
+
+    feeds = [
+        (u'Actualites',
+         u'http://www.canoe.com/rss/feed/nouvelles/ljm_actualites.xml'),
+        (u'Arts et spectacle',
+         u'http://www.canoe.com/rss/feed/nouvelles/ljm_arts.xml'),
+        (u'Sports',
+         u'http://www.canoe.com/rss/feed/nouvelles/ljm_sports.xml'),
+        (u'Chroniques',
+         u'http://www.canoe.com/rss/feed/nouvelles/ljm_chroniques.xml'),
+        ]

From 23cd4fd7833180d7036aa77c0c1efcbd09ca6a00 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 19 Sep 2010 10:16:41 -0600
Subject: [PATCH 6/7] Content server: Making serving of large files more
 efficient.

---
 src/calibre/library/server/content.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/library/server/content.py b/src/calibre/library/server/content.py
index 95794a8c1d..aeba8a3218 100644
--- a/src/calibre/library/server/content.py
+++ b/src/calibre/library/server/content.py
@@ -184,7 +184,7 @@ class ContentServer(object):
         if path and os.path.exists(path):
             updated = fromtimestamp(os.stat(path).st_mtime)
             cherrypy.response.headers['Last-Modified'] = self.last_modified(updated)
-        return fmt.read()
+        return fmt
 
     # }}}
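
For context on why returning the open file object helps, a sketch only, not
calibre code: fmt.read() buffers the whole ebook in memory before CherryPy
sends it, while returning the file-like object lets CherryPy stream it in
fixed-size chunks (via its file_generator helper), so memory use stays flat
for large files:

    # Sketch of the difference; fmt is an open file object.
    def serve_buffered(fmt):
        return fmt.read()   # entire file read into one string first

    def serve_streaming(fmt):
        return fmt          # CherryPy wraps file-like bodies and sends chunks

    # The chunking is roughly equivalent to this generator:
    def file_chunks(fileobj, chunk_size=65536):
        data = fileobj.read(chunk_size)
        while data:
            yield data
            data = fileobj.read(chunk_size)
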
From ef3fd4df536811ca7b91be06ab10595ae1dc6a4c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 19 Sep 2010 10:39:45 -0600
Subject: [PATCH 7/7] ...

---
 resources/content_server/gui.js | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/resources/content_server/gui.js b/resources/content_server/gui.js
index afc21137e1..bd0743a854 100644
--- a/resources/content_server/gui.js
+++ b/resources/content_server/gui.js
@@ -84,7 +84,10 @@ function render_book(book) {
     }
     title += '</span>'
     title += '<span class="tagdata_long" style="display:none">'
-    if (tags) title += 'Tags=[{0}] '.format(tags);
+    if (tags) {
+        t = tags.split(':&:', 2);
+        title += 'Tags=[{0}] '.format(t[1]);
+    }
     custcols = book.attr("custcols").split(',')
     for ( i = 0; i < custcols.length; i++) {
         if (custcols[i].length > 0) {