diff --git a/Changelog.yaml b/Changelog.yaml
index 17f3ebcf97..01425ec2ca 100644
--- a/Changelog.yaml
+++ b/Changelog.yaml
@@ -19,6 +19,57 @@
 # new recipes:
 #   - title:
 
+- version: 0.8.48
+  date: 2012-04-20
+
+  new features:
+    - title: "Conversion: The search and replace feature has been completely revamped."
+      description: "You can now use any number of search and replace
+      expressions, not just three. You can also store and load frequently used
+      sets of search and replace expressions. Also, the wizard generates its
+      preview in a separate process to protect against crashes/memory leaks."
+      tickets: [983476,983484,983478]
+
+    - title: "Support for the new '.azw3' files that Amazon recently started generating. calibre will now detect them as ebooks. It can also view/convert them, if they are DRM free."
+
+    - title: "Drivers for Samsung Galaxy ACE GT-S5830L and HTC One X"
+      tickets: [981185]
+
+  bug fixes:
+    - title: "Get Books: Support the new website design of Barnes & Noble"
+
+    - title: "T1 driver: Fix books sent to SD card sometimes resulting in problems when deleted."
+      tickets: [943586]
+
+    - title: "Do not allow author names to be set to blank via the Manage authors function. Blank authors are now automatically set to 'Unknown'"
+
+    - title: "MOBI Output: Handle background color specified on <td> and <tr> in addition to <table> tags."
+      tickets: [980813]
+
+    - title: "MOBI Output: Fix underline style applied to a parent element not getting inherited by its children."
+      tickets: [985711]
+
+  improved recipes:
+    - xkcd
+    - Metro Nieuws
+    - Calgary Herald
+    - Orlando Sentinel
+    - countryfile
+    - Heise
+
+  new recipes:
+    - title: Various new Polish news sources
+      author: fenuks
+
+    - title: Various Italian news sources
+      author: faber1971
+
+    - title: Jakarta Globe
+      author: rty
+
+    - title: Acim Bilim Dergisi
+      author: thomass
+
 - version: 0.8.47
   date: 2012-04-13
diff --git a/recipes/acim_bilim_dergisi.recipe b/recipes/acim_bilim_dergisi.recipe
new file mode 100644
index 0000000000..5d674fe93a
--- /dev/null
+++ b/recipes/acim_bilim_dergisi.recipe
@@ -0,0 +1,27 @@
+# -*- coding: utf-8 -*-
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1334868409(BasicNewsRecipe):
+    title = u'AÇIK BİLİM DERGİSİ'
+    description = 'Aylık çevrimiçi bilim dergisi'
+    __author__ = u'thomass'
+    oldest_article = 30
+    max_articles_per_feed = 300
+    auto_cleanup = True
+    encoding = 'UTF-8'
+    publisher = 'açık bilim'
+    category = 'haber, bilim, TR, dergi'
+    language = 'tr'
+    publication_type = 'magazine'
+    conversion_options = {
+        'tags'             : category,
+        'language'         : language,
+        'publisher'        : publisher,
+        'linearize_tables' : True
+    }
+    cover_img_url = 'http://www.acikbilim.com/wp-content/themes/Equilibrium/images/logodene.jpg'
+    masthead_url = 'http://www.acikbilim.com/wp-content/themes/Equilibrium/images/logodene.jpg'
+
+
+    feeds = [(u'Tüm Yayınlar', u'http://www.acikbilim.com/feed')]
diff --git a/recipes/adventure_zone_pl.recipe b/recipes/adventure_zone_pl.recipe
index 65f4e3e52d..bb311606ac 100644
--- a/recipes/adventure_zone_pl.recipe
+++ b/recipes/adventure_zone_pl.recipe
@@ -9,6 +9,7 @@ class Adventure_zone(BasicNewsRecipe):
     no_stylesheets = True
     oldest_article = 20
     max_articles_per_feed = 100
+    index='http://www.adventure-zone.info/fusion/'
     use_embedded_content=False
     preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: '')]
     remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
@@ -45,6 +46,19 @@ class Adventure_zone(BasicNewsRecipe):
         skip_tag = skip_tag.findAll(name='a')
         for r in
skip_tag: if r.strong: - word=r.strong.string - if word and (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word)): - return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True) \ No newline at end of file + word=r.strong.string.lower() + if word and (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word) or ('poradnik' in word)): + return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True) + + def preprocess_html(self, soup): + footer=soup.find(attrs={'class':'news-footer middle-border'}) + if footer and len(footer('a'))>=2: + footer('a')[1].extract() + for item in soup.findAll(style=True): + del item['style'] + for a in soup('a'): + if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: + a['href']=self.index + a['href'] + return soup + + \ No newline at end of file diff --git a/recipes/benchmark_pl.recipe b/recipes/benchmark_pl.recipe index cc74cc9128..00eea1be68 100644 --- a/recipes/benchmark_pl.recipe +++ b/recipes/benchmark_pl.recipe @@ -68,4 +68,7 @@ class Benchmark_pl(BasicNewsRecipe): self.image_article(soup, soup.body) else: self.append_page(soup, soup.body) + for a in soup('a'): + if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: + a['href']=self.INDEX + a['href'] return soup diff --git a/recipes/calgary_herald.recipe b/recipes/calgary_herald.recipe index dc919a76f8..12134bc9a4 100644 --- a/recipes/calgary_herald.recipe +++ b/recipes/calgary_herald.recipe @@ -1,220 +1,35 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' - -''' -www.canada.com -''' - -import re -from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup - - -class CanWestPaper(BasicNewsRecipe): - - # un-comment the following four lines for the Victoria Times Colonist -## title = u'Victoria Times Colonist' -## url_prefix = 'http://www.timescolonist.com' -## description = u'News from Victoria, BC' -## fp_tag = 'CAN_TC' - - # un-comment the following four lines for the Vancouver Province -## title = u'Vancouver Province' -## url_prefix = 'http://www.theprovince.com' -## description = u'News from Vancouver, BC' -## fp_tag = 'CAN_VP' - - # un-comment the following four lines for the Vancouver Sun -## title = u'Vancouver Sun' -## url_prefix = 'http://www.vancouversun.com' -## description = u'News from Vancouver, BC' -## fp_tag = 'CAN_VS' - - # un-comment the following four lines for the Edmonton Journal -## title = u'Edmonton Journal' -## url_prefix = 'http://www.edmontonjournal.com' -## description = u'News from Edmonton, AB' -## fp_tag = 'CAN_EJ' - - # un-comment the following four lines for the Calgary Herald - title = u'Calgary Herald' - url_prefix = 'http://www.calgaryherald.com' - description = u'News from Calgary, AB' - fp_tag = 'CAN_CH' - - # un-comment the following four lines for the Regina Leader-Post -## title = u'Regina Leader-Post' -## url_prefix = 'http://www.leaderpost.com' -## description = u'News from Regina, SK' -## fp_tag = '' - - # un-comment the following four lines for the Saskatoon Star-Phoenix -## title = u'Saskatoon Star-Phoenix' -## url_prefix = 'http://www.thestarphoenix.com' -## description = u'News from Saskatoon, SK' -## fp_tag = '' - - # un-comment the following four lines for the Windsor Star -## title = u'Windsor Star' -## url_prefix = 
'http://www.windsorstar.com' -## description = u'News from Windsor, ON' -## fp_tag = 'CAN_' - - # un-comment the following four lines for the Ottawa Citizen -## title = u'Ottawa Citizen' -## url_prefix = 'http://www.ottawacitizen.com' -## description = u'News from Ottawa, ON' -## fp_tag = 'CAN_OC' - - # un-comment the following four lines for the Montreal Gazette -## title = u'Montreal Gazette' -## url_prefix = 'http://www.montrealgazette.com' -## description = u'News from Montreal, QC' -## fp_tag = 'CAN_MG' - - - language = 'en_CA' - __author__ = 'Nick Redding' - no_stylesheets = True - timefmt = ' [%b %d]' - extra_css = ''' - .timestamp { font-size:xx-small; display: block; } - #storyheader { font-size: medium; } - #storyheader h1 { font-size: x-large; } - #storyheader h2 { font-size: large; font-style: italic; } - .byline { font-size:xx-small; } - #photocaption { font-size: small; font-style: italic } - #photocredit { font-size: xx-small; }''' - keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})] - remove_tags = [{'class':'comments'}, - dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}), - dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}), - dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}), - dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}), - dict(name='div', attrs={'class':'rule_grey_solid'}), - dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})] - - def get_cover_url(self): - from datetime import timedelta, date - if self.fp_tag=='': - return None - cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg' - br = BasicNewsRecipe.get_browser() - daysback=1 - try: - br.open(cover) - except: - while daysback<7: - cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg' - br = BasicNewsRecipe.get_browser() - try: - br.open(cover) - except: - daysback = daysback+1 - continue - break - if daysback==7: - self.log("\nCover unavailable") - cover = None - return cover - - def fixChars(self,string): - # Replace lsquo (\x91) - fixed = re.sub("\x91","‘",string) - # Replace rsquo (\x92) - fixed = re.sub("\x92","’",fixed) - # Replace ldquo (\x93) - fixed = re.sub("\x93","“",fixed) - # Replace rdquo (\x94) - fixed = re.sub("\x94","”",fixed) - # Replace ndash (\x96) - fixed = re.sub("\x96","–",fixed) - # Replace mdash (\x97) - fixed = re.sub("\x97","—",fixed) - fixed = re.sub("’","’",fixed) - return fixed - - def massageNCXText(self, description): - # Kindle TOC descriptions won't render certain characters - if description: - massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)) - # Replace '&' with '&' - massaged = re.sub("&","&", massaged) - return self.fixChars(massaged) - else: - return description - - def populate_article_metadata(self, article, soup, first): - if first: - picdiv = soup.find('body').find('img') - if picdiv is not None: - self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src'])) - xtitle = article.text_summary.strip() - if len(xtitle) == 0: - desc = soup.find('meta',attrs={'property':'og:description'}) - if desc is not None: - article.summary = article.text_summary = desc['content'] - - def 
strip_anchors(self,soup): - paras = soup.findAll(True) - for para in paras: - aTags = para.findAll('a') - for a in aTags: - if a.img is None: - a.replaceWith(a.renderContents().decode('cp1252','replace')) - return soup - - def preprocess_html(self, soup): - return self.strip_anchors(soup) - - - - def parse_index(self): - soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html') - - articles = {} - key = 'News' - ans = ['News'] - - # Find each instance of class="sectiontitle", class="featurecontent" - for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}): - #self.log(" div class = %s" % divtag['class']) - if divtag['class'].startswith('section_title'): - # div contains section title - if not divtag.h3: - continue - key = self.tag_to_string(divtag.h3,False) - ans.append(key) - self.log("Section name %s" % key) - continue - # div contains article data - h1tag = divtag.find('h1') - if not h1tag: - continue - atag = h1tag.find('a',href=True) - if not atag: - continue - url = self.url_prefix+'/news/todays-paper/'+atag['href'] - #self.log("Section %s" % key) - #self.log("url %s" % url) - title = self.tag_to_string(atag,False) - #self.log("title %s" % title) - pubdate = '' - description = '' - ptag = divtag.find('p'); - if ptag: - description = self.tag_to_string(ptag,False) - #self.log("description %s" % description) - author = '' - autag = divtag.find('h4') - if autag: - author = self.tag_to_string(autag,False) - #self.log("author %s" % author) - if not articles.has_key(key): - articles[key] = [] - articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content='')) - - ans = [(key, articles[key]) for key in ans if articles.has_key(key)] - return ans +from calibre.web.feeds.news import BasicNewsRecipe + +class CalgaryHerald(BasicNewsRecipe): + title = u'Calgary Herald' + oldest_article = 3 + max_articles_per_feed = 100 + + feeds = [ + (u'News', u'http://rss.canada.com/get/?F233'), + (u'Calgary', u'http://www.calgaryherald.com/scripts/sp6query.aspx?catalog=cahr&tags=keyword|calgary&output=rss?link=http%3a%2f%2fwww.calgaryherald'), + (u'Alberta', u'http://www.calgaryherald.com/scripts/Sp6Query.aspx?catalog=CAHR&tags=Keyword|Alberta&output=rss?link=http%3A%2F%2Fwww.calgaryherald.com%2Fnews%2Falberta%2Findex.html'), + (u'Politics', u'http://rss.canada.com/get/?F7551'), + (u'National', u'http://rss.canada.com/get/?F7552'), + (u'World', u'http://rss.canada.com/get/?F7553'), + ] + __author__ = 'rty' + pubisher = 'Calgary Herald' + description = 'Calgary, Alberta, Canada' + category = 'News, Calgary, Alberta, Canada' + + + remove_javascript = True + use_embedded_content = False + no_stylesheets = True + language = 'en_CA' + encoding = 'utf-8' + conversion_options = {'linearize_tables':True} + ##masthead_url = 'http://www.calgaryherald.com/index.html' + keep_only_tags = [ + dict(name='div', attrs={'id':'storyheader'}), + dict(name='div', attrs={'id':'storycontent'}) + + ] + remove_tags_after = {'class':"story_tool_hr"} + diff --git a/recipes/camera_di_commercio_di_bari.recipe b/recipes/camera_di_commercio_di_bari.recipe new file mode 100644 index 0000000000..c80a825883 --- /dev/null +++ b/recipes/camera_di_commercio_di_bari.recipe @@ -0,0 +1,17 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1331729727(BasicNewsRecipe): + title = u'Camera di Commercio di Bari' + oldest_article = 7 + __author__ = 'faber1971' + description = 'News from the Chamber of Commerce of Bari' + language = 
'it' + max_articles_per_feed = 100 + auto_cleanup = True + masthead_url = 'http://www.ba.camcom.it/grafica/layout-bordo/logo_camcom_bari.png' + feeds = [(u'Camera di Commercio di Bari', u'http://feed43.com/4715147488845101.xml')] + +__license__ = 'GPL v3' +__copyright__ = '2012, faber1971' +__version__ = 'v1.00' +__date__ = '17, April 2012' diff --git a/recipes/cd_action.recipe b/recipes/cd_action.recipe index ff46774dc9..4e19fbc6c1 100644 --- a/recipes/cd_action.recipe +++ b/recipes/cd_action.recipe @@ -6,6 +6,7 @@ class CD_Action(BasicNewsRecipe): description = 'cdaction.pl - polish games magazine site' category = 'games' language = 'pl' + index='http://www.cdaction.pl' oldest_article = 8 max_articles_per_feed = 100 no_stylesheets= True @@ -17,4 +18,10 @@ class CD_Action(BasicNewsRecipe): def get_cover_url(self): soup = self.index_to_soup('http://www.cdaction.pl/magazyn/') self.cover_url='http://www.cdaction.pl'+ soup.find(id='wspolnik').div.a['href'] - return getattr(self, 'cover_url', self.cover_url) \ No newline at end of file + return getattr(self, 'cover_url', self.cover_url) + + def preprocess_html(self, soup): + for a in soup('a'): + if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: + a['href']=self.index + a['href'] + return soup \ No newline at end of file diff --git a/recipes/countryfile.recipe b/recipes/countryfile.recipe index 7a41b5b905..0502129791 100644 --- a/recipes/countryfile.recipe +++ b/recipes/countryfile.recipe @@ -1,11 +1,12 @@ +from calibre import browser from calibre.web.feeds.news import BasicNewsRecipe class AdvancedUserRecipe1325006965(BasicNewsRecipe): title = u'Countryfile.com' - cover_url = 'http://www.buysubscriptions.com/static_content/the-immediate/en/images/covers/CFIL_maxi.jpg' + #cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg' __author__ = 'Dave Asbury' description = 'The official website of Countryfile Magazine' - # last updated 29/1/12 + # last updated 15/4/12 language = 'en_GB' oldest_article = 30 max_articles_per_feed = 25 @@ -13,7 +14,23 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe): no_stylesheets = True auto_cleanup = True #articles_are_obfuscated = True + def get_cover_url(self): + soup = self.index_to_soup('http://www.countryfile.com/') + cov = soup.find(attrs={'class' : 'imagecache imagecache-160px_wide imagecache-linked imagecache-160px_wide_linked'}) + #print '******** ',cov,' ***' + cov2 = str(cov) + cov2=cov2[124:-90] + #print '******** ',cov2,' ***' + # try to get cover - if can't get known cover + br = browser() + br.set_handle_redirect(False) + try: + br.open_novisit(cov2) + cover_url = cov2 + except: + cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg' + return cover_url remove_tags = [ # dict(attrs={'class' : ['player']}), diff --git a/recipes/dobreprogamy.recipe b/recipes/dobreprogamy.recipe index a27a9b0877..0614cf98ee 100644 --- a/recipes/dobreprogamy.recipe +++ b/recipes/dobreprogamy.recipe @@ -11,6 +11,7 @@ class Dobreprogramy_pl(BasicNewsRecipe): cover_url = 'http://userlogos.org/files/logos/Karmody/dobreprogramy_01.png' description = u'Aktualności i blogi z dobreprogramy.pl' encoding = 'utf-8' + index='http://www.dobreprogramy.pl/' no_stylesheets = True language = 'pl' extra_css = '.title {font-size:22px;}' @@ -22,3 +23,10 @@ class Dobreprogramy_pl(BasicNewsRecipe): #remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})] feeds = 
[(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'), ('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')] + + + def preprocess_html(self, soup): + for a in soup('a'): + if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: + a['href']=self.index + a['href'] + return soup \ No newline at end of file diff --git a/recipes/dzieje_pl.recipe b/recipes/dzieje_pl.recipe index d80161e71a..4c583e4815 100644 --- a/recipes/dzieje_pl.recipe +++ b/recipes/dzieje_pl.recipe @@ -7,6 +7,7 @@ class Dzieje(BasicNewsRecipe): cover_url = 'http://www.dzieje.pl/sites/default/files/dzieje_logo.png' category = 'history' language = 'pl' + index='http://dzieje.pl' oldest_article = 8 max_articles_per_feed = 100 remove_javascript=True @@ -15,3 +16,10 @@ class Dzieje(BasicNewsRecipe): remove_tags_after= dict(id='dogory') remove_tags=[dict(id='dogory')] feeds = [(u'Dzieje', u'http://dzieje.pl/rss.xml')] + + + def preprocess_html(self, soup): + for a in soup('a'): + if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: + a['href']=self.index + a['href'] + return soup \ No newline at end of file diff --git a/recipes/eioba.recipe b/recipes/eioba.recipe index 14256c5811..1df79d64bd 100644 --- a/recipes/eioba.recipe +++ b/recipes/eioba.recipe @@ -21,3 +21,8 @@ class eioba(BasicNewsRecipe): (u'Rozrywka', u'http://www.eioba.pl/feed/categories/10.xml'), (u'Rożne', u'http://www.eioba.pl/feed/categories/9.xml') ] + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup diff --git a/recipes/emuzica_pl.recipe b/recipes/emuzica_pl.recipe index 75271c510a..2fbf9ff514 100644 --- a/recipes/emuzica_pl.recipe +++ b/recipes/emuzica_pl.recipe @@ -7,6 +7,7 @@ class eMuzyka(BasicNewsRecipe): description = u'Emuzyka to największa i najpopularniejsza strona o muzyce w Polsce' category = 'music' language = 'pl' + index='http://www.emuzyka.pl' cover_url='http://s.emuzyka.pl/img/emuzyka_invert_small.jpg' no_stylesheets = True oldest_article = 7 @@ -14,3 +15,9 @@ class eMuzyka(BasicNewsRecipe): keep_only_tags=[dict(name='div', attrs={'id':'news_container'}), dict(name='h3'), dict(name='div', attrs={'class':'review_text'})] remove_tags=[dict(name='span', attrs={'id':'date'})] feeds = [(u'Aktualno\u015bci', u'http://www.emuzyka.pl/rss.php?f=1'), (u'Recenzje', u'http://www.emuzyka.pl/rss.php?f=2')] + + def preprocess_html(self, soup): + for a in soup('a'): + if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: + a['href']=self.index + a['href'] + return soup \ No newline at end of file diff --git a/recipes/fhm_uk.recipe b/recipes/fhm_uk.recipe index 0e2d5c1ebe..07f2b4b64e 100644 --- a/recipes/fhm_uk.recipe +++ b/recipes/fhm_uk.recipe @@ -7,7 +7,7 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe): # cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/373529_38324934806_64930243_n.jpg' masthead_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif' __author__ = 'Dave Asbury' - # last updated 17/3/12 + # last updated 14/4/12 language = 'en_GB' oldest_article = 28 max_articles_per_feed = 12 @@ -28,7 +28,8 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe): #] feeds = [ - (u'From the Homepage',u'http://feed43.com/8053226782885416.xml'), + (u'From the Homepage',u'http://feed43.com/0032328550253453.xml'), + #http://feed43.com/8053226782885416.xml'), (u'Funny - The Very Best Of The Internet',u'http://feed43.com/4538510106331565.xml'), 
(u'Upgrade',u'http://feed43.com/0877305847443234.xml'), #(u'The Final Countdown', u'http://feed43.com/3576106158530118.xml'), diff --git a/recipes/film_web.recipe b/recipes/film_web.recipe index 877d4472bc..2a6e00d501 100644 --- a/recipes/film_web.recipe +++ b/recipes/film_web.recipe @@ -7,6 +7,7 @@ class Filmweb_pl(BasicNewsRecipe): cover_url = 'http://userlogos.org/files/logos/crudus/filmweb.png' category = 'movies' language = 'pl' + index='http://www.filmweb.pl' oldest_article = 8 max_articles_per_feed = 100 no_stylesheets= True @@ -39,3 +40,9 @@ class Filmweb_pl(BasicNewsRecipe): self.log.warn(skip_tag) return self.index_to_soup(skip_tag['href'], raw=True) + + def preprocess_html(self, soup): + for a in soup('a'): + if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: + a['href']=self.index + a['href'] + return soup \ No newline at end of file diff --git a/recipes/fotoblogia_pl.recipe b/recipes/fotoblogia_pl.recipe new file mode 100644 index 0000000000..99df46419a --- /dev/null +++ b/recipes/fotoblogia_pl.recipe @@ -0,0 +1,16 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class Fotoblogia_pl(BasicNewsRecipe): + title = u'Fotoblogia.pl' + __author__ = 'fenuks' + category = 'photography' + language = 'pl' + masthead_url = 'http://img.interia.pl/komputery/nimg/u/0/fotoblogia21.jpg' + cover_url= 'http://fotoblogia.pl/images/2009/03/fotoblogia2.jpg' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + keep_only_tags=[dict(name='div', attrs={'class':'post-view post-standard'})] + remove_tags=[dict(attrs={'class':['external fotoblogia', 'categories', 'tags']})] + feeds = [(u'Wszystko', u'http://fotoblogia.pl/feed/rss2')] diff --git a/recipes/gameplay_pl.recipe b/recipes/gameplay_pl.recipe index f3384263d6..7b0ccb4f55 100644 --- a/recipes/gameplay_pl.recipe +++ b/recipes/gameplay_pl.recipe @@ -6,16 +6,24 @@ class Gameplay_pl(BasicNewsRecipe): description = u'gameplay.pl - serwis o naszych zainteresowaniach, grach, filmach, książkach, muzyce, fotografii i konsolach.' 
 category = 'games, movies, books, music'
     language = 'pl'
+    index='http://gameplay.pl'
     masthead_url= 'http://gameplay.pl/img/gpy_top_logo.png'
     cover_url= 'http://gameplay.pl/img/gpy_top_logo.png'
     max_articles_per_feed = 100
+    remove_javascript= True
     no_stylesheets= True
     keep_only_tags=[dict(name='div', attrs={'class':['news_endpage_tit', 'news']})]
-    remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im']})]
+    remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im', 'news_list', 'news_list_autor', 'stop_bot', 'tagi']}), dict(attrs={'usemap':'#map'})]
     feeds = [(u'Wiadomo\u015bci', u'http://gameplay.pl/rss/')]

     def image_url_processor(self, baseurl, url):
         if 'http' not in url:
             return 'http://gameplay.pl'+ url[2:]
         else:
-            return url
+            return url
+
+    def preprocess_html(self, soup):
+        for a in soup('a'):
+            if a.has_key('href') and '../' in a['href']:
+                a['href']=self.index + a['href'][2:]
+        return soup
\ No newline at end of file
diff --git a/recipes/gildia_pl.recipe b/recipes/gildia_pl.recipe
index 042902b5fc..36d3ef4da2 100644
--- a/recipes/gildia_pl.recipe
+++ b/recipes/gildia_pl.recipe
@@ -9,6 +9,7 @@ class Gildia(BasicNewsRecipe):
     language = 'pl'
     oldest_article = 8
     max_articles_per_feed = 100
+    remove_empty_feeds=True
     no_stylesheets=True
     remove_tags=[dict(name='div', attrs={'class':'backlink'}), dict(name='div', attrs={'class':'im_img'}), dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'})]
     keep_only_tags=dict(name='div', attrs={'class':'widetext'})
@@ -24,3 +25,14 @@ class Gildia(BasicNewsRecipe):
             self.log.warn('odnosnik')
             self.log.warn(link['href'])
             return self.index_to_soup(link['href'], raw=True)
+
+    def preprocess_html(self, soup):
+        for a in soup('a'):
+            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
+                if '/gry/' in a['href']:
+                    a['href']='http://www.gry.gildia.pl' + a['href']
+                elif u'książk' in soup.title.string.lower() or u'komiks' in soup.title.string.lower():
+                    a['href']='http://www.literatura.gildia.pl' + a['href']
+                else:
+                    a['href']='http://www.gildia.pl' + a['href']
+        return soup
diff --git a/recipes/gram_pl.recipe b/recipes/gram_pl.recipe
index 07927796c0..1f8147ba3d 100644
--- a/recipes/gram_pl.recipe
+++ b/recipes/gram_pl.recipe
@@ -7,6 +7,7 @@ class Gram_pl(BasicNewsRecipe):
     category = 'games'
     language = 'pl'
     oldest_article = 8
+    index='http://www.gram.pl'
     max_articles_per_feed = 100
     no_stylesheets= True
     extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}'
@@ -52,4 +53,7 @@ class Gram_pl(BasicNewsRecipe):
         tag=soup.findAll(name='div', attrs={'class':'picbox'})
         for t in tag:
             t['style']='float: left;'
+        for a in soup('a'):
+            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
+                a['href']=self.index + a['href']
         return soup
\ No newline at end of file
diff --git a/recipes/heise.recipe b/recipes/heise.recipe
index 56d5516656..ba93ea96ce 100644
--- a/recipes/heise.recipe
+++ b/recipes/heise.recipe
@@ -59,6 +59,7 @@ class heiseDe(BasicNewsRecipe):
         dict(name='span', attrs={'class':'rsaquo'}),
         dict(name='div', attrs={'class':'news_logo'}),
         dict(name='div', attrs={'class':'bcadv ISI_IGNORE'}),
+        dict(name='div', attrs={'class':'navi_top_container'}),
         dict(name='p', attrs={'class':'news_option'}),
         dict(name='p', attrs={'class':'news_navi'}),
         dict(name='div', attrs={'class':'news_foren'})]
@@ -69,3 +70,5 @@ class
heiseDe(BasicNewsRecipe): + + diff --git a/recipes/historia_news.recipe b/recipes/historia_news.recipe new file mode 100644 index 0000000000..4eca8ade91 --- /dev/null +++ b/recipes/historia_news.recipe @@ -0,0 +1,20 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class historia_news(BasicNewsRecipe): + title = u'historia-news' + __author__ = 'fenuks' + description = u'Historia-news to portal dla ludzi kochających historię. Najnowsze newsy z historii bliższej i dalszej, archeologii, paleontologii oraz ciekawostki i podcasty z historii kultury, sportu, motoryzacji i inne.' + masthead_url = 'http://historia-news.pl/templates/hajak4/images/header.jpg' + cover_url= 'http://www.historia-news.pl/templates/hajak4/images/header.jpg' + category = 'history' + language = 'pl' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + remove_empty_feeds = True + remove_tags=[dict(name='form'), dict(name='img', attrs={'alt':'Print'}), dict(attrs={'class':['commbutt', 'cpr']}), dict(id=['plusone', 'facebook'])] + feeds = [(u'Wiadomo\u015bci', u'http://historia-news.pl/wiadomoci.feed?type=rss'), (u'Artyku\u0142y', u'http://historia-news.pl/artykuy.feed?type=rss')] + + + def print_version(self, url): + return url + '?tmpl=component&print=1&layout=default&page=' diff --git a/recipes/icons/fotoblogia_pl.png b/recipes/icons/fotoblogia_pl.png new file mode 100644 index 0000000000..0204a04e62 Binary files /dev/null and b/recipes/icons/fotoblogia_pl.png differ diff --git a/recipes/icons/historia_news.png b/recipes/icons/historia_news.png new file mode 100644 index 0000000000..79b1b52859 Binary files /dev/null and b/recipes/icons/historia_news.png differ diff --git a/recipes/icons/swiat_obrazu.png b/recipes/icons/swiat_obrazu.png new file mode 100644 index 0000000000..a61662a864 Binary files /dev/null and b/recipes/icons/swiat_obrazu.png differ diff --git a/recipes/in4_pl.recipe b/recipes/in4_pl.recipe index 16ad622b46..e385522714 100644 --- a/recipes/in4_pl.recipe +++ b/recipes/in4_pl.recipe @@ -8,6 +8,7 @@ class in4(BasicNewsRecipe): description = u'Serwis Informacyjny - Aktualnosci, recenzje' category = 'IT' language = 'pl' + index='http://www.in4.pl/' #cover_url= 'http://www.in4.pl/recenzje/337/in4pl.jpg' no_stylesheets = True remove_empty_feeds = True @@ -39,6 +40,7 @@ class in4(BasicNewsRecipe): def preprocess_html(self, soup): self.append_page(soup, soup.body) + for a in soup('a'): + if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: + a['href']=self.index + a['href'] return soup - - diff --git a/recipes/infra_pl.recipe b/recipes/infra_pl.recipe index 0e035e0980..e021fa0c17 100644 --- a/recipes/infra_pl.recipe +++ b/recipes/infra_pl.recipe @@ -8,6 +8,7 @@ class INFRA(BasicNewsRecipe): description = u'Serwis Informacyjny INFRA - UFO, Zjawiska Paranormalne, Duchy, Tajemnice świata.' 
 cover_url = 'http://npn.nazwa.pl/templates/ja_teline_ii/images/logo.jpg'
     category = 'UFO'
+    index='http://infra.org.pl'
     language = 'pl'
     max_articles_per_feed = 100
     no_stylesheers=True
@@ -15,3 +16,11 @@
     remove_tags_after=dict(attrs={'class':'pagenav'})
     remove_tags=[dict(attrs={'class':'pagenav'})]
     feeds = [(u'Najnowsze wiadomo\u015bci', u'http://www.infra.org.pl/index.php?option=com_rd_rss&id=1')]
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        for a in soup('a'):
+            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
+                a['href']=self.index + a['href']
+        return soup
\ No newline at end of file
diff --git a/recipes/jakarta_globe.recipe b/recipes/jakarta_globe.recipe
new file mode 100644
index 0000000000..1414ac6e5b
--- /dev/null
+++ b/recipes/jakarta_globe.recipe
@@ -0,0 +1,33 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class JakartaGlobe(BasicNewsRecipe):
+    title = u'Jakarta Globe'
+    oldest_article = 3
+    max_articles_per_feed = 100
+
+    feeds = [
+        (u'News', u'http://www.thejakartaglobe.com/pages/getrss/getrss-news.php'),
+        (u'Business', u'http://www.thejakartaglobe.com/pages/getrss/getrss-business.php'),
+        (u'Technology', u'http://www.thejakartaglobe.com/pages/getrss/getrss-tech.php'),
+        (u'My Jakarta', u'http://www.thejakartaglobe.com/pages/getrss/getrss-myjakarta.php'),
+        (u'International', u'http://www.thejakartaglobe.com/pages/getrss/getrss-international.php'),
+        (u'Life and Times', u'http://www.thejakartaglobe.com/pages/getrss/getrss-lifeandtimes.php'),
+    ]
+    __author__ = 'rty'
+    publisher = 'JakartaGlobe.com'
+    description = 'JakartaGlobe, Indonesia, Newspaper'
+    category = 'News, Indonesia'
+
+
+    remove_javascript = True
+    use_embedded_content = False
+    no_stylesheets = True
+    language = 'en_ID'
+    encoding = 'utf-8'
+    conversion_options = {'linearize_tables':True}
+    masthead_url = 'http://www.thejakartaglobe.com/pages/2010/images/jak-globe-logo.jpg'
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'story'}),
+        dict(name='span', attrs={'class':'headline'}),
+        dict(name='p', attrs={'id':'bodytext'})
+    ]
diff --git a/recipes/konflikty_zbrojne.recipe b/recipes/konflikty_zbrojne.recipe
index 7921e98f48..e8b28b49bf 100644
--- a/recipes/konflikty_zbrojne.recipe
+++ b/recipes/konflikty_zbrojne.recipe
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup

 class Konflikty(BasicNewsRecipe):
     title = u'Konflikty Zbrojne'
@@ -10,6 +11,23 @@ class Konflikty(BasicNewsRecipe):
     category='military, history'
     oldest_article = 7
     max_articles_per_feed = 100
-    auto_cleanup = True
+    no_stylesheets = True
+    keep_only_tags=[dict(attrs={'class':['title1', 'image']}), dict(id='body')]

-    feeds = [(u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'), (u'Artyku\u0142y', u'http://www.konflikty.pl/rss_artykuly_10.xml'), (u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'), (u'Recenzje', u'http://www.konflikty.pl/rss_recenzje_10.xml')]
+    feeds = [(u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'),
+             (u'Artyku\u0142y', u'http://www.konflikty.pl/rss_artykuly_10.xml'),
+             (u'Historia', u'http://www.konflikty.pl/rss_historia_10.xml'),
+             (u'Militaria', u'http://www.konflikty.pl/rss_militaria_10.xml'),
+             (u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'),
+             (u'Recenzje',
u'http://www.konflikty.pl/rss_recenzje_10.xml'), + (u'Teksty źródłowe', u'http://www.konflikty.pl/rss_tekstyzrodlowe_10.xml')] + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + for image in soup.findAll(name='a', attrs={'class':'image'}): + if image.img and image.img.has_key('alt'): + image.name='div' + pos = len(image.contents) + image.insert(pos, BeautifulSoup('
<br />'+image.img['alt']+'<br />
')) + return soup diff --git a/recipes/liberatorio_politico.recipe b/recipes/liberatorio_politico.recipe new file mode 100644 index 0000000000..bbffcd89b1 --- /dev/null +++ b/recipes/liberatorio_politico.recipe @@ -0,0 +1,12 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1334649829(BasicNewsRecipe): + title = u'Liberatorio Politico' + oldest_article = 7 + max_articles_per_feed = 100 + auto_cleanup = True + masthead_url = 'http://liberatorio.altervista.org/wp-content/uploads/2012/01/Testata-LIBERATORIO-Altervista1.jpg' + feeds = [(u'Liberatorio Politico', u'http://liberatorio.altervista.org/feed/')] + __author__ = 'faber1971' + description = 'Inquiry journalism - a blog on Molfetta, Land of Bari, Apulia and Italy - v1.00 (07, April 2012)' + language = 'it' diff --git a/recipes/limes.recipe b/recipes/limes.recipe new file mode 100644 index 0000000000..2290b7099e --- /dev/null +++ b/recipes/limes.recipe @@ -0,0 +1,50 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' +__copyright__ = '2012, faber1971' +__version__ = 'v1.00' +__date__ = '16, April 2012' +__description__ = 'Geopolitical Italian magazine' + + +from calibre.web.feeds.news import BasicNewsRecipe + +class Limes(BasicNewsRecipe): + description = 'Italian weekly magazine' + __author__ = 'faber1971' + + cover_url = 'http://temi.repubblica.it/UserFiles/limes/Image/Loghi/logo-limes.gif' + title = 'Limes' + category = 'Geopolitical news' + + language = 'it' +# encoding = 'cp1252' + timefmt = '[%a, %d %b, %Y]' + + oldest_article = 16 + max_articles_per_feed = 100 + use_embedded_content = False + recursion = 10 + + remove_javascript = True + no_stylesheets = True + masthead_url = 'http://temi.repubblica.it/UserFiles/limes/Image/Loghi/logo-limes.gif' + + feeds = [ + (u'Limes', u'http://temi.repubblica.it/limes/feed/') + ] + + + + keep_only_tags = [ + dict(name='div', attrs={'class':['testo','copertina','occhiello','firma','didascalia','content-second-right','detail-articles','titolo-local','generic-articles']}), + dict(name='div', attrs={'class':['generic-articles','summary','detail-articles']}), + dict(name='div', attrs={'id':['content-second-right','content2']}) + ] + + remove_tags = [ + dict(name='div',attrs={'class':['servizi','aggiungi','label-web','bottom-mobile','box-abbonamenti','box-cerca','big','little','stampaweb']}), + dict(name='div',attrs={'id':['topheader','header','navigation-new','navigation','content-second-left','menutext']}), + dict(name='ul',attrs={'id':'user-utility'}), + dict(name=['script','noscript','iframe']) + ] + diff --git a/recipes/metro_news_nl.recipe b/recipes/metro_news_nl.recipe index ac3e23869b..d95f9bdfd7 100644 --- a/recipes/metro_news_nl.recipe +++ b/recipes/metro_news_nl.recipe @@ -3,25 +3,6 @@ from calibre.web.feeds.news import BasicNewsRecipe import re from calibre.utils.magick import Image from BeautifulSoup import BeautifulSoup -try: - from calibre_plugins.drMerry.debug import debuglogger as mlog - print 'drMerry debuglogger found, debug options can be used' - from calibre_plugins.drMerry.stats import statslogger as mstat - print 'drMerry stats tracker found, stat can be tracked' - mlog.setLoglevel(1) #-1 == no log; 0 for normal output - mstat.calculateStats(False) #track stats (to track stats loglevel must be > 0 - KEEPSTATS = mstat.keepmystats() - SHOWDEBUG0 = mlog.showdebuglevel(0) - SHOWDEBUG1 = mlog.showdebuglevel(1) - SHOWDEBUG2 = mlog.showdebuglevel(2) -except: - #print 'drMerry debuglogger not found, skipping debug options' - SHOWDEBUG0 = False - SHOWDEBUG1 
= False - SHOWDEBUG2 = False - KEEPSTATS = False - -#print ('level0: %s\nlevel1: %s\nlevel2: %s' % (SHOWDEBUG0,SHOWDEBUG1,SHOWDEBUG2)) ''' Version 1.2, updated cover image to match the changed website. added info date on title @@ -43,6 +24,9 @@ except: extended timeout from 2 to 10 changed oldest article from 10 to 1.2 changed max articles from 15 to 25 + Version 1.9.1 18-04-2012 + removed some debug settings + updated code to match new metro-layout ''' class AdvancedUserRecipe1306097511(BasicNewsRecipe): @@ -70,34 +54,40 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe): 'author_sort' : 'Metro Nederland & calibre & DrMerry', 'publisher' : 'DrMerry/Metro Nederland' } - extra_css = 'body {padding:5px 0px; background:#fff;font-size: 13px;}\ - #date, div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name {clear: both;margin-bottom: 10px;font-size:0.5em; color: #616262;}\ - .article-box-fact.module-title {clear:both;padding: 8px 0;color: #24763b;font-family: arial, sans-serif;font-size: 14px;font-weight: bold;}\ - h1.title {color: #000000;font-size: 44px;padding-bottom: 10px;font-weight: 300;} h2.subtitle {font-size: 13px;font-weight: 700;padding-bottom: 10px;}\ - .article-body p{padding-bottom:10px;}div.column-1-3{margin-left: 19px;padding-right: 9px;}\ - div.column-1-2 {display: inline;padding-right: 7px;}\ - p.article-image-caption {font-size: 12px;font-weight: 300;color: #616262;margin-top: 5px;} \ - p.article-image-caption .credits {font-style: italic;font-size: 10px;}\ - div.article-image-caption {width: 246px;margin-bottom: 5px;margin-left: 10px;}\ - div.article-image-caption-2column {margin-bottom: 10px;width: 373px;} div.article-image-caption-3column {}\ - img {border:0px; padding:2px;} hr.merryhr {width:30%; border-width:0px; color:green; margin-left:5px; background-color: green} div.column-3 {background-color:#eee; width:50%; margin:2px; float:right; padding:2px;} div.column-3 module-title {border: 1px solid #aaa} div.article-box-fact div.subtitle {font-weight:bold; color:green;}' + extra_css = 'body {padding:5px 0; background-color:#fff;font-size: 1em}\ + #date, div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name {margin-bottom: 10px}\ + #date, div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name, p.article-image-caption .credits {font-size:0.5em}\ + .article-box-fact.module-title, #date, div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name {clear:both}\ + .article-box-fact.module-title {padding: 8px 0}\ + h1.title {color: #000;font-size: 1.4em}\ + .article-box-fact.module-title, h2.subtitle {font-size: 1.2em}\ + h1.title, h2.subtitle, .article-body p{padding-bottom:10px}\ + h1.title, p.article-image-caption {font-weight: 300}\ + div.column-1-3{margin-left: 19px;padding-right: 9px}\ + div.column-1-2 {display: inline;padding-right: 7px}\ + p.article-image-caption {font-size: 0.6em;margin-top: 5px}\ + p.article-image-caption, #date, div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name {color: #616262}\ + p.article-image-caption .credits {font-style: italic}\ + div.article-image-caption {width: 246px;margin: 5px}\ + div.article-image-caption-2column {width: 373px}\ + div.article-image-caption-2column, div.article-image-caption-3column {margin-bottom: 5px}\ + img {border:0}\ + img, div.column-3 {padding:2px}\ + hr.merryhr {width:30%; 
border-width:0; margin-left:5px; background-color: #24763b}\
+        div.column-3 {background-color:#eee; width:50%; margin:2px; float:right}\
+        div.column-3 module-title {border: 1px solid #aaa}\
+        div.article-box-fact div.subtitle, .article-box-fact.module-title, h2.subtitle {font-weight:bold}\
+        div.article-box-fact div.subtitle, hr.merryhr, .article-box-fact.module-title {color: #24763b}'
+
     preprocess_regexps = [
         (re.compile(r'<div[^>]+top-line[^>]+>', re.DOTALL|re.IGNORECASE),
             lambda match: '<hr class="merryhr" />
'), - (re.compile(r'(]+metronieuws\.nl/[^>]+/templates/[^>]+jpe?g[^>]+>|metronieuws\.nl/internal\-roxen\-unit\.gif)', re.DOTALL|re.IGNORECASE), + (re.compile(r']+(metronieuws\.nl/[^>]+/templates/[^>]+jpe?g|metronieuws\.nl/internal\-roxen\-unit\.gif)[^>]+>', re.DOTALL|re.IGNORECASE), lambda match: ''), ] def preprocess_html(self, soup): - if SHOWDEBUG0 == True: - mlog.setdefaults() - mlog.addTextAndTag(['Show debug = on with level'], [str(mlog.debuglevel)]) - if KEEPSTATS == True: - mlog.addDebug('Stats will be calculated') - else: - mlog.addTextAndTag(['Stats won\'t be calculated\nTo be enabled, stats must be true, currently','and debug level must be 1 or higher, currently'],[mstat.dokeepmystats, mlog.debuglevel]) - mlog.showDebug() myProcess = MerryProcess() myProcess.removeUnwantedTags(soup) return soup @@ -105,18 +95,6 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe): def postprocess_html(self, soup, first): myProcess = MerryProcess() myProcess.optimizeLayout(soup) - if SHOWDEBUG0 == True: - if KEEPSTATS == True: - statinfo = 'generated stats:' - statinfo += str(mstat.stats(mstat.statslist)) - print statinfo - statinfo = 'generated stats (for removed tags):' - statinfo += str(mstat.stats(mstat.removedtagslist)) - print statinfo - #show all Debug info we forgot to report - #Using print to be sure that this text will not be added at the end of the log. - print '\n!!!!!unreported messages:\n(should be empty)\n' - mlog.showDebug() return soup feeds = [ @@ -142,44 +120,24 @@ class MerryPreProcess(): return soup def optimizePicture(self,soup): - if SHOWDEBUG0 == True: - mlog.addDebug('start image optimize') for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')): iurl = tag['src'] img = Image() img.open(iurl) img.trim(0) img.save(iurl) - if SHOWDEBUG0 == True: - mlog.addDebug('Images optimized') - mlog.showDebug() return soup class MerryExtract(): def safeRemovePart(self, killingSoup, soupIsArray): if killingSoup and not killingSoup == None: - if SHOWDEBUG2 == True: - mlog.addTextAndTag(['items to remove'],[killingSoup]) try: if soupIsArray == True: for killer in killingSoup: killer.extract() else: killingSoup.extract() - if SHOWDEBUG1 == True: - mlog.addDebug('tag extracted') - mlog.showDebug() - if KEEPSTATS == True: - try: - mstat.addstat(mstat.removedtagslist,str(killingSoup.name)) - except: - mstat.addstat(mstat.removedtagslist,'unknown') except: - if SHOWDEBUG1 == True: - mlog.addDebug('tag extraction failed') - mlog.showDebug() - if KEEPSTATS == True: - mstat.addstat(mstat.removedtagslist,'exception') return False else: return False @@ -230,60 +188,26 @@ class MerryProcess(BeautifulSoup): def optimizeLayout(self,soup): self.myPrepare.optimizePicture(soup) - if SHOWDEBUG0 == True: - mlog.addDebug('End of Optimize Layout') - mlog.showDebug() return soup def insertFacts(self, soup): allfacts = soup.findAll('div', {'class':re.compile('^article-box-fact.*$')}) - if SHOWDEBUG0 == True: - mlog.addTextAndTag(['allfacts'],[allfacts]) - mlog.showDebug() if allfacts and not allfacts == None: allfactsparent = soup.find('div', {'class':re.compile('^article-box-fact.*$')}).parent - if SHOWDEBUG0 == True: - mlog.addTextAndTag(['allfactsparent'],[allfactsparent]) - mlog.showDebug() for part in allfactsparent: if not part in allfacts: - if SHOWDEBUG0 == True: - mlog.addTextAndTag(['FOUND A non-fact'],[part]) - mlog.showDebug() self.myKiller.safeRemovePart(part, True) - if SHOWDEBUG1 == True: - mlog.addTextAndTag(['New All Facts'],[allfacts]) - mlog.showDebug() 
articlefacts = soup.find('div', {'class':'article-box-fact column'}) - errorOccured=False if (articlefacts and not articlefacts==None): try: contenttag = soup.find('div', {'class':'article-body'}) - if SHOWDEBUG0 == True: - mlog.addTextAndTag(['curcontag'],[contenttag]) - mlog.showDebug() foundrighttag = False if contenttag and not contenttag == None: foundrighttag = True - if SHOWDEBUG0 == True: - if errorOccured == False: - mlog.addTextAndTag(['type','curcontag (in while)'],[type(contenttag),contenttag]) - else: - mlog.addDebug('Could not find right parent tag. Error Occured') - mlog.showDebug() if foundrighttag == True: contenttag.insert(0, allfactsparent) - if SHOWDEBUG2 == True: - mlog.addTextAndTag(['added parent'],[soup.prettify()]) - mlog.showDebug() except: - errorOccured=True - mlog.addTrace() - else: - errorOccured=True - if SHOWDEBUG0 == True and errorOccured == True: - mlog.addTextAndTag(['no articlefacts'],[articlefacts]) - mlog.showDebug() + pass return soup def previousNextSibRemover(self, soup, previous=True, soupIsArray=False): @@ -300,71 +224,38 @@ class MerryProcess(BeautifulSoup): sibs = findsibsof.nextSiblingGenerator() for sib in sibs: self.myKiller.safeRemovePart(sib, True) - else: - if SHOWDEBUG1 == True: - mlog.addDebug('Not any sib found') return def removeUnwantedTags(self,soup): - if SHOWDEBUG1 == True: - mlog.addTextAndTag(['Len of Soup before RemoveTagsByName'],[len(str(soup))]) - mlog.showDebug() self.removeTagsByName(soup) - if SHOWDEBUG1 == True: - mlog.addDebug('Len of Soup before firstandlastpart: %s' % len(str(soup))) - mlog.showDebug() self.insertFacts(soup) self.removeFirstAndLastPart(soup) - if SHOWDEBUG1 == True: - mlog.addDebug('Len of Soup before unwantedpart: %s' % len(str(soup))) - mlog.showDebug() self.removeUnwantedParts(soup) - if SHOWDEBUG1 == True: - mlog.addDebug('Len of Soup before EmptyParts: %s' % len(str(soup))) - mlog.showDebug() self.removeEmptyTags(soup) - if SHOWDEBUG1 == True: - mlog.addDebug('Len of Soup after EmptyParts: %s' % len(str(soup))) - mlog.showDebug() self.myReplacer.replaceATag(soup) return soup def removeUnwantedParts(self, soup): - if SHOWDEBUG1 == True: - mlog.addDebug('Len of Soup before UnwantedID: %s' % len(str(soup))) - mlog.showDebug() self.removeUnwantedTagsByID(soup) - if SHOWDEBUG1 == True: - mlog.addDebug('Len of Soup before Class: %s' % len(str(soup))) - mlog.showDebug() self.removeUnwantedTagsByClass(soup) - if SHOWDEBUG1 == True: - mlog.addDebug('Len of Soup before Style: %s' % len(str(soup))) - mlog.showDebug() self.removeUnwantedTagsByStyle(soup) return soup def removeUnwantedTagsByStyle(self,soup): - self.removeArrayOfTags(soup.findAll(attrs={'style' : re.compile("^(.*(display\s?:\s?none|img-mask|white)\s?;?.*)$")})) - if SHOWDEBUG0 == True: - mlog.addDebug('end remove by style') + self.removeArrayOfTags(soup.findAll(attrs={'style':re.compile("^(.*(display\s?:\s?none|img-mask|white)\s?;?.*)$")})) + self.removeArrayOfTags(soup.findAll(attrs={'title':'volledig scherm'})) return soup def removeArrayOfTags(self,souparray): return self.myKiller.safeRemovePart(souparray, True) def removeUnwantedTagsByClass(self,soup): - if SHOWDEBUG0 == True: - mlog.addDebug('start remove by class') - self.removeArrayOfTags(soup.findAll("div", { "class" :re.compile('^(promo.*?|article-tools-below-title|metroCommentFormWrap|ad|share-tools|tools|header-links|related-links|padding-top-15)$')})) + self.removeArrayOfTags(soup.findAll("div", { "class" 
:re.compile('^(promo.*?|share-tools-top|share-tools-bottom|article-tools-below-title|metroCommentFormWrap|ad|share-tools|tools|header-links|related-links|padding-top-15|footer-[a-zA-Z0-9]+)$')})) return soup def removeUnwantedTagsByID(self,soup): - defaultids = ['footer-extra',re.compile('^ad(\d+|adcomp.*?)?$'),'column-4-5','navigation','header',re.compile('^column-1-5-(top|bottom)$'),'footer','hidden_div','sidebar',re.compile('^article-\d$'),'comments','footer'] + defaultids = ['footer-extra',re.compile('^ad(\d+|adcomp.*?)?$'),'column-4-5','navigation','header',re.compile('^column-1-5-(top|bottom)$'),'footer','hidden_div','sidebar',re.compile('^article-\d$'),'comments','footer','gallery-1'] for removeid in defaultids: - if SHOWDEBUG1 == True: - mlog.addDebug('RemoveTagByID, tag: %s, Len of Soup: %s' % (str(removeid), len(str(soup)))) - mlog.showDebug() self.removeArrayOfTags(soup.findAll(id=removeid)) return soup @@ -380,33 +271,12 @@ class MerryProcess(BeautifulSoup): return soup def removeEmptyTags(self,soup,run=0): - if SHOWDEBUG0 == True: - mlog.addDebug('starting removeEmptyTags') - if SHOWDEBUG1 == True: - run += 1 - mlog.addDebug(run) - if SHOWDEBUG2 == True: - mlog.addDebug(str(soup.prettify())) - mlog.showDebug() emptymatches = re.compile('^( |\s|\n|\r|\t)*$') emptytags = soup.findAll(lambda tag: tag.find(True) is None and (tag.string is None or tag.string.strip()=="" or tag.string.strip()==emptymatches) and not tag.isSelfClosing) if emptytags and not (emptytags == None or emptytags == []): - if SHOWDEBUG1 == True: - mlog.addDebug('tags found') - mlog.addDebug(str(emptytags)) self.removeArrayOfTags(emptytags) #recursive in case removing empty tag creates new empty tag self.removeEmptyTags(soup, run=run) - else: - if SHOWDEBUG1 == True: - mlog.addDebug('no empty tags found') - mlog.showDebug() - if SHOWDEBUG0 == True: - if SHOWDEBUG2 == True: - mlog.addDebug('new soup:') - mlog.addDebug(str(soup.prettify())) - mlog.addDebug('RemoveEmptyTags Completed') - mlog.showDebug() return soup def removeFirstAndLastPart(self,soup): diff --git a/recipes/national_geographic_pl.recipe b/recipes/national_geographic_pl.recipe index a2f759e878..07fc0da666 100644 --- a/recipes/national_geographic_pl.recipe +++ b/recipes/national_geographic_pl.recipe @@ -9,8 +9,9 @@ from calibre.web.feeds.recipes import BasicNewsRecipe class recipeMagic(BasicNewsRecipe): title = 'National Geographic PL' __author__ = 'Marcin Urban 2011' + __modified_by__ = 'fenuks' description = 'legenda wśród magazynów z historią sięgającą 120 lat' - cover_url = 'http://www.guj.pl/var/guj/storage/images/media/nasze_magazyny/national_geographic/logo/ng_logo/2606-1-pol-PL/ng_logo.jpg' + #cover_url = 'http://www.guj.pl/var/guj/storage/images/media/nasze_magazyny/national_geographic/logo/ng_logo/2606-1-pol-PL/ng_logo.jpg' oldest_article = 7 max_articles_per_feed = 100 no_stylesheets = True @@ -42,11 +43,43 @@ class recipeMagic(BasicNewsRecipe): ] remove_attributes = ['width','height'] + feeds=[] - feeds = [ - ('National Geographic PL', 'http://www.national-geographic.pl/rss/'), - ] + def find_articles(self, url): + articles = [] + soup=self.index_to_soup(url) + tag=soup.find(attrs={'class':'arl'}) + art=tag.ul.findAll('li') + for i in art: + title=i.a['title'] + url=i.a['href'] + #date=soup.find(id='footer').ul.li.string[41:-1] + desc=i.div.p.string + articles.append({'title' : title, + 'url' : url, + 'date' : '', + 'description' : desc + }) + return articles + + def parse_index(self): + feeds = [] + feeds.append((u"Aktualności", 
self.find_articles('http://www.national-geographic.pl/aktualnosci/'))) + feeds.append((u"Artykuły", self.find_articles('http://www.national-geographic.pl/artykuly/'))) + + return feeds def print_version(self, url): - return url.replace('artykuly0Cpokaz', 'drukuj-artykul') + if 'artykuly' in url: + return url.replace('artykuly/pokaz', 'drukuj-artykul') + elif 'aktualnosci' in url: + return url.replace('aktualnosci/pokaz', 'drukuj-artykul') + else: + return url + + def get_cover_url(self): + soup = self.index_to_soup('http://www.national-geographic.pl/biezace-wydania/') + tag=soup.find(attrs={'class':'txt jus'}) + self.cover_url=tag.img['src'] + return getattr(self, 'cover_url', self.cover_url) diff --git a/recipes/nowa_fantastyka.recipe b/recipes/nowa_fantastyka.recipe index ec556da5fa..0371cb1f58 100644 --- a/recipes/nowa_fantastyka.recipe +++ b/recipes/nowa_fantastyka.recipe @@ -81,5 +81,7 @@ class Nowa_Fantastyka(BasicNewsRecipe): title=soup.find(attrs={'class':'tytul'}) if title: title['style']='font-size: 20px; font-weight: bold;' - self.log.warn(soup) + for a in soup('a'): + if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: + a['href']=self.INDEX + a['href'] return soup diff --git a/recipes/orlando_sentinel.recipe b/recipes/orlando_sentinel.recipe index 7a59f6f6ba..b327bc2b74 100644 --- a/recipes/orlando_sentinel.recipe +++ b/recipes/orlando_sentinel.recipe @@ -1,3 +1,4 @@ +import urllib, re from calibre.web.feeds.news import BasicNewsRecipe class AdvancedUserRecipe1279258912(BasicNewsRecipe): @@ -27,12 +28,30 @@ class AdvancedUserRecipe1279258912(BasicNewsRecipe): encoding = 'utf-8' conversion_options = {'linearize_tables':True} masthead_url = 'http://www.orlandosentinel.com/media/graphic/2009-07/46844851.gif' - keep_only_tags = [ - dict(name='div', attrs={'class':'story'}) - ] - remove_tags = [ - dict(name='div', attrs={'class':['articlerail','tools','comment-group','clearfix']}), - ] - remove_tags_after = [ - dict(name='p', attrs={'class':'copyright'}), - ] + + auto_cleanup = True + + def get_article_url(self, article): + ans = None + try: + s = article.summary + ans = urllib.unquote( + re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1)) + except: + pass + if ans is None: + link = article.get('feedburner_origlink', None) + if link and link.split('/')[-1]=="story01.htm": + link=link.split('/')[-2] + encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&', + '0D': '?', '0E': '-', '0N': '.com', '0L': 'http:', + '0S':'//'} + for k, v in encoding.iteritems(): + link = link.replace(k, v) + ans = link + elif link: + ans = link + if ans is not None: + return ans.replace('?track=rss', '') + + diff --git a/recipes/pc_arena.recipe b/recipes/pc_arena.recipe index 952db30c3e..56bb601f70 100644 --- a/recipes/pc_arena.recipe +++ b/recipes/pc_arena.recipe @@ -7,6 +7,7 @@ class PC_Arena(BasicNewsRecipe): description = u'Najnowsze informacje z branży IT - testy, recenzje, aktualności, rankingi, wywiady. Twoje źródło informacji o sprzęcie komputerowym.' 
category = 'IT' language = 'pl' + index='http://pcarena.pl' masthead_url='http://pcarena.pl/pcarena/img/logo.png' cover_url= 'http://pcarena.pl/pcarena/img/logo.png' no_stylesheets = True @@ -22,4 +23,10 @@ class PC_Arena(BasicNewsRecipe): if 'http' not in url: return 'http://pcarena.pl' + url else: - return url \ No newline at end of file + return url + + def preprocess_html(self, soup): + for a in soup('a'): + if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: + a['href']=self.index + a['href'] + return soup \ No newline at end of file diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe index 38f7ec1a9a..92c9aaf9d6 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -1,5 +1,5 @@ """ -readitlaterlist.com +Pocket Calibre Recipe v1.0 """ __license__ = 'GPL v3' __copyright__ = ''' @@ -12,22 +12,23 @@ from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe -class Readitlater(BasicNewsRecipe): - title = 'ReadItLater' +class Pocket(BasicNewsRecipe): + title = 'Pocket' __author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan' - description = '''Personalized news feeds. Go to readitlaterlist.com to setup \ - up your news. This version displays pages of articles from \ + description = '''Personalized news feeds. Go to getpocket.com to setup up \ + your news. This version displays pages of articles from \ oldest to newest, with max & minimum counts, and marks articles \ read after downloading.''' - publisher = 'readitlaterlist.com' + publisher = 'getpocket.com' category = 'news, custom' oldest_article = 7 max_articles_per_feed = 50 - minimum_articles = 1 + minimum_articles = 10 + mark_as_read_after_dl = True no_stylesheets = True use_embedded_content = False needs_subscription = True - INDEX = u'http://readitlaterlist.com' + INDEX = u'http://getpocket.com' LOGIN = INDEX + u'/l' readList = [] @@ -100,9 +101,31 @@ class Readitlater(BasicNewsRecipe): br = self.get_browser() for link in markList: url = self.INDEX + link + print 'Marking read: ', url response = br.open(url) - response + print response.info() def cleanup(self): - self.mark_as_read(self.readList) + if self.mark_as_read_after_dl: + self.mark_as_read(self.readList) + else: + pass + def default_cover(self, cover_file): + ''' + Create a generic cover for recipes that don't have a cover + This override adds time to the cover + ''' + try: + from calibre.ebooks import calibre_cover + title = self.title if isinstance(self.title, unicode) else \ + self.title.decode('utf-8', 'replace') + date = strftime(self.timefmt) + time = strftime('[%I:%M %p]') + img_data = calibre_cover(title, date, time) + cover_file.write(img_data) + cover_file.flush() + except: + self.log.exception('Failed to generate default cover') + return False + return True diff --git a/recipes/swiat_obrazu.recipe b/recipes/swiat_obrazu.recipe new file mode 100644 index 0000000000..68740fa4dd --- /dev/null +++ b/recipes/swiat_obrazu.recipe @@ -0,0 +1,25 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class Swiat_Obrazu(BasicNewsRecipe): + title = u'Swiat Obrazu' + __author__ = 'fenuks' + description = u'Internetowy Dziennik o Fotografii i Wideo www.SwiatObrazu.pl to źródło informacji o technice fotografii i wideo, o sprzęcie najbardziej znanych i uznanych firm: Canon, Nikon, Sony, Hasselblad i wielu innych. Znajdziecie tu programy do obróbki zdjęć, forum foto i forum wideo i galerie zdjęć. 
Codziennie najświeższe informacje: aktualności, testy, poradniki, wywiady, felietony. Swiatobrazu.pl stale organizuje konkursy oraz warsztaty fotograficzne i wideo.' + category = 'photography' + masthead_url = 'http://www.swiatobrazu.pl/img/logo.jpg' + cover_url = 'http://www.swiatobrazu.pl/img/logo.jpg' + language = 'pl' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + remove_javascript= True + use_embedded_content = False + feeds = [(u'Wszystko', u'http://www.swiatobrazu.pl/rss')] + + def print_version(self, url): + return url + ',drukuj' + + def image_url_processor(self, baseurl, url): + if 'http://' not in url or 'https://' not in url: + return 'http://www.swiatobrazu.pl' + url[5:] + else: + return url diff --git a/recipes/tanuki.recipe b/recipes/tanuki.recipe index 666cb8aa77..a615763307 100644 --- a/recipes/tanuki.recipe +++ b/recipes/tanuki.recipe @@ -34,4 +34,12 @@ class tanuki(BasicNewsRecipe): def preprocess_html(self, soup): self.append_page(soup, soup.body) + for a in soup('a'): + if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: + if 'tanuki-anime' in soup.title.string.lower(): + a['href']='http://anime.tanuki.pl' + a['href'] + elif 'tanuki-manga' in soup.title.string.lower(): + a['href']='http://manga.tanuki.pl' + a['href'] + elif 'tanuki-czytelnia' in soup.title.string.lower(): + a['href']='http://czytelnia.tanuki.pl' + a['href'] return soup \ No newline at end of file diff --git a/recipes/webhosting_pl.recipe b/recipes/webhosting_pl.recipe index aeb98477f3..8ebb91c4ba 100644 --- a/recipes/webhosting_pl.recipe +++ b/recipes/webhosting_pl.recipe @@ -8,6 +8,7 @@ class webhosting_pl(BasicNewsRecipe): cover_url='http://webhosting.pl/images/logo.png' masthead_url='http://webhosting.pl/images/logo.png' oldest_article = 7 + index='http://webhosting.pl' max_articles_per_feed = 100 no_stylesheets = True remove_empty_feeds = True @@ -36,4 +37,10 @@ class webhosting_pl(BasicNewsRecipe): (u'Marketing', u'http://webhosting.pl/feed/rss/n/11535')] def print_version(self, url): - return url.replace('webhosting.pl', 'webhosting.pl/print') \ No newline at end of file + return url.replace('webhosting.pl', 'webhosting.pl/print') + + def preprocess_html(self, soup): + for a in soup('a'): + if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: + a['href']=self.index + a['href'] + return soup \ No newline at end of file diff --git a/recipes/xkcd.recipe b/recipes/xkcd.recipe index ce63b0a99a..42dceda65b 100644 --- a/recipes/xkcd.recipe +++ b/recipes/xkcd.recipe @@ -21,7 +21,7 @@ class XkcdCom(BasicNewsRecipe): use_embedded_content = False oldest_article = 60 - keep_only_tags = [dict(id='middleContent')] + keep_only_tags = [dict(id='middleContainer')] remove_tags = [dict(name='ul'), dict(name='h3'), dict(name='br')] no_stylesheets = True # turn image bubblehelp into a paragraph diff --git a/setup/hosting.py b/setup/hosting.py index 394d32702e..33bb3bff04 100644 --- a/setup/hosting.py +++ b/setup/hosting.py @@ -26,7 +26,7 @@ def login_to_google(username, password): br.form['Email'] = username br.form['Passwd'] = password raw = br.submit().read() - if re.search(br'.*?Account Settings', raw) is None: + if re.search(br'(?i).*?Account Settings', raw) is None: x = re.search(br'(?is).*?', raw) if x is not None: print ('Title of post login page: %s'%x.group()) diff --git a/setup/iso_639/ca.po b/setup/iso_639/ca.po index 1286dcebc2..63b910ff93 100644 --- a/setup/iso_639/ca.po +++ b/setup/iso_639/ca.po @@ -12,14 
+12,14 @@ msgstr "" "Report-Msgid-Bugs-To: Debian iso-codes team \n" "POT-Creation-Date: 2011-11-25 14:01+0000\n" -"PO-Revision-Date: 2011-12-14 19:48+0000\n" -"Last-Translator: Ferran Rius \n" +"PO-Revision-Date: 2012-04-12 09:56+0000\n" +"Last-Translator: Dídac Rios \n" "Language-Team: Catalan \n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"X-Launchpad-Export-Date: 2011-12-15 05:18+0000\n" -"X-Generator: Launchpad (build 14487)\n" +"X-Launchpad-Export-Date: 2012-04-13 05:26+0000\n" +"X-Generator: Launchpad (build 15070)\n" "Language: ca\n" #. name for aaa @@ -9584,31 +9584,31 @@ msgstr "" #. name for hoi msgid "Holikachuk" -msgstr "" +msgstr "Holikachuk" #. name for hoj msgid "Hadothi" -msgstr "" +msgstr "Hadothi" #. name for hol msgid "Holu" -msgstr "" +msgstr "Holu" #. name for hom msgid "Homa" -msgstr "" +msgstr "Homa" #. name for hoo msgid "Holoholo" -msgstr "" +msgstr "Holoholo" #. name for hop msgid "Hopi" -msgstr "" +msgstr "Hopi" #. name for hor msgid "Horo" -msgstr "" +msgstr "Horo" #. name for hos msgid "Ho Chi Minh City Sign Language" @@ -9616,15 +9616,15 @@ msgstr "Llenguatge de signes de la ciutat de Ho Chi Minh" #. name for hot msgid "Hote" -msgstr "" +msgstr "Hote" #. name for hov msgid "Hovongan" -msgstr "" +msgstr "Hovongan" #. name for how msgid "Honi" -msgstr "" +msgstr "Honi" #. name for hoy msgid "Holiya" @@ -9636,7 +9636,7 @@ msgstr "" #. name for hpo msgid "Hpon" -msgstr "" +msgstr "Hpon" #. name for hps msgid "Hawai'i Pidgin Sign Language" @@ -9644,35 +9644,35 @@ msgstr "Hawaià Pidgin; llenguatge de signes" #. name for hra msgid "Hrangkhol" -msgstr "" +msgstr "Hrangkhol" #. name for hre msgid "Hre" -msgstr "" +msgstr "Hre" #. name for hrk msgid "Haruku" -msgstr "" +msgstr "Haruku" #. name for hrm msgid "Miao; Horned" -msgstr "" +msgstr "Miao; Horned" #. name for hro msgid "Haroi" -msgstr "" +msgstr "Haroi" #. name for hrr msgid "Horuru" -msgstr "" +msgstr "Horuru" #. name for hrt msgid "Hértevin" -msgstr "" +msgstr "Hértevin" #. name for hru msgid "Hruso" -msgstr "" +msgstr "Hruso" #. name for hrv msgid "Croatian" @@ -9680,11 +9680,11 @@ msgstr "Croat" #. name for hrx msgid "Hunsrik" -msgstr "" +msgstr "Hunsrik" #. name for hrz msgid "Harzani" -msgstr "" +msgstr "Harzani" #. name for hsb msgid "Sorbian; Upper" @@ -9704,31 +9704,31 @@ msgstr "Xinès; Xiang" #. name for hss msgid "Harsusi" -msgstr "" +msgstr "Harsusi" #. name for hti msgid "Hoti" -msgstr "" +msgstr "Hoti" #. name for hto msgid "Huitoto; Minica" -msgstr "" +msgstr "Huitoto; Minica" #. name for hts msgid "Hadza" -msgstr "" +msgstr "Hadza" #. name for htu msgid "Hitu" -msgstr "" +msgstr "Hitu" #. name for htx msgid "Hittite; Middle" -msgstr "" +msgstr "Hittite; Middle" #. name for hub msgid "Huambisa" -msgstr "" +msgstr "Huambisa" #. name for huc msgid "=/Hua" @@ -9736,27 +9736,27 @@ msgstr "" #. name for hud msgid "Huaulu" -msgstr "" +msgstr "Huaulu" #. name for hue msgid "Huave; San Francisco Del Mar" -msgstr "" +msgstr "Huave; San Francisco Del Mar" #. name for huf msgid "Humene" -msgstr "" +msgstr "Humene" #. name for hug msgid "Huachipaeri" -msgstr "" +msgstr "Huachipaeri" #. name for huh msgid "Huilliche" -msgstr "" +msgstr "Huilliche" #. name for hui msgid "Huli" -msgstr "" +msgstr "Huli" #. name for huj msgid "Miao; Northern Guiyang" @@ -9764,15 +9764,15 @@ msgstr "Miao; Guiyang septentrional" #. name for huk msgid "Hulung" -msgstr "" +msgstr "Hulung" #. name for hul msgid "Hula" -msgstr "" +msgstr "Hula" #. 
name for hum msgid "Hungana" -msgstr "" +msgstr "Hungana" #. name for hun msgid "Hungarian" @@ -9780,43 +9780,43 @@ msgstr "Hongarès" #. name for huo msgid "Hu" -msgstr "" +msgstr "Hu" #. name for hup msgid "Hupa" -msgstr "" +msgstr "Hupa" #. name for huq msgid "Tsat" -msgstr "" +msgstr "Tsat" #. name for hur msgid "Halkomelem" -msgstr "" +msgstr "Halkomelem" #. name for hus msgid "Huastec" -msgstr "" +msgstr "Huastec" #. name for hut msgid "Humla" -msgstr "" +msgstr "Humla" #. name for huu msgid "Huitoto; Murui" -msgstr "" +msgstr "Huitoto; Murui" #. name for huv msgid "Huave; San Mateo Del Mar" -msgstr "" +msgstr "Huave; San Mateo Del Mar" #. name for huw msgid "Hukumina" -msgstr "" +msgstr "Hukumina" #. name for hux msgid "Huitoto; Nüpode" -msgstr "" +msgstr "Huitoto; Nüpode" #. name for huy msgid "Hulaulá" diff --git a/setup/iso_639/es.po b/setup/iso_639/es.po index 0bd14a5857..8e0046ddf9 100644 --- a/setup/iso_639/es.po +++ b/setup/iso_639/es.po @@ -8,14 +8,14 @@ msgstr "" "Project-Id-Version: calibre\n" "Report-Msgid-Bugs-To: FULL NAME \n" "POT-Creation-Date: 2011-11-25 14:01+0000\n" -"PO-Revision-Date: 2012-03-11 10:13+0000\n" -"Last-Translator: Jellby \n" +"PO-Revision-Date: 2012-04-18 20:56+0000\n" +"Last-Translator: David de Obregon \n" "Language-Team: Spanish \n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"X-Launchpad-Export-Date: 2012-03-12 04:38+0000\n" -"X-Generator: Launchpad (build 14933)\n" +"X-Launchpad-Export-Date: 2012-04-19 04:37+0000\n" +"X-Generator: Launchpad (build 15108)\n" #. name for aaa msgid "Ghotuo" @@ -4931,7 +4931,7 @@ msgstr "Como karim" #. name for cfm msgid "Chin; Falam" -msgstr "" +msgstr "Chin; Falam" #. name for cga msgid "Changriwa" @@ -5071,7 +5071,7 @@ msgstr "Chinali" #. name for cik msgid "Kinnauri; Chitkuli" -msgstr "" +msgstr "Kinnauri; Chitkuli" #. name for cim msgid "Cimbrian" @@ -5147,7 +5147,7 @@ msgstr "Chino jin" #. name for cka msgid "Chin; Khumi Awa" -msgstr "" +msgstr "Chin; Khumi Awa" #. name for ckb msgid "Kurdish; Central" @@ -5287,7 +5287,7 @@ msgstr "Mnong central" #. name for cmr msgid "Chin; Mro" -msgstr "" +msgstr "Chin; Mro" #. name for cms msgid "Messapic" @@ -5303,7 +5303,7 @@ msgstr "Changthang" #. name for cnb msgid "Chin; Chinbon" -msgstr "" +msgstr "Chin; Chinbon" #. name for cnc msgid "Côông" @@ -5315,7 +5315,7 @@ msgstr "Qiang septentrional" #. name for cnh msgid "Chin; Haka" -msgstr "" +msgstr "Chin; Haka" #. name for cni msgid "Asháninka" @@ -5323,7 +5323,7 @@ msgstr "Asháninka" #. name for cnk msgid "Chin; Khumi" -msgstr "" +msgstr "Chin; Khumi" #. name for cnl msgid "Chinantec; Lalana" @@ -5347,7 +5347,7 @@ msgstr "Chenoua" #. name for cnw msgid "Chin; Ngawn" -msgstr "" +msgstr "Chin; Ngawn" #. name for cnx msgid "Cornish; Middle" @@ -5459,7 +5459,7 @@ msgstr "Chinanteco de Palantla" #. name for cpb msgid "Ashéninka; Ucayali-Yurúa" -msgstr "" +msgstr "Ashéninka; Ucayali-Yurúa" #. name for cpc msgid "Ajyíninka Apurucayali" @@ -5483,7 +5483,7 @@ msgstr "Capiznon" #. name for cpu msgid "Ashéninka; Pichis" -msgstr "" +msgstr "Ashéninka; Pichis" #. name for cpx msgid "Chinese; Pu-Xian" @@ -5491,11 +5491,11 @@ msgstr "Chino puxian" #. name for cpy msgid "Ashéninka; South Ucayali" -msgstr "" +msgstr "Ashéninka; South Ucayali" #. name for cqd msgid "Miao; Chuanqiandian Cluster" -msgstr "" +msgstr "Miao; Chuanqiandian Cluster" #. name for cqu msgid "Quechua; Chilean" @@ -5507,7 +5507,7 @@ msgstr "Chara" #. 
name for crb msgid "Carib; Island" -msgstr "" +msgstr "Carib; Island" #. name for crc msgid "Lonwolwol" @@ -5539,23 +5539,23 @@ msgstr "Forro" #. name for crj msgid "Cree; Southern East" -msgstr "" +msgstr "Cree; Southern East" #. name for crk msgid "Cree; Plains" -msgstr "" +msgstr "Cree; Plains" #. name for crl msgid "Cree; Northern East" -msgstr "" +msgstr "Cree; Northern East" #. name for crm msgid "Cree; Moose" -msgstr "" +msgstr "Cree; Moose" #. name for crn msgid "Cora; El Nayar" -msgstr "" +msgstr "Cora; El Nayar" #. name for cro msgid "Crow" @@ -5563,11 +5563,11 @@ msgstr "Crow" #. name for crq msgid "Chorote; Iyo'wujwa" -msgstr "" +msgstr "Chorote; Iyo'wujwa" #. name for crr msgid "Algonquian; Carolina" -msgstr "" +msgstr "Algonquian; Carolina" #. name for crs msgid "Creole French; Seselwa" @@ -5575,7 +5575,7 @@ msgstr "Francés criollo seychellense" #. name for crt msgid "Chorote; Iyojwa'ja" -msgstr "" +msgstr "Chorote; Iyojwa'ja" #. name for crv msgid "Chaura" @@ -5627,11 +5627,11 @@ msgstr "Lengua de signos chilena" #. name for csh msgid "Chin; Asho" -msgstr "" +msgstr "Chin; Asho" #. name for csi msgid "Miwok; Coast" -msgstr "" +msgstr "Miwok; Coast" #. name for csk msgid "Jola-Kasa" @@ -5643,7 +5643,7 @@ msgstr "Lengua de signos china" #. name for csm msgid "Miwok; Central Sierra" -msgstr "" +msgstr "Miwok; Central Sierra" #. name for csn msgid "Colombian Sign Language" @@ -5671,11 +5671,11 @@ msgstr "Ohlone septentrional" #. name for csw msgid "Cree; Swampy" -msgstr "" +msgstr "Cree; Swampy" #. name for csy msgid "Chin; Siyin" -msgstr "" +msgstr "Chin; Siyin" #. name for csz msgid "Coos" @@ -5691,7 +5691,7 @@ msgstr "Chetco" #. name for ctd msgid "Chin; Tedim" -msgstr "" +msgstr "Chin; Tedim" #. name for cte msgid "Chinantec; Tepinapa" @@ -5727,7 +5727,7 @@ msgstr "Pandan" #. name for ctt msgid "Chetti; Wayanad" -msgstr "" +msgstr "Chetti; Wayanad" #. name for ctu msgid "Chol" @@ -5767,7 +5767,7 @@ msgstr "Mashco piro" #. name for cuk msgid "Kuna; San Blas" -msgstr "" +msgstr "Kuna; San Blas" #. name for cul msgid "Culina" @@ -5795,7 +5795,7 @@ msgstr "Chhulung" #. name for cut msgid "Cuicatec; Teutila" -msgstr "" +msgstr "Cuicatec; Teutila" #. name for cuu msgid "Tai Ya" @@ -5811,7 +5811,7 @@ msgstr "Chukwa" #. name for cux msgid "Cuicatec; Tepeuxila" -msgstr "" +msgstr "Cuicatec; Tepeuxila" #. name for cvg msgid "Chug" @@ -5831,7 +5831,7 @@ msgstr "Maindo" #. name for cwd msgid "Cree; Woods" -msgstr "" +msgstr "Cree; Woods" #. name for cwe msgid "Kwere" @@ -5879,7 +5879,7 @@ msgstr "Chino minzhong" #. name for czt msgid "Chin; Zotung" -msgstr "" +msgstr "Chin; Zotung" #. name for daa msgid "Dangaléat" @@ -5935,7 +5935,7 @@ msgstr "Danés" #. name for dao msgid "Chin; Daai" -msgstr "" +msgstr "Chin; Daai" #. name for dap msgid "Nisi (India)" @@ -5943,7 +5943,7 @@ msgstr "Nisi (India)" #. name for daq msgid "Maria; Dandami" -msgstr "" +msgstr "Maria; Dandami" #. name for dar msgid "Dargwa" @@ -5995,7 +5995,7 @@ msgstr "Edopi" #. name for dbg msgid "Dogon; Dogul Dom" -msgstr "" +msgstr "Dogon; Dogul Dom" #. name for dbi msgid "Doka" @@ -6035,7 +6035,7 @@ msgstr "Dabarre" #. name for dbu msgid "Dogon; Bondum Dom" -msgstr "" +msgstr "Dogon; Bondum Dom" #. name for dbv msgid "Dungu" @@ -6067,7 +6067,7 @@ msgstr "Fataluku" #. name for ddi msgid "Goodenough; West" -msgstr "" +msgstr "Goodenough; West" #. name for ddj msgid "Jaru" @@ -6083,7 +6083,7 @@ msgstr "Dido" #. name for dds msgid "Dogon; Donno So" -msgstr "" +msgstr "Dogon; Donno So" #. 
name for ddw msgid "Dawera-Daweloor" @@ -6135,7 +6135,7 @@ msgstr "Slave (atabascano)" #. name for dep msgid "Delaware; Pidgin" -msgstr "" +msgstr "Delaware; Pidgin" #. name for deq msgid "Dendi (Central African Republic)" @@ -6167,11 +6167,11 @@ msgstr "Dagaare meridional" #. name for dgb msgid "Dogon; Bunoge" -msgstr "" +msgstr "Dogon; Bunoge" #. name for dgc msgid "Agta; Casiguran Dumagat" -msgstr "" +msgstr "Agta; Casiguran Dumagat" #. name for dgd msgid "Dagaari Dioula" @@ -6283,7 +6283,7 @@ msgstr "Dinka centromeridional" #. name for dic msgid "Dida; Lakota" -msgstr "" +msgstr "Dida; Lakota" #. name for did msgid "Didinga" @@ -6411,7 +6411,7 @@ msgstr "Djiwarli" #. name for djm msgid "Dogon; Jamsay" -msgstr "" +msgstr "Dogon; Jamsay" #. name for djn msgid "Djauan" @@ -6471,7 +6471,7 @@ msgstr "Duma" #. name for dmb msgid "Dogon; Mombo" -msgstr "" +msgstr "Dogon; Mombo" #. name for dmc msgid "Dimir" @@ -6483,7 +6483,7 @@ msgstr "Dugwor" #. name for dmg msgid "Kinabatangan; Upper" -msgstr "" +msgstr "Kinabatangan; Upper" #. name for dmk msgid "Domaaki" @@ -6503,7 +6503,7 @@ msgstr "Kemezung" #. name for dmr msgid "Damar; East" -msgstr "" +msgstr "Damar; East" #. name for dms msgid "Dampelas" @@ -6527,7 +6527,7 @@ msgstr "Demta" #. name for dna msgid "Dani; Upper Grand Valley" -msgstr "" +msgstr "Dani; Upper Grand Valley" #. name for dnd msgid "Daonda" @@ -6543,7 +6543,7 @@ msgstr "Dungan" #. name for dni msgid "Dani; Lower Grand Valley" -msgstr "" +msgstr "Dani; Lower Grand Valley" #. name for dnk msgid "Dengka" @@ -6559,7 +6559,7 @@ msgstr "Danaru" #. name for dnt msgid "Dani; Mid Grand Valley" -msgstr "" +msgstr "Dani; Mid Grand Valley" #. name for dnu msgid "Danau" @@ -6695,7 +6695,7 @@ msgstr "Damar occidental" #. name for dro msgid "Melanau; Daro-Matu" -msgstr "" +msgstr "Melanau; Daro-Matu" #. name for drq msgid "Dura" @@ -6723,7 +6723,7 @@ msgstr "Darai" #. name for dsb msgid "Sorbian; Lower" -msgstr "" +msgstr "Sorbian; Lower" #. name for dse msgid "Dutch Sign Language" @@ -6759,7 +6759,7 @@ msgstr "Daur" #. name for dtb msgid "Kadazan; Labuk-Kinabatangan" -msgstr "" +msgstr "Kadazan; Labuk-Kinabatangan" #. name for dtd msgid "Ditidaht" @@ -6767,15 +6767,15 @@ msgstr "Ditidaht" #. name for dti msgid "Dogon; Ana Tinga" -msgstr "" +msgstr "Dogon; Ana Tinga" #. name for dtk msgid "Dogon; Tene Kan" -msgstr "" +msgstr "Dogon; Tene Kan" #. name for dtm msgid "Dogon; Tomo Kan" -msgstr "" +msgstr "Dogon; Tomo Kan" #. name for dtp msgid "Dusun; Central" @@ -6787,15 +6787,15 @@ msgstr "Lotud" #. name for dts msgid "Dogon; Toro So" -msgstr "" +msgstr "Dogon; Toro So" #. name for dtt msgid "Dogon; Toro Tegu" -msgstr "" +msgstr "Dogon; Toro Tegu" #. name for dtu msgid "Dogon; Tebul Ure" -msgstr "" +msgstr "Dogon; Tebul Ure" #. name for dua msgid "Duala" @@ -6815,7 +6815,7 @@ msgstr "Hun-saare" #. name for due msgid "Agta; Umiray Dumaget" -msgstr "" +msgstr "Agta; Umiray Dumaget" #. name for duf msgid "Dumbea" @@ -6843,7 +6843,7 @@ msgstr "Uyajitaya" #. name for dul msgid "Agta; Alabat Island" -msgstr "" +msgstr "Agta; Alabat Island" #. name for dum msgid "Dutch; Middle (ca. 1050-1350)" @@ -6855,7 +6855,7 @@ msgstr "Dusun deyah" #. name for duo msgid "Agta; Dupaninan" -msgstr "" +msgstr "Agta; Dupaninan" #. name for dup msgid "Duano" @@ -6891,7 +6891,7 @@ msgstr "Duungooma" #. name for duy msgid "Agta; Dicamay" -msgstr "" +msgstr "Agta; Dicamay" #. name for duz msgid "Duli" @@ -6907,7 +6907,7 @@ msgstr "Diri" #. name for dwl msgid "Dogon; Walo Kumbe" -msgstr "" +msgstr "Dogon; Walo Kumbe" #. 
name for dwr msgid "Dawro" @@ -6935,15 +6935,15 @@ msgstr "Dyugun" #. name for dyg msgid "Agta; Villa Viciosa" -msgstr "" +msgstr "Agta; Villa Viciosa" #. name for dyi msgid "Senoufo; Djimini" -msgstr "" +msgstr "Senoufo; Djimini" #. name for dym msgid "Dogon; Yanda Dom" -msgstr "" +msgstr "Dogon; Yanda Dom" #. name for dyn msgid "Dyangadi" @@ -7095,19 +7095,19 @@ msgstr "Kol" #. name for ekm msgid "Elip" -msgstr "" +msgstr "Elip" #. name for eko msgid "Koti" -msgstr "" +msgstr "Koti" #. name for ekp msgid "Ekpeye" -msgstr "" +msgstr "Ekpeye" #. name for ekr msgid "Yace" -msgstr "" +msgstr "Yace" #. name for eky msgid "Kayah; Eastern" @@ -7115,19 +7115,19 @@ msgstr "Kayah oriental" #. name for ele msgid "Elepi" -msgstr "" +msgstr "Elepi" #. name for elh msgid "El Hugeirat" -msgstr "" +msgstr "El Hugeirat" #. name for eli msgid "Nding" -msgstr "" +msgstr "Nding" #. name for elk msgid "Elkei" -msgstr "" +msgstr "Elkei" #. name for ell msgid "Greek; Modern (1453-)" @@ -7135,19 +7135,19 @@ msgstr "Griego moderno (1453-)" #. name for elm msgid "Eleme" -msgstr "" +msgstr "Eleme" #. name for elo msgid "El Molo" -msgstr "" +msgstr "El Molo" #. name for elp msgid "Elpaputih" -msgstr "" +msgstr "Elpaputih" #. name for elu msgid "Elu" -msgstr "" +msgstr "Elu" #. name for elx msgid "Elamite" @@ -7155,15 +7155,15 @@ msgstr "Elamita" #. name for ema msgid "Emai-Iuleha-Ora" -msgstr "" +msgstr "Emai-Iuleha-Ora" #. name for emb msgid "Embaloh" -msgstr "" +msgstr "Embaloh" #. name for eme msgid "Emerillon" -msgstr "" +msgstr "Emerillon" #. name for emg msgid "Meohang; Eastern" @@ -7171,7 +7171,7 @@ msgstr "Meohang oriental" #. name for emi msgid "Mussau-Emira" -msgstr "" +msgstr "Mussau-Emira" #. name for emk msgid "Maninkakan; Eastern" @@ -7179,15 +7179,15 @@ msgstr "Maninkakan oriental" #. name for emm msgid "Mamulique" -msgstr "" +msgstr "Mamulique" #. name for emn msgid "Eman" -msgstr "" +msgstr "Eman" #. name for emo msgid "Emok" -msgstr "" +msgstr "Emok" #. name for emp msgid "Emberá; Northern" @@ -7203,11 +7203,11 @@ msgstr "Muria oriental" #. name for emw msgid "Emplawas" -msgstr "" +msgstr "Emplawas" #. name for emx msgid "Erromintxela" -msgstr "" +msgstr "Erromintxela" #. name for emy msgid "Mayan; Epigraphic" diff --git a/setup/iso_639/eu.po b/setup/iso_639/eu.po index bcae01cb23..a262c93085 100644 --- a/setup/iso_639/eu.po +++ b/setup/iso_639/eu.po @@ -9,14 +9,14 @@ msgstr "" "Report-Msgid-Bugs-To: Debian iso-codes team \n" "POT-Creation-Date: 2011-11-25 14:01+0000\n" -"PO-Revision-Date: 2012-03-06 13:55+0000\n" +"PO-Revision-Date: 2012-04-18 13:08+0000\n" "Last-Translator: Asier Iturralde Sarasola \n" "Language-Team: Euskara \n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"X-Launchpad-Export-Date: 2012-03-07 05:12+0000\n" -"X-Generator: Launchpad (build 14907)\n" +"X-Launchpad-Export-Date: 2012-04-19 04:36+0000\n" +"X-Generator: Launchpad (build 15108)\n" "Language: eu\n" #. name for aaa @@ -27125,7 +27125,7 @@ msgstr "" #. name for vie msgid "Vietnamese" -msgstr "Mahastiak" +msgstr "Vietnamera" #. 
name for vif msgid "Vili" diff --git a/src/calibre/constants.py b/src/calibre/constants.py index 402fef4c67..1db9c90466 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -4,7 +4,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' __appname__ = u'calibre' -numeric_version = (0, 8, 47) +numeric_version = (0, 8, 48) __version__ = u'.'.join(map(unicode, numeric_version)) __author__ = u"Kovid Goyal " diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index d91fc97a1d..af5590cc53 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -259,7 +259,7 @@ class LRXMetadataReader(MetadataReaderPlugin): class MOBIMetadataReader(MetadataReaderPlugin): name = 'Read MOBI metadata' - file_types = set(['mobi', 'prc', 'azw', 'azw4', 'pobi']) + file_types = set(['mobi', 'prc', 'azw', 'azw3', 'azw4', 'pobi']) description = _('Read metadata from %s files')%'MOBI' def get_metadata(self, stream, ftype): diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index 571ceee55d..07be4e42c1 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -40,6 +40,7 @@ class ANDROID(USBMS): 0xcac : [0x100, 0x0227, 0x0226, 0x222], 0xccf : [0x100, 0x0227, 0x0226, 0x222], 0x2910 : [0x222], + 0xff9 : [0x9999], }, # Eken @@ -174,7 +175,7 @@ class ANDROID(USBMS): 'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA', 'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON', 'VIZIO', 'GOOGLE', 'FREESCAL', 'KOBO_INC', 'LENOVO', 'ROCKCHIP', - 'POCKET', 'ONDA_MID', 'ZENITHIN', 'INGENIC'] + 'POCKET', 'ONDA_MID', 'ZENITHIN', 'INGENIC', 'PMID701C'] WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE', '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897', 'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', @@ -189,7 +190,8 @@ class ANDROID(USBMS): 'UMS', '.K080', 'P990', 'LTE', 'MB853', 'GT-S5660_CARD', 'A107', 'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855', 'XT910', 'BOOK_A10', 'USB_2.0_DRIVER', 'I9100T', 'P999DW', - 'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD'] + 'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD', 'USB_2.0_DRIVER', + 'GT-S5830L_CARD'] WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD', @@ -197,7 +199,7 @@ class ANDROID(USBMS): 'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853', 'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD', 'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC', - 'FILE-CD_GADGET', 'GT-I9001_CARD'] + 'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER'] OSX_MAIN_MEM = 'Android Device Main Memory' diff --git a/src/calibre/devices/kindle/driver.py b/src/calibre/devices/kindle/driver.py index c71eb67985..8154b7d3a0 100644 --- a/src/calibre/devices/kindle/driver.py +++ b/src/calibre/devices/kindle/driver.py @@ -325,6 +325,10 @@ class KINDLE2(KINDLE): OPT_APNX_ACCURATE = 1 OPT_APNX_CUST_COL = 2 + def formats_to_scan_for(self): + ans = USBMS.formats_to_scan_for(self) | {'azw3'} + return ans + def books(self, oncard=None, end_session=True): bl = USBMS.books(self, oncard=oncard, end_session=end_session) # Read collections information @@ -423,6 +427,8 @@ class KINDLE_FIRE(KINDLE2): name = 'Kindle Fire Device Interface' 
description = _('Communicate with the Kindle Fire') gui_name = 'Fire' + FORMATS = list(KINDLE2.FORMATS) + FORMATS.insert(0, 'azw3') PRODUCT_ID = [0x0006] BCD = [0x216, 0x100] diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py index f68ea8feff..1384ec0810 100644 --- a/src/calibre/devices/kobo/driver.py +++ b/src/calibre/devices/kobo/driver.py @@ -298,7 +298,7 @@ class KOBO(USBMS): changed = False for i, row in enumerate(cursor): # self.report_progress((i+1) / float(numrows), _('Getting list of books on device...')) - if row[3].startswith("file:///usr/local/Kobo/help/"): + if not hasattr(row[3], 'startswith') or row[3].startswith("file:///usr/local/Kobo/help/"): # These are internal to the Kobo device and do not exist continue path = self.path_from_contentid(row[3], row[5], row[4], oncard) diff --git a/src/calibre/devices/prst1/driver.py b/src/calibre/devices/prst1/driver.py index 12867e0859..c2b04f11f7 100644 --- a/src/calibre/devices/prst1/driver.py +++ b/src/calibre/devices/prst1/driver.py @@ -307,11 +307,21 @@ class PRST1(USBMS): # Work-around for Sony Bug (SD Card DB not using right SQLite sequence) if source_id == 1: + # Update any existing sequence numbers in the table that aren't in the required range sdcard_sequence_start = '4294967296' query = 'UPDATE sqlite_sequence SET seq = ? WHERE seq < ?' t = (sdcard_sequence_start, sdcard_sequence_start,) cursor.execute(query, t) + # Insert sequence numbers for tables we will be manipulating, if they don't already exist + query = ('INSERT INTO sqlite_sequence (name, seq) ' + 'SELECT ?, ? ' + 'WHERE NOT EXISTS (SELECT 1 FROM sqlite_sequence WHERE name = ?)'); + cursor.execute(query, ('books',sdcard_sequence_start,'books',)) + cursor.execute(query, ('collection',sdcard_sequence_start,'collection',)) + cursor.execute(query, ('collections',sdcard_sequence_start,'collections',)) + + for book in booklist: # Run through plugboard if needed if plugboard is not None: diff --git a/src/calibre/devices/usbms/driver.py b/src/calibre/devices/usbms/driver.py index a5ab0bd15c..dc0299b46e 100644 --- a/src/calibre/devices/usbms/driver.py +++ b/src/calibre/devices/usbms/driver.py @@ -128,6 +128,9 @@ class USBMS(CLI, Device): elif location_code == 'B': self._update_driveinfo_file(self._card_b_prefix, location_code, name) + def formats_to_scan_for(self): + return set(self.settings().format_map) | set(self.FORMATS) + def books(self, oncard=None, end_session=True): from calibre.ebooks.metadata.meta import path_to_ext @@ -166,7 +169,7 @@ class USBMS(CLI, Device): for idx,b in enumerate(bl): bl_cache[b.lpath] = idx - all_formats = set(self.settings().format_map) | set(self.FORMATS) + all_formats = self.formats_to_scan_for() def update_booklist(filename, path, prefix): changed = False diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py index 82e8c6f925..09cc2fbaaf 100644 --- a/src/calibre/ebooks/__init__.py +++ b/src/calibre/ebooks/__init__.py @@ -31,7 +31,7 @@ BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'ht 'epub', 'fb2', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip', 'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb', 'xps', 'oxps', 'azw4', 'book', 'zbf', 'pobi', 'docx', 'md', - 'textile', 'markdown', 'ibook', 'iba'] + 'textile', 'markdown', 'ibook', 'iba', 'azw3'] class HTMLRenderer(object): diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index 559402ca1c..877b15c24a 100644 --- 
a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -156,9 +156,10 @@ def add_pipeline_options(parser, plumber): 'SEARCH AND REPLACE' : ( _('Modify the document text and structure using user defined patterns.'), [ - 'sr1_search', 'sr1_replace', - 'sr2_search', 'sr2_replace', - 'sr3_search', 'sr3_replace', + 'sr1_search', 'sr1_replace', + 'sr2_search', 'sr2_replace', + 'sr3_search', 'sr3_replace', + 'search_replace', ] ), @@ -211,6 +212,7 @@ def add_pipeline_options(parser, plumber): if rec.level < rec.HIGH: option_recommendation_to_cli_option(add_option, rec) + def option_parser(): parser = OptionParser(usage=USAGE) parser.add_option('--list-recipes', default=False, action='store_true', @@ -271,6 +273,34 @@ def abspath(x): return x return os.path.abspath(os.path.expanduser(x)) +def read_sr_patterns(path, log=None): + import json, re, codecs + pats = [] + with codecs.open(path, 'r', 'utf-8') as f: + pat = None + for line in f.readlines(): + if line.endswith(u'\n'): + line = line[:-1] + + if pat is None: + if not line.strip(): + continue + try: + re.compile(line) + except: + msg = u'Invalid regular expression: %r from file: %r'%( + line, path) + if log is not None: + log.error(msg) + raise SystemExit(1) + else: + raise ValueError(msg) + pat = line + else: + pats.append((pat, line)) + pat = None + return json.dumps(pats) + def main(args=sys.argv): log = Log() parser, plumber = create_option_parser(args, log) @@ -278,6 +308,9 @@ def main(args=sys.argv): for x in ('read_metadata_from_opf', 'cover'): if getattr(opts, x, None) is not None: setattr(opts, x, abspath(getattr(opts, x))) + if opts.search_replace: + opts.search_replace = read_sr_patterns(opts.search_replace, log) + recommendations = [(n.dest, getattr(opts, n.dest), OptionRecommendation.HIGH) \ for n in parser.options_iter() diff --git a/src/calibre/ebooks/conversion/plugins/mobi_input.py b/src/calibre/ebooks/conversion/plugins/mobi_input.py index 0e12dd5db7..3817a7bda9 100644 --- a/src/calibre/ebooks/conversion/plugins/mobi_input.py +++ b/src/calibre/ebooks/conversion/plugins/mobi_input.py @@ -28,7 +28,7 @@ class MOBIInput(InputFormatPlugin): name = 'MOBI Input' author = 'Kovid Goyal' description = 'Convert MOBI files (.mobi, .prc, .azw) to HTML' - file_types = set(['mobi', 'prc', 'azw']) + file_types = set(['mobi', 'prc', 'azw', 'azw3']) def convert(self, stream, options, file_ext, log, accelerators): diff --git a/src/calibre/ebooks/conversion/plugins/mobi_output.py b/src/calibre/ebooks/conversion/plugins/mobi_output.py index 72314b4237..89ab91f8eb 100644 --- a/src/calibre/ebooks/conversion/plugins/mobi_output.py +++ b/src/calibre/ebooks/conversion/plugins/mobi_output.py @@ -6,8 +6,6 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -from cStringIO import StringIO - from calibre.customize.conversion import OutputFormatPlugin from calibre.customize.conversion import OptionRecommendation @@ -79,18 +77,9 @@ class MOBIOutput(OutputFormatPlugin): def check_for_masthead(self): found = 'masthead' in self.oeb.guide if not found: + from calibre.ebooks import generate_masthead self.oeb.log.debug('No masthead found in manifest, generating default mastheadImage...') - try: - from PIL import Image as PILImage - PILImage - except ImportError: - import Image as PILImage - - raw = open(P('content_server/calibre_banner.png'), 'rb') - im = PILImage.open(raw) - of = StringIO() - im.save(of, 'GIF') - raw = of.getvalue() + raw = 
generate_masthead(unicode(self.oeb.metadata['title'][0])) id, href = self.oeb.manifest.generate('masthead', 'masthead') self.oeb.manifest.add(id, href, 'image/gif', data=raw) self.oeb.guide.add('masthead', 'Masthead Image', href) @@ -151,17 +140,46 @@ class MOBIOutput(OutputFormatPlugin): # Fix up the periodical href to point to first section href toc.nodes[0].href = toc.nodes[0].nodes[0].href + def remove_html_cover(self): + from calibre.ebooks.oeb.base import OEB_DOCS + + oeb = self.oeb + if not oeb.metadata.cover \ + or 'cover' not in oeb.guide: + return + href = oeb.guide['cover'].href + del oeb.guide['cover'] + item = oeb.manifest.hrefs[href] + if item.spine_position is not None: + self.log.warn('Found an HTML cover: ', item.href, 'removing it.', + 'If you find some content missing from the output MOBI, it ' + 'is because you misidentified the HTML cover in the input ' + 'document') + oeb.spine.remove(item) + if item.media_type in OEB_DOCS: + self.oeb.manifest.remove(item) + def convert(self, oeb, output_path, input_plugin, opts, log): + from calibre.utils.config import tweaks + from calibre.ebooks.mobi.writer2.resources import Resources self.log, self.opts, self.oeb = log, opts, oeb - kf8 = self.create_kf8() - self.write_mobi(input_plugin, output_path, kf8) + create_kf8 = tweaks.get('create_kf8', False) - def create_kf8(self): + self.remove_html_cover() + resources = Resources(oeb, opts, self.is_periodical, + add_fonts=create_kf8) + + kf8 = self.create_kf8(resources) if create_kf8 else None + + self.log('Creating MOBI 6 output') + self.write_mobi(input_plugin, output_path, kf8, resources) + + def create_kf8(self, resources): from calibre.ebooks.mobi.writer8.main import KF8Writer - return KF8Writer(self.oeb, self.opts) + return KF8Writer(self.oeb, self.opts, resources) - def write_mobi(self, input_plugin, output_path, kf8): + def write_mobi(self, input_plugin, output_path, kf8, resources): from calibre.ebooks.mobi.mobiml import MobiMLizer from calibre.ebooks.oeb.transforms.manglecase import CaseMangler from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable @@ -180,12 +198,15 @@ class MOBIOutput(OutputFormatPlugin): rasterizer(oeb, opts) except Unavailable: self.log.warn('SVG rasterizer unavailable, SVG will not be converted') + else: + # Add rasterized SVG images + resources.add_extra_images() mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables) mobimlizer(oeb, opts) self.check_for_periodical() write_page_breaks_after_item = input_plugin is not plugin_for_input_format('cbz') from calibre.ebooks.mobi.writer2.main import MobiWriter - writer = MobiWriter(opts, + writer = MobiWriter(opts, resources, kf8, write_page_breaks_after_item=write_page_breaks_after_item) writer(oeb, output_path) diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 8bb4fdd891..dbba38e987 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -626,6 +626,14 @@ OptionRecommendation(name='sr3_search', OptionRecommendation(name='sr3_replace', recommended_value='', level=OptionRecommendation.LOW, help=_('Replacement to replace the text found with sr3-search.')), + +OptionRecommendation(name='search_replace', + recommended_value=None, level=OptionRecommendation.LOW, help=_( + 'Path to a file containing search and replace regular expressions. ' + 'The file must contain alternating lines of regular expression ' + 'followed by replacement pattern (which can be an empty line). 
' + 'The regular expression must be in the python regex syntax and ' + 'the file must be UTF-8 encoded.')), ] # }}} diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 617de18555..c526cba8a9 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -5,7 +5,7 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import functools, re +import functools, re, json from calibre import entity_to_unicode, as_unicode @@ -515,18 +515,31 @@ class HTMLPreProcessor(object): if not getattr(self.extra_opts, 'keep_ligatures', False): html = _ligpat.sub(lambda m:LIGATURES[m.group()], html) - for search, replace in [['sr3_search', 'sr3_replace'], ['sr2_search', 'sr2_replace'], ['sr1_search', 'sr1_replace']]: + # Function for processing search and replace + def do_search_replace(search_pattern, replace_txt): + try: + search_re = re.compile(search_pattern) + if not replace_txt: + replace_txt = '' + rules.insert(0, (search_re, replace_txt)) + except Exception as e: + self.log.error('Failed to parse %r regexp because %s' % + (search, as_unicode(e))) + + # search / replace using the sr?_search / sr?_replace options + for i in range(1, 4): + search, replace = 'sr%d_search'%i, 'sr%d_replace'%i search_pattern = getattr(self.extra_opts, search, '') + replace_txt = getattr(self.extra_opts, replace, '') if search_pattern: - try: - search_re = re.compile(search_pattern) - replace_txt = getattr(self.extra_opts, replace, '') - if not replace_txt: - replace_txt = '' - rules.insert(0, (search_re, replace_txt)) - except Exception as e: - self.log.error('Failed to parse %r regexp because %s' % - (search, as_unicode(e))) + do_search_replace(search_pattern, replace_txt) + + # multi-search / replace using the search_replace option + search_replace = getattr(self.extra_opts, 'search_replace', None) + if search_replace: + search_replace = json.loads(search_replace) + for search_pattern, replace_txt in search_replace: + do_search_replace(search_pattern, replace_txt) end_rules = [] # delete soft hyphens - moved here so it's executed after header/footer removal diff --git a/src/calibre/ebooks/mobi/debug/headers.py b/src/calibre/ebooks/mobi/debug/headers.py index 034c714d31..07a3fa91b9 100644 --- a/src/calibre/ebooks/mobi/debug/headers.py +++ b/src/calibre/ebooks/mobi/debug/headers.py @@ -308,8 +308,10 @@ class MOBIHeader(object): # {{{ self.extra_data_flags = 0 if self.has_extra_data_flags: self.unknown4 = self.raw[180:192] - self.fdst_idx, self.fdst_count = struct.unpack_from(b'>II', + self.fdst_idx, self.fdst_count = struct.unpack_from(b'>LL', self.raw, 192) + if self.fdst_count <= 1: + self.fdst_idx = NULL_INDEX (self.fcis_number, self.fcis_count, self.flis_number, self.flis_count) = struct.unpack(b'>IIII', self.raw[200:216]) @@ -342,7 +344,7 @@ class MOBIHeader(object): # {{{ 'first_non_book_record', 'datp_record_offset', 'fcis_number', 'flis_number', 'primary_index_record', 'fdst_idx', 'first_image_index'): - if hasattr(self, x): + if hasattr(self, x) and getattr(self, x) != NULL_INDEX: setattr(self, x, self.header_offset+getattr(self, x)) if self.has_exth: diff --git a/src/calibre/ebooks/mobi/debug/mobi8.py b/src/calibre/ebooks/mobi/debug/mobi8.py index 9dcc298742..1c61690d42 100644 --- a/src/calibre/ebooks/mobi/debug/mobi8.py +++ b/src/calibre/ebooks/mobi/debug/mobi8.py @@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en' import sys, os, imghdr, struct from itertools 
import izip +from calibre import CurrentDir from calibre.ebooks.mobi.debug.headers import TextRecord from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex, NCXIndex) from calibre.ebooks.mobi.utils import read_font_record @@ -43,6 +44,24 @@ class FDST(object): return '\n'.join(ans) +class File(object): + + def __init__(self, skel, skeleton, text, first_aid, sections): + self.name = 'part%04d'%skel.file_number + self.skeleton, self.text, self.first_aid = skeleton, text, first_aid + self.sections = sections + + def dump(self, ddir): + with open(os.path.join(ddir, self.name + '.html'), 'wb') as f: + f.write(self.text) + base = os.path.join(ddir, self.name + '-parts') + os.mkdir(base) + with CurrentDir(base): + with open('skeleton.html', 'wb') as f: + f.write(self.skeleton) + for i, text in enumerate(self.sections): + with open('sect-%04d.html'%i, 'wb') as f: + f.write(text) class MOBIFile(object): @@ -67,6 +86,7 @@ class MOBIFile(object): self.extract_resources() self.read_fdst() self.read_indices() + self.build_files() def print_header(self, f=sys.stdout): print (str(self.mf.palmdb).encode('utf-8'), file=f) @@ -95,6 +115,26 @@ class MOBIFile(object): self.ncx_index = NCXIndex(self.header.primary_index_record, self.mf.records, self.header.encoding) + def build_files(self): + text = self.raw_text + self.files = [] + for skel in self.skel_index.records: + sects = [x for x in self.sect_index.records if x.file_number + == skel.file_number] + skeleton = text[skel.start_position:skel.start_position+skel.length] + ftext = skeleton + first_aid = sects[0].toc_text + sections = [] + + for sect in sects: + start_pos = skel.start_position + skel.length + sect.start_pos + sect_text = text[start_pos:start_pos+sect.length] + insert_pos = sect.insert_pos - skel.start_position + ftext = ftext[:insert_pos] + sect_text + ftext[insert_pos:] + sections.append(sect_text) + + self.files.append(File(skel, skeleton, ftext, first_aid, sections)) + def extract_resources(self): self.resource_map = [] known_types = {b'FLIS', b'FCIS', b'SRCS', @@ -141,7 +181,7 @@ def inspect_mobi(mobi_file, ddir): with open(alltext, 'wb') as of: of.write(f.raw_text) - for x in ('text_records', 'images', 'fonts', 'binary'): + for x in ('text_records', 'images', 'fonts', 'binary', 'files'): os.mkdir(os.path.join(ddir, x)) for rec in f.text_records: @@ -164,3 +204,6 @@ def inspect_mobi(mobi_file, ddir): with open(os.path.join(ddir, 'ncx.record'), 'wb') as fo: fo.write(str(f.ncx_index).encode('utf-8')) + for part in f.files: + part.dump(os.path.join(ddir, 'files')) + diff --git a/src/calibre/ebooks/mobi/mobiml.py b/src/calibre/ebooks/mobi/mobiml.py index 7cda4b0a57..d276689224 100644 --- a/src/calibre/ebooks/mobi/mobiml.py +++ b/src/calibre/ebooks/mobi/mobiml.py @@ -10,7 +10,7 @@ import copy import re from lxml import etree from calibre.ebooks.oeb.base import namespace, barename -from calibre.ebooks.oeb.base import XHTML, XHTML_NS, OEB_DOCS, urlnormalize +from calibre.ebooks.oeb.base import XHTML, XHTML_NS, urlnormalize from calibre.ebooks.oeb.stylizer import Stylizer from calibre.ebooks.oeb.transforms.flatcss import KeyMapper from calibre.utils.magick.draw import identify_data @@ -109,26 +109,8 @@ class MobiMLizer(object): self.profile = profile = context.dest self.fnums = fnums = dict((v, k) for k, v in profile.fnums.items()) self.fmap = KeyMapper(profile.fbase, profile.fbase, fnums.keys()) - self.remove_html_cover() self.mobimlize_spine() - def remove_html_cover(self): - oeb = self.oeb - if not oeb.metadata.cover \ - or 'cover' 
not in oeb.guide: - return - href = oeb.guide['cover'].href - del oeb.guide['cover'] - item = oeb.manifest.hrefs[href] - if item.spine_position is not None: - self.log.warn('Found an HTML cover,', item.href, 'removing it.', - 'If you find some content missing from the output MOBI, it ' - 'is because you misidentified the HTML cover in the input ' - 'document') - oeb.spine.remove(item) - if item.media_type in OEB_DOCS: - self.oeb.manifest.remove(item) - def mobimlize_spine(self): 'Iterate over the spine and convert it to MOBIML' for item in self.oeb.spine: @@ -473,7 +455,7 @@ class MobiMLizer(object): if tag in TABLE_TAGS and self.ignore_tables: tag = 'span' if tag == 'td' else 'div' - if tag == 'table': + if tag in ('table', 'td', 'tr'): col = style.backgroundColor if col: elem.set('bgcolor', col) diff --git a/src/calibre/ebooks/mobi/reader/index.py b/src/calibre/ebooks/mobi/reader/index.py index f5add94eac..c732d8862e 100644 --- a/src/calibre/ebooks/mobi/reader/index.py +++ b/src/calibre/ebooks/mobi/reader/index.py @@ -114,6 +114,7 @@ class CNCX(object): # {{{ def __bool__(self): return bool(self.records) + __nonzero__ = __bool__ def iteritems(self): return self.records.iteritems() diff --git a/src/calibre/ebooks/mobi/utils.py b/src/calibre/ebooks/mobi/utils.py index 3530736ba0..0ae992f438 100644 --- a/src/calibre/ebooks/mobi/utils.py +++ b/src/calibre/ebooks/mobi/utils.py @@ -7,7 +7,7 @@ __license__ = 'GPL v3' __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import struct, string, imghdr, zlib +import struct, string, imghdr, zlib, os from collections import OrderedDict from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail @@ -364,7 +364,7 @@ def count_set_bits(num): num >>= 1 return ans -def to_base(num, base=32): +def to_base(num, base=32, min_num_digits=None): digits = string.digits + string.ascii_uppercase sign = 1 if num >= 0 else -1 if num == 0: return '0' @@ -373,6 +373,8 @@ def to_base(num, base=32): while num: ans.append(digits[(num % base)]) num //= base + if min_num_digits is not None and len(ans) < min_num_digits: + ans.extend('0'*(min_num_digits - len(ans))) if sign < 0: ans.append('-') ans.reverse() @@ -388,27 +390,8 @@ def mobify_image(data): data = im.export('gif') return data -def read_zlib_header(header): - header = bytearray(header) - # See sec 2.2 of RFC 1950 for the zlib stream format - # http://www.ietf.org/rfc/rfc1950.txt - if (header[0]*256 + header[1])%31 != 0: - return None, 'Bad zlib header, FCHECK failed' - - cmf = header[0] & 0b1111 - cinfo = header[0] >> 4 - if cmf != 8: - return None, 'Unknown zlib compression method: %d'%cmf - if cinfo > 7: - return None, 'Invalid CINFO field in zlib header: %d'%cinfo - fdict = (header[1]&0b10000)>>5 - if fdict != 0: - return None, 'FDICT based zlib compression not supported' - wbits = cinfo + 8 - return wbits, None - - -def read_font_record(data, extent=1040): # {{{ +# Font records {{{ +def read_font_record(data, extent=1040): ''' Return the font encoded in the MOBI FONT record represented by data. 
The return value in a dict with fields raw_data, font_data, err, ext, @@ -466,15 +449,8 @@ def read_font_record(data, extent=1040): # {{{ if flags & 0b1: # ZLIB compressed data - wbits, err = read_zlib_header(font_data[:2]) - if err is not None: - ans['err'] = err - return ans - adler32, = struct.unpack_from(b'>I', font_data, len(font_data) - 4) try: - # remove two bytes of zlib header and 4 bytes of trailing checksum - # negative wbits indicates no standard gzip header - font_data = zlib.decompress(font_data[2:-4], -wbits, usize) + font_data = zlib.decompress(font_data) except Exception as e: ans['err'] = 'Failed to zlib decompress font data (%s)'%e return ans @@ -483,23 +459,42 @@ def read_font_record(data, extent=1040): # {{{ ans['err'] = 'Uncompressed font size mismatch' return ans - if False: - # For some reason these almost never match, probably Amazon has a - # buggy Adler32 implementation - sig = (zlib.adler32(font_data) & 0xffffffff) - if sig != adler32: - ans['err'] = ('Adler checksum did not match. Stored: %d ' - 'Calculated: %d')%(adler32, sig) - return ans - ans['font_data'] = font_data sig = font_data[:4] ans['ext'] = ('ttf' if sig in {b'\0\1\0\0', b'true', b'ttcf'} else 'otf' if sig == b'OTTO' else 'dat') return ans + +def write_font_record(data, obfuscate=True, compress=True): + ''' + Write the ttf/otf font represented by data into a font record. See + read_font_record() for details on the format of the record. + ''' + + flags = 0 + key_len = 20 + usize = len(data) + xor_key = b'' + if compress: + flags |= 0b1 + data = zlib.compress(data, 9) + if obfuscate: + flags |= 0b10 + xor_key = os.urandom(key_len) + key = bytearray(xor_key) + data = bytearray(data) + for i in xrange(1040): + data[i] ^= key[i%key_len] + data = bytes(data) + + key_start = struct.calcsize(b'>5L') + 4 + data_start = key_start + len(xor_key) + + header = b'FONT' + struct.pack(b'>5L', usize, flags, data_start, + len(xor_key), key_start) + + return header + xor_key + data + # }}} - - - diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py index 99321fab12..b7a0d76424 100644 --- a/src/calibre/ebooks/mobi/writer2/main.py +++ b/src/calibre/ebooks/mobi/writer2/main.py @@ -11,17 +11,15 @@ import re, random, time from cStringIO import StringIO from struct import pack -from calibre.ebooks import normalize, generate_masthead -from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES +from calibre.ebooks import normalize from calibre.ebooks.mobi.writer2.serializer import Serializer from calibre.ebooks.compression.palmdoc import compress_doc from calibre.ebooks.mobi.langcodes import iana2mobi from calibre.utils.filenames import ascii_filename from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED, RECORD_SIZE) -from calibre.ebooks.mobi.utils import (rescale_image, encint, mobify_image, - encode_trailing_data, align_block, detect_periodical) +from calibre.ebooks.mobi.utils import (encint, encode_trailing_data, + align_block, detect_periodical) from calibre.ebooks.mobi.writer2.indexer import Indexer -from calibre.ebooks.mobi import MAX_THUMB_DIMEN, MAX_THUMB_SIZE EXTH_CODES = { 'creator': 100, @@ -50,8 +48,10 @@ WRITE_UNCROSSABLE_BREAKS = False class MobiWriter(object): COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+') - def __init__(self, opts, write_page_breaks_after_item=True): + def __init__(self, opts, resources, kf8, write_page_breaks_after_item=True): self.opts = opts + self.resources = resources + self.kf8 = kf8 self.write_page_breaks_after_item = 
write_page_breaks_after_item self.compression = UNCOMPRESSED if opts.dont_compress else PALMDOC self.prefer_author_sort = opts.prefer_author_sort @@ -151,66 +151,14 @@ class MobiWriter(object): # Images {{{ def generate_images(self): - oeb = self.oeb - oeb.logger.info('Serializing images...') - self.image_records = [] - self.image_map = {} - self.masthead_offset = 0 - index = 1 + resources = self.resources + image_records = resources.records + self.image_map = resources.item_map + self.masthead_offset = resources.masthead_offset + self.cover_offset = resources.cover_offset + self.thumbnail_offset = resources.thumbnail_offset - mh_href = None - if 'masthead' in oeb.guide and oeb.guide['masthead'].href: - mh_href = oeb.guide['masthead'].href - self.image_records.append(None) - index += 1 - elif self.is_periodical: - # Generate a default masthead - data = generate_masthead(unicode(self.oeb.metadata['title'][0])) - self.image_records.append(data) - index += 1 - - cover_href = self.cover_offset = self.thumbnail_offset = None - if (oeb.metadata.cover and - unicode(oeb.metadata.cover[0]) in oeb.manifest.ids): - cover_id = unicode(oeb.metadata.cover[0]) - item = oeb.manifest.ids[cover_id] - cover_href = item.href - - for item in self.oeb.manifest.values(): - if item.media_type not in OEB_RASTER_IMAGES: continue - try: - data = item.data - if self.opts.mobi_keep_original_images: - data = mobify_image(data) - else: - data = rescale_image(data) - except: - oeb.logger.warn('Bad image file %r' % item.href) - continue - else: - if mh_href and item.href == mh_href: - self.image_records[0] = data - continue - - self.image_records.append(data) - self.image_map[item.href] = index - index += 1 - - if cover_href and item.href == cover_href: - self.cover_offset = self.image_map[item.href] - 1 - try: - data = rescale_image(item.data, dimen=MAX_THUMB_DIMEN, - maxsizeb=MAX_THUMB_SIZE) - except: - oeb.logger.warn('Failed to generate thumbnail') - else: - self.image_records.append(data) - self.thumbnail_offset = index - 1 - index += 1 - finally: - item.unload_data_from_memory() - - if self.image_records and self.image_records[0] is None: + if image_records and image_records[0] is None: raise ValueError('Failed to find masthead image in manifest') # }}} @@ -317,9 +265,12 @@ class MobiWriter(object): exth = self.build_exth(bt) first_image_record = None - if self.image_records: + if self.resources: + used_images = self.serializer.used_images + if self.kf8 is not None: + used_images |= self.kf8.used_images first_image_record = len(self.records) - self.records.extend(self.image_records) + self.resources.serialize(self.records, used_images) last_content_record = len(self.records) - 1 # FCIS/FLIS (Seems to serve no purpose) diff --git a/src/calibre/ebooks/mobi/writer2/resources.py b/src/calibre/ebooks/mobi/writer2/resources.py new file mode 100644 index 0000000000..2fcb93790c --- /dev/null +++ b/src/calibre/ebooks/mobi/writer2/resources.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2012, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import imghdr + +from calibre.ebooks.mobi import MAX_THUMB_DIMEN, MAX_THUMB_SIZE +from calibre.ebooks.mobi.utils import (rescale_image, mobify_image, + write_font_record) +from calibre.ebooks import generate_masthead +from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES + +PLACEHOLDER_GIF = 
b'GIF89a\x01\x00\x01\x00\x80\x00\x00\x00\x00\x00\xff\xff\xff!\xf9\x04\x01\x00\x00\x00\x00,\x00\x00\x00\x00\x01\x00\x01\x00@\x02\x01D\x00;' + +class Resources(object): + + def __init__(self, oeb, opts, is_periodical, add_fonts=False): + self.oeb, self.log, self.opts = oeb, oeb.log, opts + self.is_periodical = is_periodical + + self.item_map = {} + self.records = [] + self.mime_map = {} + self.masthead_offset = 0 + self.used_image_indices = set() + self.image_indices = set() + self.cover_offset = self.thumbnail_offset = None + + self.add_resources(add_fonts) + + def process_image(self, data): + return (mobify_image(data) if self.opts.mobi_keep_original_images else + rescale_image(data)) + + def add_resources(self, add_fonts): + oeb = self.oeb + oeb.logger.info('Serializing resources...') + index = 1 + + mh_href = None + if 'masthead' in oeb.guide and oeb.guide['masthead'].href: + mh_href = oeb.guide['masthead'].href + self.records.append(None) + index += 1 + self.used_image_indices.add(0) + self.image_indices.add(0) + elif self.is_periodical: + # Generate a default masthead + data = generate_masthead(unicode(self.oeb.metadata['title'][0])) + self.records.append(data) + self.used_image_indices.add(0) + self.image_indices.add(0) + index += 1 + + cover_href = self.cover_offset = self.thumbnail_offset = None + if (oeb.metadata.cover and + unicode(oeb.metadata.cover[0]) in oeb.manifest.ids): + cover_id = unicode(oeb.metadata.cover[0]) + item = oeb.manifest.ids[cover_id] + cover_href = item.href + + for item in self.oeb.manifest.values(): + if item.media_type not in OEB_RASTER_IMAGES: continue + try: + data = self.process_image(item.data) + except: + self.log.warn('Bad image file %r' % item.href) + continue + else: + if mh_href and item.href == mh_href: + self.records[0] = data + continue + + self.image_indices.add(len(self.records)) + self.records.append(data) + self.item_map[item.href] = index + self.mime_map[item.href] = 'image/%s'%imghdr.what(None, data) + index += 1 + + if cover_href and item.href == cover_href: + self.cover_offset = self.item_map[item.href] - 1 + self.used_image_indices.add(self.cover_offset) + try: + data = rescale_image(item.data, dimen=MAX_THUMB_DIMEN, + maxsizeb=MAX_THUMB_SIZE) + except: + self.log.warn('Failed to generate thumbnail') + else: + self.image_indices.add(len(self.records)) + self.records.append(data) + self.thumbnail_offset = index - 1 + self.used_image_indices.add(self.thumbnail_offset) + index += 1 + finally: + item.unload_data_from_memory() + + if add_fonts: + for item in self.oeb.manifest.values(): + if item.href and item.href.rpartition('.')[-1].lower() in { + 'ttf', 'otf'} and isinstance(item.data, bytes): + self.records.append(write_font_record(item.data)) + self.item_map[item.href] = len(self.records) + + def add_extra_images(self): + ''' + Add any images that were created after the call to add_resources() + ''' + for item in self.oeb.manifest.values(): + if (item.media_type not in OEB_RASTER_IMAGES or item.href in + self.item_map): continue + try: + data = self.process_image(item.data) + except: + self.log.warn('Bad image file %r' % item.href) + else: + self.records.append(data) + self.item_map[item.href] = len(self.records) + finally: + item.unload_data_from_memory() + + def serialize(self, records, used_images): + used_image_indices = self.used_image_indices | { + v-1 for k, v in self.item_map.iteritems() if k in used_images} + for i in self.image_indices-used_image_indices: + self.records[i] = PLACEHOLDER_GIF + records.extend(self.records) + + 
def __bool__(self): + return bool(self.records) + __nonzero__ = __bool__ + diff --git a/src/calibre/ebooks/mobi/writer2/serializer.py b/src/calibre/ebooks/mobi/writer2/serializer.py index b35f33439b..d8d63bcff4 100644 --- a/src/calibre/ebooks/mobi/writer2/serializer.py +++ b/src/calibre/ebooks/mobi/writer2/serializer.py @@ -39,6 +39,7 @@ class Serializer(object): self.oeb = oeb # Map of image hrefs to image index in the MOBI file self.images = images + self.used_images = set() self.logger = oeb.logger self.is_periodical = is_periodical self.write_page_breaks_after_item = write_page_breaks_after_item @@ -329,6 +330,7 @@ class Serializer(object): href = urlnormalize(item.abshref(val)) if href in self.images: index = self.images[href] + self.used_images.add(href) buf.write(b'recindex="%05d"' % index) continue buf.write(attr.encode('utf-8')) diff --git a/src/calibre/ebooks/mobi/writer8/main.py b/src/calibre/ebooks/mobi/writer8/main.py index fc4234eb10..79ff7c3d96 100644 --- a/src/calibre/ebooks/mobi/writer8/main.py +++ b/src/calibre/ebooks/mobi/writer8/main.py @@ -7,9 +7,199 @@ __license__ = 'GPL v3' __copyright__ = '2012, Kovid Goyal ' __docformat__ = 'restructuredtext en' +import copy +from functools import partial +from collections import defaultdict + +import cssutils +from lxml import etree + +from calibre import isbytestring, force_unicode +from calibre.ebooks.mobi.utils import to_base +from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath, + extract, XHTML, urlnormalize) +from calibre.ebooks.oeb.parse_utils import barename +from calibre.ebooks.mobi.writer8.skeleton import Chunker, aid_able_tags + +XML_DOCS = OEB_DOCS | {SVG_MIME} + +# References to record numbers in KF8 are stored as base-32 encoded integers, +# with 4 digits +to_ref = partial(to_base, base=32, min_num_digits=4) +# References in links are stored with 10 digits +to_href = partial(to_base, base=32, min_num_digits=10) class KF8Writer(object): - def __init__(self, oeb, opts): + def __init__(self, oeb, opts, resources): self.oeb, self.opts, self.log = oeb, opts, oeb.log + self.log.info('Creating KF8 output') + self.used_images = set() + self.resources = resources + self.dup_data() + self.flows = [None] # First flow item is reserved for the text + + self.replace_resource_links() + self.extract_css_into_flows() + self.extract_svg_into_flows() + self.replace_internal_links_with_placeholders() + self.insert_aid_attributes() + self.chunk_it_up() + + def dup_data(self): + ''' Duplicate data so that any changes we make to markup/CSS only + affect KF8 output and not MOBI 6 output ''' + self._data_cache = {} + for item in self.oeb.manifest: + if item.media_type in XML_DOCS: + self._data_cache[item.href] = copy.deepcopy(item.data) + elif item.media_type in OEB_STYLES: + # I can't figure out how to make an efficient copy of the + # in-memory CSSStylesheet, as deepcopy doesn't work (raises an + # exception) + self._data_cache[item.href] = cssutils.parseString( + item.data.cssText) + + def data(self, item): + return self._data_cache.get(item.href, item.data) + + def replace_resource_links(self): + ''' Replace links to resources (raster images/fonts) with pointers to + the MOBI record containing the resource. The pointers are of the form: + kindle:embed:XXXX?mime=image/* The ?mime= is apparently optional and + not used for fonts. 
''' + + def pointer(item, oref): + ref = item.abshref(oref) + idx = self.resources.item_map.get(ref, None) + if idx is not None: + is_image = self.resources.records[idx-1][:4] not in {b'FONT'} + idx = to_ref(idx) + if is_image: + self.used_images.add(ref) + return 'kindle:embed:%s?mime=%s'%(idx, + self.resources.mime_map[ref]) + else: + return 'kindle:embed:%s'%idx + return oref + + for item in self.oeb.manifest: + + if item.media_type in XML_DOCS: + root = self.data(item) + for tag in XPath('//h:img|//svg:image')(root): + for attr, ref in tag.attrib.iteritems(): + if attr.split('}')[-1].lower() in {'src', 'href'}: + tag.attrib[attr] = pointer(item, ref) + + for tag in XPath('//h:style')(root): + if tag.text: + sheet = cssutils.parseString(tag.text) + replacer = partial(pointer, item) + cssutils.replaceUrls(sheet, replacer, + ignoreImportRules=True) + repl = sheet.cssText + if isbytestring(repl): + repl = repl.decode('utf-8') + tag.text = '\n'+ repl + '\n' + + elif item.media_type in OEB_STYLES: + sheet = self.data(item) + replacer = partial(pointer, item) + cssutils.replaceUrls(sheet, replacer, ignoreImportRules=True) + + def extract_css_into_flows(self): + inlines = defaultdict(list) # Ensure identical
Komentarze