diff --git a/Changelog.yaml b/Changelog.yaml
index 7dde319f65..cef951cc1e 100644
--- a/Changelog.yaml
+++ b/Changelog.yaml
@@ -19,6 +19,81 @@
# new recipes:
# - title:
+- version: 0.9.10
+ date: 2012-12-14
+
+ new features:
+ - title: "Drivers for Nextbook Premium 8 se, HTC Desire X and Emerson EM 543"
+ tickets: [1088149, 1088112, 1087978]
+
+ bug fixes:
+ - title: "Fix rich text delegate not working with Qt compiled in debug mode."
+ tickets: [1089011]
+
+ - title: "When deleting all books in the library, blank the book details panel"
+
+ - title: "Conversion: Fix malformed values in the bgcolor attribute causing conversion to abort"
+
+ - title: "Conversion: Fix heuristics applying incorrect style in some circumstances"
+ tickets: [1066507]
+
+ - title: "Possible fix for 64bit calibre not starting up on some Windows systems"
+ tickets: [1087816]
+
+ improved recipes:
+ - Sivil Dusunce
+ - Anchorage Daily News
+ - Le Monde
+ - Harpers
+
+ new recipes:
+ - title: Titanic
+ author: Krittika Goyal
+
+- version: 0.9.9
+ date: 2012-12-07
+
+ new features:
+ - title: "64 bit build for windows"
+ type: major
+ description: "calibre now has a 64 bit version for windows, available at: http://calibre-ebook.com/download_windows64 The 64bit build is not limited to using only 3GB of RAM when converting large/complex documents. It may also be slightly faster for some tasks. You can have both the 32 bit and the 64 bit build installed at the same time, they will use the same libraries, plugins and settings."
+
+ - title: "Content server: Make the identifiers in each book's metadata clickable."
+ tickets: [1085726]
+
+ bug fixes:
+ - title: "EPUB Input: Fix an infinite loop while trying to recover a damaged EPUB file."
+ tickets: [1086917]
+
+ - title: "KF8 Input: Fix handling of links in files that link to the obsolete <a name> tags instead of tags with an id attribute."
+ tickets: [1086705]
+
+ - title: "Conversion: Fix a bug in removal of invalid entries from the spine, where not all invalid entries were removed, causing conversion to fail."
+ tickets: [1086054]
+
+ - title: "KF8 Input: Ignore invalid flow references in the KF8 document instead of erroring out on them."
+ tickets: [1085306]
+
+ - title: "Fix command line output on linux systems with incorrect LANG/LC_CTYPE env vars."
+ tickets: [1085103]
+
+ - title: "KF8 Input: Fix page breaks specified using the data-AmznPageBreak attribute being ignored by calibre."
+
+ - title: "PDF Output: Fix custom size field not accepting fractional numbers as sizes"
+
+ - title: "Get Books: Update libre.de and publio for website changes"
+
+ - title: "Wireless driver: Increase timeout interval, and when allocating a random port try 9090 first"
+
+ improved recipes:
+ - New York Times
+ - Weblogs SL
+ - Zaman Gazetesi
+ - Aksiyon Dergisi
+ - Engadget
+ - Metro UK
+ - Heise Online
+
- version: 0.9.8
date: 2012-11-30
diff --git a/recipes/adventure_zone_pl.recipe b/recipes/adventure_zone_pl.recipe
index 485a2e0c5b..2224937f3c 100644
--- a/recipes/adventure_zone_pl.recipe
+++ b/recipes/adventure_zone_pl.recipe
@@ -9,11 +9,12 @@ class Adventure_zone(BasicNewsRecipe):
no_stylesheets = True
oldest_article = 20
max_articles_per_feed = 100
+ cover_url = 'http://www.adventure-zone.info/inne/logoaz_2012.png'
index='http://www.adventure-zone.info/fusion/'
use_embedded_content=False
preprocess_regexps = [(re.compile(r"
Komentarze | ", re.IGNORECASE), lambda m: ''),
- (re.compile(r'\'), lambda match: ''),
- (re.compile(r'\'), lambda match: '')]
+ (re.compile(r'</?table.*?>'), lambda match: ''),
+ (re.compile(r'</?tbody.*?>'), lambda match: '')]
remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
remove_tags= [dict(name='img', attrs={'alt':'Drukuj'})]
remove_tags_after= dict(id='comments')
@@ -36,11 +37,11 @@ class Adventure_zone(BasicNewsRecipe):
return feeds
- def get_cover_url(self):
+ '''def get_cover_url(self):
soup = self.index_to_soup('http://www.adventure-zone.info/fusion/news.php')
cover=soup.find(id='box_OstatninumerAZ')
self.cover_url='http://www.adventure-zone.info/fusion/'+ cover.center.a.img['src']
- return getattr(self, 'cover_url', self.cover_url)
+ return getattr(self, 'cover_url', self.cover_url)'''
def skip_ad_pages(self, soup):
diff --git a/recipes/anchorage_daily.recipe b/recipes/anchorage_daily.recipe
index 4ce2f13a14..7bda0f5bcd 100644
--- a/recipes/anchorage_daily.recipe
+++ b/recipes/anchorage_daily.recipe
@@ -5,14 +5,16 @@ class AdvancedUserRecipe1278347258(BasicNewsRecipe):
__author__ = 'rty'
oldest_article = 7
max_articles_per_feed = 100
+ auto_cleanup = True
+
feeds = [(u'Alaska News', u'http://www.adn.com/news/alaska/index.xml'),
- (u'Business', u'http://www.adn.com/money/index.xml'),
- (u'Sports', u'http://www.adn.com/sports/index.xml'),
- (u'Politics', u'http://www.adn.com/politics/index.xml'),
- (u'Lifestyles', u'http://www.adn.com/life/index.xml'),
- (u'Iditarod', u'http://www.adn.com/iditarod/index.xml')
- ]
+ (u'Business', u'http://www.adn.com/money/index.xml'),
+ (u'Sports', u'http://www.adn.com/sports/index.xml'),
+ (u'Politics', u'http://www.adn.com/politics/index.xml'),
+ (u'Lifestyles', u'http://www.adn.com/life/index.xml'),
+ (u'Iditarod', u'http://www.adn.com/iditarod/index.xml')
+ ]
description = ''''Alaska's Newspaper'''
publisher = 'http://www.adn.com'
category = 'news, Alaska, Anchorage'
@@ -28,13 +30,13 @@ class AdvancedUserRecipe1278347258(BasicNewsRecipe):
conversion_options = {'linearize_tables':True}
masthead_url = 'http://media.adn.com/includes/assets/images/adn_logo.2.gif'
- keep_only_tags = [
- dict(name='div', attrs={'class':'left_col story_mainbar'}),
- ]
- remove_tags = [
- dict(name='div', attrs={'class':'story_tools'}),
- dict(name='p', attrs={'class':'ad_label'}),
- ]
- remove_tags_after = [
- dict(name='div', attrs={'class':'advertisement'}),
- ]
+ #keep_only_tags = [
+ #dict(name='div', attrs={'class':'left_col story_mainbar'}),
+ #]
+ #remove_tags = [
+ #dict(name='div', attrs={'class':'story_tools'}),
+ #dict(name='p', attrs={'class':'ad_label'}),
+ #]
+ #remove_tags_after = [
+ #dict(name='div', attrs={'class':'advertisement'}),
+ #]
diff --git a/recipes/android_com_pl.recipe b/recipes/android_com_pl.recipe
index c7a4a97d3c..a4a387d414 100644
--- a/recipes/android_com_pl.recipe
+++ b/recipes/android_com_pl.recipe
@@ -3,11 +3,11 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Android_com_pl(BasicNewsRecipe):
title = u'Android.com.pl'
__author__ = 'fenuks'
- description = 'Android.com.pl - biggest polish Android site'
+ description = u'Android.com.pl - to największe w Polsce centrum Android OS. Znajdziesz tu: nowości, forum, pomoc, recenzje, gry, aplikacje.'
category = 'Android, mobile'
language = 'pl'
use_embedded_content=True
- cover_url =u'http://upload.wikimedia.org/wikipedia/commons/thumb/d/d7/Android_robot.svg/220px-Android_robot.svg.png'
+ cover_url =u'http://android.com.pl/wp-content/themes/android/images/logo.png'
oldest_article = 8
max_articles_per_feed = 100
- feeds = [(u'Android', u'http://android.com.pl/component/content/frontpage/frontpage.feed?type=rss')]
+ feeds = [(u'Android', u'http://android.com.pl/feed/')]
diff --git a/recipes/astroflesz.recipe b/recipes/astroflesz.recipe
new file mode 100644
index 0000000000..0b92fdfa29
--- /dev/null
+++ b/recipes/astroflesz.recipe
@@ -0,0 +1,19 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Astroflesz(BasicNewsRecipe):
+ title = u'Astroflesz'
+ oldest_article = 7
+ __author__ = 'fenuks'
+ description = u'astroflesz.pl - to portal poświęcony astronomii. Informuje zarówno o aktualnych wydarzeniach i odkryciach naukowych, jak również zapowiada ciekawe zjawiska astronomiczne'
+ category = 'astronomy'
+ language = 'pl'
+ cover_url = 'http://www.astroflesz.pl/templates/astroflesz/images/logo/logo.png'
+ ignore_duplicate_articles = {'title', 'url'}
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ use_embedded_content = False
+ keep_only_tags = [dict(id="k2Container")]
+ remove_tags_after = dict(name='div', attrs={'class':'itemLinks'})
+ remove_tags = [dict(name='div', attrs={'class':['itemLinks', 'itemToolbar', 'itemRatingBlock']})]
+ feeds = [(u'Wszystkie', u'http://astroflesz.pl/?format=feed')]
diff --git a/recipes/birmingham_post.recipe b/recipes/birmingham_post.recipe
index b9b3c3fc57..49c86fe3b8 100644
--- a/recipes/birmingham_post.recipe
+++ b/recipes/birmingham_post.recipe
@@ -1,9 +1,11 @@
from calibre.web.feeds.news import BasicNewsRecipe
+import re
+import mechanize
+
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Birmingham post'
description = 'Author D.Asbury. News for Birmingham UK'
#timefmt = ''
- # last update 8/9/12
__author__ = 'Dave Asbury'
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
oldest_article = 2
@@ -15,8 +17,30 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
#auto_cleanup = True
language = 'en_GB'
+ cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
- masthead_url = 'http://www.pressgazette.co.uk/Pictures/web/t/c/g/birmingham_post.jpg'
+ masthead_url = 'http://www.trinitymirror.com/images/birminghampost-logo.gif'
+ def get_cover_url(self):
+ soup = self.index_to_soup('http://www.birminghampost.net')
+ # look for the block containing the sun button and url
+ cov = soup.find(attrs={'height' : re.compile('3'), 'alt' : re.compile('Birmingham Post')})
+ print
+ print '%%%%%%%%%%%%%%%',cov
+ print
+ cov2 = str(cov['src'])
+ # cov2=cov2[7:]
+ print '88888888 ',cov2,' 888888888888'
+
+ #cover_url=cov2
+ #return cover_url
+ br = mechanize.Browser()
+ br.set_handle_redirect(False)
+ try:
+ br.open_novisit(cov2)
+ cover_url = cov2
+ except:
+ cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
+ return cover_url
keep_only_tags = [
diff --git a/recipes/countryfile.recipe b/recipes/countryfile.recipe
index 4f2e8cd95f..86769b78cd 100644
--- a/recipes/countryfile.recipe
+++ b/recipes/countryfile.recipe
@@ -7,25 +7,30 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
#cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'
__author__ = 'Dave Asbury'
description = 'The official website of Countryfile Magazine'
- # last updated 7/10/12
+ # last updated 8/12/12
language = 'en_GB'
oldest_article = 30
max_articles_per_feed = 25
remove_empty_feeds = True
no_stylesheets = True
auto_cleanup = True
+ ignore_duplicate_articles = {'title', 'url'}
#articles_are_obfuscated = True
- ignore_duplicate_articles = {'title'}
+ #article_already_exists = False
+ #feed_hash = ''
def get_cover_url(self):
- soup = self.index_to_soup('http://www.countryfile.com/')
+ soup = self.index_to_soup('http://www.countryfile.com/magazine')
+ cov = soup.find(attrs={'class' : re.compile('imagecache imagecache-250px_wide')})#'width' : '160',
+ print '&&&&&&&& ',cov,' ***'
+ cov=str(cov)
+ #cov2 = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
+ cov2 = re.findall('/(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
+
+ cov2 = str(cov2)
+ cov2= "http://www.countryfile.com"+cov2[2:len(cov2)-8]
- cov = soup.find(attrs={'width' : '160', 'class' : re.compile('imagecache imagecache-160px_wide')})
- print '******** ',cov,' ***'
- cov2 = str(cov)
- cov2=cov2[10:101]
print '******** ',cov2,' ***'
- #cov2='http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/1b_0.jpg'
- # try to get cover - if can't get known cover
+ # try to get cover - if can't get known cover
br = browser()
br.set_handle_redirect(False)
@@ -45,5 +50,3 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
(u'Countryside', u'http://www.countryfile.com/rss/countryside'),
]
-
-
diff --git a/recipes/czas_gentlemanow.recipe b/recipes/czas_gentlemanow.recipe
new file mode 100644
index 0000000000..6df677f25f
--- /dev/null
+++ b/recipes/czas_gentlemanow.recipe
@@ -0,0 +1,20 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class CzasGentlemanow(BasicNewsRecipe):
+ title = u'Czas Gentlemanów'
+ __author__ = 'fenuks'
+ description = u'Historia mężczyzn z dala od wielkiej polityki'
+ category = 'blog'
+ language = 'pl'
+ cover_url = 'http://czasgentlemanow.pl/wp-content/uploads/2012/10/logo-Czas-Gentlemanow1.jpg'
+ ignore_duplicate_articles = {'title', 'url'}
+ oldest_article = 7
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ remove_empty_feeds = True
+ use_embedded_content = False
+ keep_only_tags = [dict(name='div', attrs={'class':'content'})]
+ remove_tags = [dict(attrs={'class':'meta_comments'})]
+ remove_tags_after = dict(name='div', attrs={'class':'fblikebutton_button'})
+ feeds = [(u'M\u0119ski \u015awiat', u'http://czasgentlemanow.pl/category/meski-swiat/feed/'), (u'Styl', u'http://czasgentlemanow.pl/category/styl/feed/'), (u'Vademecum Gentlemana', u'http://czasgentlemanow.pl/category/vademecum/feed/'), (u'Dom i rodzina', u'http://czasgentlemanow.pl/category/dom-i-rodzina/feed/'), (u'Honor', u'http://czasgentlemanow.pl/category/honor/feed/'), (u'Gad\u017cety Gentlemana', u'http://czasgentlemanow.pl/category/gadzety-gentlemana/feed/')]
diff --git a/recipes/dzieje_pl.recipe b/recipes/dzieje_pl.recipe
index 0aafa5d2f4..603591e9f0 100644
--- a/recipes/dzieje_pl.recipe
+++ b/recipes/dzieje_pl.recipe
@@ -7,18 +7,64 @@ class Dzieje(BasicNewsRecipe):
cover_url = 'http://www.dzieje.pl/sites/default/files/dzieje_logo.png'
category = 'history'
language = 'pl'
- index='http://dzieje.pl'
+ ignore_duplicate_articles = {'title', 'url'}
+ index = 'http://dzieje.pl'
oldest_article = 8
max_articles_per_feed = 100
remove_javascript=True
no_stylesheets= True
keep_only_tags = [dict(name='h1', attrs={'class':'title'}), dict(id='content-area')]
remove_tags = [dict(attrs={'class':'field field-type-computed field-field-tagi'}), dict(id='dogory')]
- feeds = [(u'Dzieje', u'http://dzieje.pl/rss.xml')]
+ #feeds = [(u'Dzieje', u'http://dzieje.pl/rss.xml')]
+ def append_page(self, soup, appendtag):
+ tag = appendtag.find('li', attrs={'class':'pager-next'})
+ if tag:
+ while tag:
+ url = tag.a['href']
+ if not url.startswith('http'):
+ url = 'http://dzieje.pl'+tag.a['href']
+ soup2 = self.index_to_soup(url)
+ pagetext = soup2.find(id='content-area').find(attrs={'class':'content'})
+ for r in pagetext.findAll(attrs={'class':['fieldgroup group-groupkul', 'fieldgroup group-zdjeciekult', 'fieldgroup group-zdjecieciekaw', 'fieldgroup group-zdjecieksiazka', 'fieldgroup group-zdjeciedu', 'field field-type-filefield field-field-zdjecieglownawyd']}):
+ r.extract()
+ pos = len(appendtag.contents)
+ appendtag.insert(pos, pagetext)
+ tag = soup2.find('li', attrs={'class':'pager-next'})
+ for r in appendtag.findAll(attrs={'class':['item-list', 'field field-type-computed field-field-tagi', ]}):
+ r.extract()
+
+ def find_articles(self, url):
+ articles = []
+ soup=self.index_to_soup(url)
+ tag=soup.find(id='content-area').div.div
+ for i in tag.findAll('div', recursive=False):
+ temp = i.find(attrs={'class':'views-field-title'}).span.a
+ title = temp.string
+ url = self.index + temp['href']
+ date = '' #i.find(attrs={'class':'views-field-created'}).span.string
+ articles.append({'title' : title,
+ 'url' : url,
+ 'date' : date,
+ 'description' : ''
+ })
+ return articles
+
+ def parse_index(self):
+ feeds = []
+ feeds.append((u"Wiadomości", self.find_articles('http://dzieje.pl/wiadomosci')))
+ feeds.append((u"Kultura i sztuka", self.find_articles('http://dzieje.pl/kulturaisztuka')))
+ feeds.append((u"Film", self.find_articles('http://dzieje.pl/kino')))
+ feeds.append((u"Rozmaitości historyczne", self.find_articles('http://dzieje.pl/rozmaitości')))
+ feeds.append((u"Książka", self.find_articles('http://dzieje.pl/ksiazka')))
+ feeds.append((u"Wystawa", self.find_articles('http://dzieje.pl/wystawa')))
+ feeds.append((u"Edukacja", self.find_articles('http://dzieje.pl/edukacja')))
+ feeds.append((u"Dzieje się", self.find_articles('http://dzieje.pl/wydarzenia')))
+ return feeds
def preprocess_html(self, soup):
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
+ self.append_page(soup, soup.body)
return soup
\ No newline at end of file
diff --git a/recipes/ekologia_pl.recipe b/recipes/ekologia_pl.recipe
new file mode 100644
index 0000000000..2b0933b58d
--- /dev/null
+++ b/recipes/ekologia_pl.recipe
@@ -0,0 +1,24 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+class EkologiaPl(BasicNewsRecipe):
+ title = u'Ekologia.pl'
+ __author__ = 'fenuks'
+ description = u'Portal ekologiczny - eko, ekologia, ochrona przyrody, ochrona środowiska, przyroda, środowisko online. Ekologia i ochrona środowiska. Ekologia dla dzieci.'
+ category = 'ecology'
+ language = 'pl'
+ cover_url = 'http://www.ekologia.pl/assets/images/logo/ekologia_pl_223x69.png'
+ ignore_duplicate_articles = {'title', 'url'}
+ extra_css = '.title {font-size: 200%;}'
+ oldest_article = 7
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ remove_empty_feeds = True
+ use_embedded_content = False
+ remove_tags = [dict(attrs={'class':['ekoLogo', 'powrocArt', 'butonDrukuj']})]
+
+ feeds = [(u'Wiadomo\u015bci', u'http://www.ekologia.pl/rss/20,53,0'), (u'\u015arodowisko', u'http://www.ekologia.pl/rss/20,56,0'), (u'Styl \u017cycia', u'http://www.ekologia.pl/rss/20,55,0')]
+
+ def print_version(self, url):
+ id = re.search(r',(?P\d+)\.html', url).group('id')
+ return 'http://drukuj.ekologia.pl/artykul/' + id
diff --git a/recipes/film_org_pl.recipe b/recipes/film_org_pl.recipe
new file mode 100644
index 0000000000..442e273b1b
--- /dev/null
+++ b/recipes/film_org_pl.recipe
@@ -0,0 +1,19 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+class FilmOrgPl(BasicNewsRecipe):
+ title = u'Film.org.pl'
+ __author__ = 'fenuks'
+ description = u"Recenzje, analizy, artykuły, rankingi - wszystko o filmie dla miłośników kina. Opisy efektów specjalnych, wersji reżyserskich, remake'ów, sequeli. No i forum filmowe. Jedne z największych w Polsce."
+ category = 'film'
+ language = 'pl'
+ cover_url = 'http://film.org.pl/wp-content/themes/KMF/images/logo_kmf10.png'
+ ignore_duplicate_articles = {'title', 'url'}
+ oldest_article = 7
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ remove_empty_feeds = True
+ use_embedded_content = True
+ preprocess_regexps = [(re.compile(ur'Przeczytaj także:
.*', re.IGNORECASE|re.DOTALL), lambda m: '