diff --git a/Changelog.yaml b/Changelog.yaml
index 21b92493a7..7f43887264 100644
--- a/Changelog.yaml
+++ b/Changelog.yaml
@@ -4,20 +4,91 @@
# for important features/bug fixes.
# Also, each release can have new and improved recipes.
-#- version: ?.?.?
-# date: 2011-??-??
+# - version: ?.?.?
+# date: 2011-??-??
#
-# new features:
-# - title:
+# new features:
+# - title:
#
-# bug fixes:
-# - title:
+# bug fixes:
+# - title:
#
-# improved recipes:
-# -
+# improved recipes:
+# -
#
-# new recipes:
-# - title:
+# new recipes:
+# - title:
+
+- version: 0.8.29
+ date: 2011-12-02
+
+ new features:
+ - title: "When searching for author names with accented characters, allow the non-accented version to match. For example, searching for Nino will now match Niño."
+ tickets: [879729]
+
+ - title: "Driver for Blackberry Playbook, Motorola Electrify and Samsung Galaxy GIO S5660"
+ tickets: [805745,898123,897330]
+
+ - title: "Metadata search and replace, make the regular expressions unicode aware"
+
+ bug fixes:
+ - title: "Fix regression in 0.8.28 that broke sending PDF files to iTunes"
+ tickets: [896791]
+
+ - title: "Metadata download, do not strip # from titles."
+ tickets: [898310]
+
+ - title: "Conversion pipeline: Do not error out on books that set font size to zero."
+ tickets: [898194]
+
+ - title: "News download: Respect the delay setting when downloading RSS feeds as well."
+ tickets: [897907]
+
+ - title: "EPUB Output: Ensure that xml:lang is set if lang is set as ADE looks for xml:lang, not lang"
+ tickets: [897531]
+
+ - title: "Content server: Reduce memory consumption when sending very large files"
+ tickets: [897343]
+
+ - title: "Preserve capitalization of Scottish author names when downloading metadata"
+
+ - title: "Fix update title sort in bulk metadata edit not using language information"
+
+ - title: "Fix sorting by published column in the download metadata dialog broken"
+ tickets: [896832]
+
+ - title: "Allow use of languages field when generating CSV/XML catalogs"
+ tickets: [896620]
+
+ - title: "Get Books: Fix ebookpoint.pl"
+
+ - title: "When calculating title sort for a book based on its language, only use the specified language, not a combination of the language and English"
+ tickets: [896412]
+
+ improved recipes:
+ - Metro NL
+ - Ming Pao
+ - Rolling Stones Mag
+ - Buffalo News
+
+ new recipes:
+ - title: gs24.pl and Gazeta.pl Szczecin
+ author: Michal Szkutnik
+
+ - title: Vanity Fair
+ author: Barty
+
+ - title: Skylife
+ author: thomass
+
+ - title: Daily Writing Tips
+ author: NotTaken
+
+ - title: TechDirt
+ author: Krittika Goyal
+
+ - title: Cosmopolitan UK
+ author: Dave Asbury
- version: 0.8.28
date: 2011-11-25
diff --git a/recipes/cosmopolitan_uk.recipe b/recipes/cosmopolitan_uk.recipe
new file mode 100644
index 0000000000..21317063ab
--- /dev/null
+++ b/recipes/cosmopolitan_uk.recipe
@@ -0,0 +1,51 @@
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+#from calibre import __appname__
+from calibre.utils.magick import Image
+class AdvancedUserRecipe1306097511(BasicNewsRecipe):
+ title = u'Cosmopolitan UK'
+ description = 'Fashion, beauty and Gossip for women from COSMOPOLITAN -UK'
+ # Calibre news recipe for Cosmopolitan UK, built from the per-section RSS feeds listed in `feeds`.
+ __author__ = 'Dave Asbury'
+ # greyscale code by Starson
+ cover_url = 'http://www.cosmopolitan.magazine.co.uk/files/4613/2085/8988/Cosmo_Cover3.jpg'
+ no_stylesheets = True
+ oldest_article = 7
+ max_articles_per_feed = 20
+ remove_empty_feeds = True
+ remove_javascript = True
+ # NOTE(review): as written, r'.*?' is non-greedy and only ever matches the empty string, so substituting '' changes nothing; the pattern has probably lost its start/end markers -- confirm against the upstream recipe.
+ preprocess_regexps = [
+ (re.compile(r'.*?', re.IGNORECASE | re.DOTALL), lambda match: '')]
+ language = 'en_GB'
+
+
+ masthead_url = 'http://www.cosmopolitan.co.uk/cm/cosmopolitanuk/site_images/header/cosmouk_logo_home.gif'
+
+ # Element selectors (by tag name, class and id) for the keep_only_tags and remove_tags settings.
+ keep_only_tags = [
+ dict(attrs={'class' : ['dateAuthor', 'publishDate']}),
+ dict(name='div',attrs ={'id' : ['main_content']})
+ ]
+ remove_tags = [
+ dict(name='div',attrs={'class' : ['blogInfo','viral_toolbar','comment_number','prevEntry nav']}),
+ dict(name='div',attrs={'class' : 'blog_module_about_the_authors'}),
+ dict(attrs={'id': ['breadcrumbs','comment','related_links_list','right_rail','content_sec_fb_more','content_sec_mostpopularstories','content-sec_fb_frame_viewfb_bot']}),
+ dict(attrs={'class' : ['read_liked_that_header','fb_back_next_area']})
+ ]
+ # Each entry is a (section title, RSS feed URL) pair.
+ feeds = [
+ (u'Love & Sex', u'http://www.cosmopolitan.co.uk/love-sex/rss/'), (u'Men', u'http://cosmopolitan.co.uk/men/rss/'), (u'Fashion', u'http://cosmopolitan.co.uk/fashion/rss/'), (u'Hair & Beauty', u'http://cosmopolitan.co.uk/beauty-hair/rss/'), (u'LifeStyle', u'http://cosmopolitan.co.uk/lifestyle/rss/'), (u'Cosmo On Campus', u'http://cosmopolitan.co.uk/campus/rss/'), (u'Celebrity Gossip', u'http://cosmopolitan.co.uk/celebrity-gossip/rss/')]
+
+ def postprocess_html(self, soup, first):
+ # Re-save every <img> tag that has a src attribute with its type set to "GrayscaleType", then return soup; `first` is unused.
+ for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
+ iurl = tag['src']  # assumes src is something Image.open() can read at this stage -- TODO confirm
+ img = Image()
+ img.open(iurl)
+ if img < 0:  # NOTE(review): compares the Image object itself with 0; unclear whether this can ever be true -- confirm
+ raise RuntimeError('Out of memory')
+ img.type = "GrayscaleType"
+ img.save(iurl)  # saved back to the same location it was opened from
+ return soup
+
diff --git a/recipes/daily_writing_tips.recipe b/recipes/daily_writing_tips.recipe
new file mode 100644
index 0000000000..836f8ec4d1
--- /dev/null
+++ b/recipes/daily_writing_tips.recipe
@@ -0,0 +1,18 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class DailyWritingTips(BasicNewsRecipe):
+ title = u'Daily Writing Tips'
+ language = 'en_GB'
+ __author__ = 'NotTaken'
+ oldest_article = 7 #days
+ max_articles_per_feed = 40
+ use_embedded_content = True
+ no_stylesheets = True
+ auto_cleanup = False
+ encoding = 'utf-8'
+
+
+ feeds = [
+('Latest tips',
+ 'http://feeds2.feedburner.com/DailyWritingTips'),
+]
diff --git a/recipes/gazeta_pl_szczecin.recipe b/recipes/gazeta_pl_szczecin.recipe
new file mode 100644
index 0000000000..af229c5721
--- /dev/null
+++ b/recipes/gazeta_pl_szczecin.recipe
@@ -0,0 +1,35 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+import re
+import string
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class GazetaPlSzczecin(BasicNewsRecipe):
+ title = u'Gazeta.pl Szczecin'
+ description = u'Wiadomości ze Szczecina na portalu Gazeta.pl.'
+ __author__ = u'Michał Szkutnik'
+ __license__ = u'GPL v3'
+ language = 'pl'
+ publisher = 'Agora S.A.'
+ category = 'news, szczecin'
+ oldest_article = 2
+ max_articles_per_feed = 100
+ auto_cleanup = True
+ remove_tags = [ { "name" : "a", "attrs" : { "href" : "http://szczecin.gazeta.pl/szczecin/www.gazeta.pl" }}]
+ cover_url = "http://bi.gazeta.pl/i/hp/hp2009/logo.gif"
+ feeds = [(u'Wszystkie', u'http://rss.feedsportal.com/c/32739/f/530434/index.rss')]
+
+ def get_article_url(self, article):
+ s = re.search("""/0L(szczecin.*)/story01.htm""", article.link)
+ s = s.group(1)
+ replacements = { "0B" : ".", "0C" : "/", "0H" : ",", "0I" : "_"}
+ for (a, b) in replacements.iteritems():
+ s = string.replace(s, a, b)
+ s = string.replace(s, "0A", "0")
+ return "http://"+s
+
+ def print_version(self, url):
+ s = re.search("""/(\d*),(\d*),(\d*),.*\.html""", url)
+ no1 = s.group(2)
+ no2 = s.group(3)
+ return """http://szczecin.gazeta.pl/szczecin/2029020,%s,%s.html""" % (no1, no2)
diff --git a/recipes/gs24_pl.recipe b/recipes/gs24_pl.recipe
new file mode 100644
index 0000000000..db7125b116
--- /dev/null
+++ b/recipes/gs24_pl.recipe
@@ -0,0 +1,43 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+import re
+import string
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1322322819(BasicNewsRecipe):
+ title = u'GS24.pl (Głos Szczeciński)'
+ description = u'Internetowy serwis Głosu Szczecińskiego'
+ __author__ = u'Michał Szkutnik'
+ __license__ = u'GPL v3'
+ language = 'pl'
+ publisher = 'Media Regionalne sp. z o.o.'
+ category = 'news, szczecin'
+ oldest_article = 2
+ max_articles_per_feed = 100
+ auto_cleanup = True
+ cover_url = "http://www.gs24.pl/images/top_logo.png"
+
+ feeds = [
+ # (u'Wszystko', u'http://www.gs24.pl/rss.xml'),
+ (u'Szczecin', u'http://www.gs24.pl/szczecin.xml'),
+ (u'Stargard', u'http://www.gs24.pl/stargard.xml'),
+ (u'Świnoujście', u'http://www.gs24.pl/swinoujscie.xml'),
+ (u'Goleniów', u'http://www.gs24.pl/goleniow.xml'),
+ (u'Gryfice', u'http://www.gs24.pl/gryfice.xml'),
+ (u'Kamień Pomorski', u'http://www.gs24.pl/kamienpomorski.xml'),
+ (u'Police', u'http://www.gs24.pl/police.xml'),
+ (u'Region', u'http://www.gs24.pl/region.xml'),
+ (u'Sport', u'http://www.gs24.pl/sport.xml'),
+ ]
+
+ def get_article_url(self, article):
+ s = re.search("""/0L0S(gs24.*)/story01.htm""", article.link)
+ s = s.group(1)
+ replacements = { "0B" : ".", "0C" : "/", "0H" : ",", "0I" : "_", "0D" : "?", "0F" : "="}
+ for (a, b) in replacements.iteritems():
+ s = string.replace(s, a, b)
+ s = string.replace(s, "0A", "0")
+ return "http://"+s
+
+ def print_version(self, url):
+ return url + "&Template=printpicart"
diff --git a/recipes/icons/skylife.png b/recipes/icons/skylife.png
new file mode 100644
index 0000000000..6cfcf5c797
Binary files /dev/null and b/recipes/icons/skylife.png differ
diff --git a/recipes/metro_news_nl.recipe b/recipes/metro_news_nl.recipe
index ce54f6099c..e7bb58757f 100644
--- a/recipes/metro_news_nl.recipe
+++ b/recipes/metro_news_nl.recipe
@@ -2,7 +2,26 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
from calibre.utils.magick import Image
+from BeautifulSoup import BeautifulSoup
+try:
+ from calibre_plugins.drMerry.debug import debuglogger as mlog
+ print 'drMerry debuglogger found, debug options can be used'
+ from calibre_plugins.drMerry.stats import statslogger as mstat
+ print 'drMerry stats tracker found, stat can be tracked'
+ mlog.setLoglevel(1) #-1 == no log; 0 for normal output
+ mstat.calculateStats(False) #track stats (to track stats loglevel must be > 0
+ KEEPSTATS = mstat.keepmystats()
+ SHOWDEBUG0 = mlog.showdebuglevel(0)
+ SHOWDEBUG1 = mlog.showdebuglevel(1)
+ SHOWDEBUG2 = mlog.showdebuglevel(2)
+except:
+ print 'drMerry debuglogger not found, skipping debug options'
+ SHOWDEBUG0 = False
+ SHOWDEBUG1 = False
+ SHOWDEBUG2 = False
+ KEEPSTATS = False
+print ('level0: %s\nlevel1: %s\nlevel2: %s' % (SHOWDEBUG0,SHOWDEBUG1,SHOWDEBUG2))
''' Version 1.2, updated cover image to match the changed website.
added info date on title
@@ -17,39 +36,37 @@ from calibre.utils.magick import Image
changed è into è
updated remove tags
removed keep_only tags
+ Version 1.8 26-11-2011
+ added remove tag: article-slideshow
'''
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Metro Nieuws NL'
- oldest_article = 1.5
- max_articles_per_feed = 100
+ oldest_article = 10
+ max_articles_per_feed = 15
__author__ = u'DrMerry'
description = u'Metro Nederland'
language = u'nl'
simultaneous_downloads = 5
+ masthead_url = 'http://blog.metronieuws.nl/wp-content/themes/metro/images/header.gif'
timeout = 2
- #delay = 1
center_navbar = True
- #auto_cleanup = True
- #auto_cleanup_keep = '//div[@class="article-image-caption-2column"]/*|//div[@id="date"]/*|//div[@class="article-image-caption-3column"]/*'
timefmt = ' [%A, %d %b %Y]'
no_stylesheets = True
remove_javascript = True
remove_empty_feeds = True
cover_url = 'http://www.oldreadmetro.com/img/en/metroholland/last/1/small.jpg'
publication_type = 'newspaper'
- remove_tags_before = dict(id='date')
- remove_tags_after = dict(name='div', attrs={'class':'article-body'})
encoding = 'utf-8'
remove_attributes = ['style', 'font', 'width', 'height']
use_embedded_content = False
conversion_options = {
- 'authors' : 'Metro Nederland',
- 'author_sort' : 'Metro Nederland',
+ 'authors' : 'Metro Nederland & calibre & DrMerry',
+ 'author_sort' : 'Metro Nederland & calibre & DrMerry',
'publisher' : 'DrMerry/Metro Nederland'
}
extra_css = 'body {padding:5px 0px; background:#fff;font-size: 13px;}\
- #date {clear: both;margin-left: 19px;font-size: 11px;font-weight: 300;color: #616262;height: 15px;}\
+ #date, div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name {clear: both;margin-bottom: 10px;font-size:0.5em; color: #616262;}\
.article-box-fact.module-title {clear:both;padding: 8px 0;color: #24763b;font-family: arial, sans-serif;font-size: 14px;font-weight: bold;}\
h1.title {color: #000000;font-size: 44px;padding-bottom: 10px;font-weight: 300;} h2.subtitle {font-size: 13px;font-weight: 700;padding-bottom: 10px;}\
.article-body p{padding-bottom:10px;}div.column-1-3{margin-left: 19px;padding-right: 9px;}\
@@ -58,28 +75,43 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
p.article-image-caption .credits {font-style: italic;font-size: 10px;}\
div.article-image-caption {width: 246px;margin-bottom: 5px;margin-left: 10px;}\
div.article-image-caption-2column {margin-bottom: 10px;width: 373px;} div.article-image-caption-3column {}\
- img {border:0px;} .img-mask {position:absolute;top:0px;left:0px;}'
+ img {border:0px; padding:2px;} hr.merryhr {width:30%; border-width:0px; color:green; margin-left:5px; background-color: green} div.column-3 {background-color:#eee; width:50%; margin:2px; float:right; padding:2px;} div.column-3 module-title {border: 1px solid #aaa} div.article-box-fact div.subtitle {font-weight:bold; color:green;}'
- remove_tags = [dict(name='div', attrs={'class':[ 'metroCommentFormWrap', 'related-links'
- 'commentForm', 'metroCommentInnerWrap', 'article-slideshow-counter-container', 'article-slideshow-control', 'ad', 'header-links',
- 'art-rgt','pluck-app pluck-comm', 'share-and-byline', 'article-tools-below-title', 'col-179 ', 'related-links', 'clear padding-top-15', 'share-tools',
- 'article1','article-page-auto-pushes', 'footer-edit','clear']}),
- dict(name='div', attrs={'id':['article-2', 'article-4', 'article-1', 'navigation', 'footer', 'header', 'comments', 'sidebar', 'share-and-byline']}),
- dict(name='iframe')]
-
- preprocess_regexps = [(re.compile(r'(
'
next_is_img_txt = False
title_started = False
+ title_break_reached = False
met_article_start_char = False
for item in splitter.split(raw_html):
item = item.strip()
- if item.startswith(u'\u3010'):
- met_article_start_char = True
- new_raw_html = new_raw_html + '
' + item + '
\n'
+ # if title already reached but break between title and content not yet found, record title_break_reached
+ if title_started == True and title_break_reached == False and item == '':
+ title_break_reached = True
+ # if title reached and title_break_reached and met_article_start_char == False and item is not empty
+ # start content
+ elif title_started == True and title_break_reached == True and met_article_start_char == False:
+ if item <> '':
+ met_article_start_char = True
+ new_raw_html = new_raw_html + '
' + item + '
\n'
+ #if item.startswith(u'\u3010'):
+ # met_article_start_char = True
+ # new_raw_html = new_raw_html + '