From 454b14c68f04aa8341806a2ed41824a522e91bf7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= <tomek3d@gmail.com>
Date: Mon, 16 May 2011 23:29:40 +0200
Subject: [PATCH 01/11] icons for Polish recipes

---
 recipes/icons/osnews_pl.png    | Bin 0 -> 1006 bytes
 recipes/icons/rmf24_opinie.png | Bin 0 -> 722 bytes
 recipes/icons/swiatkindle.png  | Bin 0 -> 425 bytes
 3 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 recipes/icons/osnews_pl.png
 create mode 100644 recipes/icons/rmf24_opinie.png
 create mode 100644 recipes/icons/swiatkindle.png

diff --git a/recipes/icons/osnews_pl.png b/recipes/icons/osnews_pl.png
new file mode 100644
index 0000000000000000000000000000000000000000..157bbefa727e9e1fa700369009b21592d44ecc83
GIT binary patch
literal 1006
zcmeAS@N?(olHy`uVBq!ia0vp^0w65F1|<EHm6d=LTavfC%YQK7jQD;BD8gCb5m^kR
zJ;2!QWVRgx1GA2&i(`n#@wp-AvxNf%{-wo#dz-g8dr_wE#So34oQn<zlvt0-eR;K1
zfPYcNg~>noMXEwRgbDb6jnWdSzjJF*K(&S!|It+mE5*9Zc$b=<U6yCQd-mUl4+D!X
z%sBGisDEaqvAps9>J~9WgAWf5-kGCf=;pFzYiYrQ%6;)#s*`7_ozBiy_frv?wQbw9
z)2T)$pLC1!%g61Wc2-Z{e6_Y|e(nE%Urds>$@UwU{VFlal$i7WZEnsKFLw6y^L0N0
zG-f<IapKOMSFgT(d$A%hW0p?*4uyA~zORE9A9#CveSH0|E0>caw?+xrRE22V*!KF_
z=i1si#d@bFPtL!$X8rZg=jV7X=Vi7ifA{C-yS($a?`+XIK69R$l9H5kbkwQm?QQ$&
z^DCJSAD($;W3sUD-MhQiX_)76tk+SW+}W~<$-QsIDxK?}H{YDBF3h<gVoUD%$1#^v
zW^SBq?pl(y>aFOJq)9Gco^$N4y?uXsZbHF_D=QbfzpguMz~jnz$e?QxhsSc$-eQMI
z9sk0XYceXD{YrLS_Hx%g{+L}??$`gZxNv*6*U`rg?(OS}<Ia08x3&^teGn7yqn}ei
z&~@S}BQ?*!C9gM~k#G=kWiYUg)|#niU{muk$)7)9rGc6f)7-g9u5-L6shFw>WQzoZ
z9_MsER%CfDbZSQZ{cXD33>`^^2TWv_hcS1YN<Gia#@52N>1K%6^~D;KFP4U~PMfxE
zySC_pT&CBrckiCYTJvYe*3?-CHm9FHJ$tspS}~Ty8#@xhr?1oIy7=SA2@gH?JG-Wy
zEGcC)HqI3i=<WFSE$8;<SC6vV?#7-jihXc*cW>`}`^M+z!^5~(kKMY~@c2iagq7dc
zsMV~D7ovV!m6g@~Es8zxxyGI+c6%<<wzrjzAD(hQ@9`A+ps)I$x3wuVY<0tPR!50-
z>$dqm`20NSqDlNdpQVoqT`g<QuND6BadFJHw}(0Z9CtUdu6}>->FM|P#k;3^ZT<D?
z>i&5_i$yKYo#}aRUpMFc`SaU-9|+{%ulu`ebMg0e5>Fo`Wxov%1KPXz#ksj@GdHGj
zI#oU0WD&blX-|9opR216X?1Pj=00Rs%Vkqp^mo@|lhyyFQ{z&Woj<IW49q~PC9V-A
zDTyViR>?)FK#IZ0z|c_Fz*N`JEX2sr%GA`#&_dh5$jZQ=QtgEwiiX_$l+3hBEE<d;
V8rE?0?*VFH@O1TaS?83{1OQA+%<upJ

literal 0
HcmV?d00001

diff --git a/recipes/icons/rmf24_opinie.png b/recipes/icons/rmf24_opinie.png
new file mode 100644
index 0000000000000000000000000000000000000000..53ad00078a964edca84fbfb957006de84ec6ab90
GIT binary patch
literal 722
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b
zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87#M$hx;TbdoW45AJ6kwWqIG_LTG3U3jX|m=
zjz_t61r(~fx@s?4z|9?7(U`!huOMn3Dv%k>GJR2=mxK8t1*IFkADeVTSsYg?X>3u7
zEm<UbG$42R+?o0_uOIdK%Uczuuit+7;fDkD`dbTF9_Ivh8L>$(S}zdbp1~lsA~}Hl
z6${J7_p_Kd>@F=j;KsO8<6_67=M4-VOxNyftZ4SgaIV>PVd28Zrwl`$`>uIZ`s@8W
zR^by3v1cz;Z98&mUfmr@VHOwTM8=M^c8vf%5%ayvYOk^+2YYKy<-fT7x8b(AOAGe#
zKNAx^?>E^*dV!;fz7B&)u7GLAe}__=!xNvkpL~8y;_sBjZ;Q;g_uYGTaQD~5rzQc+
zoDyb#1jD-3R{9iViQV>-Q;*k~=we)w*6>!`f5V|Qg4gBx<JKyk=CRN*k=Xd*X6B5C
z8L{Ek6_!yC>dy0i%lYRMlxe27{?l#Gf~o}jci%<!XK~J8y=d_<_C^K&PGK9zCz_kq
zeVsDnCHE$cYQK)Jr+$AjnrPm!$MxOUbyJ<Zx1TUja_)$KDA`cN{&SJY&Ka!s%Y@HV
zKGbPzs@W{8l-TgP>G845s7H%yIa+E@GKc6~v|xyGE<V=0#=S-GYh?xpL)@7M(ZQ$w
zzdD=Lon&y;Q}xc?gVz=uR8=s3pX~17+m$3V!%O~2ddoTcRrdX^m+SwtEENN$3DpwU
zh?11Vl2ohYqEsNoU}Ruuple{MYh)B+U}<G&W(8y$07-*eoZ6>RH00)|WTsW(*04Hc
Rayd`~gQu&X%Q~loCIBf=9ykC1

literal 0
HcmV?d00001

diff --git a/recipes/icons/swiatkindle.png b/recipes/icons/swiatkindle.png
new file mode 100644
index 0000000000000000000000000000000000000000..1fc505bfbfabd98cc547b7844357266c21e1502e
GIT binary patch
literal 425
zcmeAS@N?(olHy`uVBq!ia0vp^0wBx*Bp9q_EZ7UAn3BBRT^JZv^(q?yd7K3vk;OpT
z1B~5HX4?T7uRUEHLnJPno;&Sz*g>N0;r~y2c*NJm?AY>vvunzwlv$Hs${0<~Oq&$q
ztC1+g-_ad(mn*i?{$Jo7yFE2E2U@3it>;e8jlBAm%_3Q8a^<?~9ml2}=xYCceA(Ty
zb>+48JR4T+`P_Z_^~;<p*Aqq^4Hs{G@AMa2eq7ysatA}IWS&&?&+VT#&n{!uK4r0P
zRm?P3o6BOC0tL2YSxw{OF4u_RSUIIEbe^Qgr23Qd;<J`2_6heCxPQuf-_v67OY{^=
zF8}-9mQ;a7J%P9C_HQ%&q4J|UBWwSh+e$s{%2TG)zA0nYvB_T|pjl;IWA~TAyng1=
zlQXxK1HG+U;u=wsl30>zm0Xkxq!^403=MS+Omz*-LW~TpOo7N$+rY@mz+h^CIv<LL
s-29Zxv`VN3LtSHo5JL+qATl!0HZTNgDBm$P0;qw()78&qol`;+0O1;+1poj5

literal 0
HcmV?d00001


From 3f0b7afd52d36cc9d8fc8376d7081070bcffaba5 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 17 May 2011 08:27:27 -0600
Subject: [PATCH 02/11] ...

---
 src/calibre/manual/faq.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst
index 1c6b65c770..d3784eda6f 100644
--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@@ -22,7 +22,7 @@ It can convert every input format in the following list, to every output format.
 
 *Input Formats:* CBZ, CBR, CBC, CHM, EPUB, FB2, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ
 
-*Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, HTMLZ, PDB, PML, RB, PDF, SNB, TCR, TXT, TXTZ
+*Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, HTMLZ, PDB, PML, RB, PDF, RTF, SNB, TCR, TXT, TXTZ
 
 .. note ::
 

From dc6f033b466d63269fa2c7f856b49f4e57937c62 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 17 May 2011 09:30:51 -0600
Subject: [PATCH 03/11] Updated United Daily

---
 recipes/united_daily.recipe | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/recipes/united_daily.recipe b/recipes/united_daily.recipe
index 6954a7e725..1013b3d2b6 100644
--- a/recipes/united_daily.recipe
+++ b/recipes/united_daily.recipe
@@ -14,6 +14,7 @@ class UnitedDaily(BasicNewsRecipe):
              (u'生活', u'http://udn.com/udnrss/life.xml'),
              (u'綜合', u'http://udn.com/udnrss/education.xml'),
              (u'意見評論', u'http://udn.com/udnrss/opinion.xml'),
+             (u'校園博覽會', u'http://mag.udn.com/udnrss/campus_rss.xml'),
              (u'大台北', u'http://udn.com/udnrss/local_taipei.xml'),
              (u'桃竹苗', u'http://udn.com/udnrss/local_tyhcml.xml'),
              (u'中彰投', u'http://udn.com/udnrss/local_tcchnt.xml'),
@@ -21,15 +22,21 @@ class UnitedDaily(BasicNewsRecipe):
              (u'高屏離島', u'http://udn.com/udnrss/local_ksptisland.xml'),
              (u'基宜花東', u'http://udn.com/udnrss/local_klilhltt.xml'),
              (u'台灣百寶鄉', u'http://udn.com/udnrss/local_oddlyenough.xml'),
+             (u'台灣人物', u'http://mag.udn.com/udnrss/people_rss.xml'),
              (u'兩岸要聞', u'http://udn.com/udnrss/mainland.xml'),
              (u'國際焦點', u'http://udn.com/udnrss/international.xml'),
              (u'台商經貿', u'http://udn.com/udnrss/financechina.xml'),
              (u'國際財經', u'http://udn.com/udnrss/financeworld.xml'),
+             (u'全球觀察', u'http://mag.udn.com/udnrss/world_rss.xml'),
              (u'財經焦點', u'http://udn.com/udnrss/financesfocus.xml'),
              (u'股市要聞', u'http://udn.com/udnrss/stock.xml'),
              (u'股市快訊', u'http://udn.com/udnrss/stklatest.xml'),
              (u'稅務法務', u'http://udn.com/udnrss/tax.xml'),
              (u'房市情報', u'http://udn.com/udnrss/houses.xml'),
+             (u'個人理財', u'http://mag.udn.com/udnrss/wealth_rss.xml'),
+             (u'研究報告', u'http://mag.udn.com/udnrss/report_rss.xml'),
+             (u'基金', u'http://mag.udn.com/udnrss/fund_rss.xml'),
+             (u'理財會客室', u'http://mag.udn.com/udnrss/m_forum_rss.xml'),
              (u'棒球', u'http://udn.com/udnrss/baseball.xml'),
              (u'籃球', u'http://udn.com/udnrss/basketball.xml'),
              (u'體壇動態', u'http://udn.com/udnrss/sportsfocus.xml'),
@@ -40,19 +47,24 @@ class UnitedDaily(BasicNewsRecipe):
              (u'電影世界', u'http://udn.com/udnrss/movie.xml'),
              (u'流行音樂', u'http://udn.com/udnrss/music.xml'),
              (u'觀點專題', u'http://udn.com/udnrss/starssubject.xml'),
+             (u'消費流行', u'http://mag.udn.com/udnrss/happylife_rss.xml'),
              (u'食樂指南', u'http://udn.com/udnrss/food.xml'),
+             (u'數位資訊', u'http://mag.udn.com/udnrss/digital_rss.xml'),
              (u'折扣好康', u'http://udn.com/udnrss/shopping.xml'),
+             (u'發燒車訊', u'http://mag.udn.com/udnrss/car_rss.xml'),
              (u'醫藥新聞', u'http://udn.com/udnrss/health.xml'),
              (u'家婦繽紛', u'http://udn.com/udnrss/benfen.xml'),
              (u'談星論命', u'http://udn.com/udnrss/astrology.xml'),
              (u'文化副刊', u'http://udn.com/udnrss/reading.xml'),
+             (u'旅遊休閒', u'http://travel.udn.com/udnrss/travel_rss.xml'),
+             (u'健康醫藥', u'http://mag.udn.com/udnrss/life_rss.xml'),
              ]
 
-    extra_css = '''div[id='story_title'] {font-size:200%; font-weight:bold;}'''
+    extra_css = '''div[id='story_title'] {font-size:200%; font-weight:bold;} td[class='story_title'] {font-size:200%; font-weight:bold;} td[class='story_title'] td[class='story_title']>div {font-size:200%; font-weight:bold;}'''
 
     __author__ = 'Eddie Lau'
-    __version__ = '1.0'
-    language = 'zh'
+    __version__ = '1.1'
+    language = 'zh-TW'
     publisher = 'United Daily News Group'
     description = 'United Daily (Taiwan)'
     category = 'News, Chinese, Taiwan'
@@ -63,5 +75,12 @@ class UnitedDaily(BasicNewsRecipe):
     conversion_options = {'linearize_tables':True}
     masthead_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
     cover_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
-    keep_only_tags = [dict(name='div', attrs={'id':['story_title','story_author', 'story']})]
+    keep_only_tags = [dict(name='td', attrs={'class':['story_title']}),
+                      dict(name='div', attrs={'id':['story_title']}),
+                      dict(name='td', attrs={'class':['story_author']}),
+                      dict(name='div', attrs={'id':['story_author']}),
+                      dict(name='td', attrs={'class':['story']}),
+                      dict(name='div', attrs={'id':['story']}),
+                      ]
     remove_tags = [dict(name='div', attrs={'id':['mvouter']})]
+

From 253e49141a1739423fdf793c9098359a78c116f2 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 17 May 2011 09:35:45 -0600
Subject: [PATCH 04/11] Men's Health by Anonymous

---
 recipes/mens_health.recipe | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 recipes/mens_health.recipe

diff --git a/recipes/mens_health.recipe b/recipes/mens_health.recipe
new file mode 100644
index 0000000000..4e69db8a7c
--- /dev/null
+++ b/recipes/mens_health.recipe
@@ -0,0 +1,10 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1305636254(BasicNewsRecipe):
+    title          = u'Mens Health (US)'
+    language = 'en'
+    __author__ = 'Anonymous'
+    oldest_article = 14
+    max_articles_per_feed = 100
+
+    feeds          = [(u'News', u'http://blogs.menshealth.com/health-headlines/feed')]

From a0942198acb97419569a83bd10ce4462cfaf8155 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 17 May 2011 13:06:56 -0600
Subject: [PATCH 05/11] Good to Know by Anonymous

---
 recipes/good_to_know.recipe | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 recipes/good_to_know.recipe

diff --git a/recipes/good_to_know.recipe b/recipes/good_to_know.recipe
new file mode 100644
index 0000000000..cf374128ce
--- /dev/null
+++ b/recipes/good_to_know.recipe
@@ -0,0 +1,32 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1305547242(BasicNewsRecipe):
+    title          = u'Good to Know (uk)'
+    oldest_article = 14
+    max_articles_per_feed = 100
+    no_stylesheets         = True
+    use_embedded_content   = False
+    remove_javascript      = True
+    __author__  = 'Anonymous'
+    language = 'en_GB'
+    # A dict literal keeps only the last of two identical keys, so list both classes under one key
+    remove_tags = [dict(name='div', attrs={'class':['articles_footer', 'printoptions']})]
+
+    def print_version(self, url):
+        return url + '/print/1'
+
+    def preprocess_html(self, soup):
+        for alink in soup.findAll('a'):
+            if alink.string is not None:
+                alink.replaceWith(alink.string)  # unwrap the link: keep its text, drop the <a> tag
+        return soup
+
+    feeds  = [
+              (u'Family Conception Advice', u'http://www.goodtoknow.co.uk/feeds/family.rss'),
+              (u'Family Health Advice', u'http://www.goodtoknow.co.uk/feeds/health.rss'),
+              (u'Diet Advice', u'http://www.goodtoknow.co.uk/feeds/diet.rss'),
+              (u'Food Advice', u'http://www.goodtoknow.co.uk/feeds/food.rss'),
+              (u'Sex Advice', u'http://www.goodtoknow.co.uk/feeds/sex.rss'),
+              (u'Easy Exercise', u'http://www.goodtoknow.co.uk/feeds/easyexercise.rss'),
+              (u'Recipes', u'http://www.goodtoknow.co.uk/feeds/recipes.rss'),
+              (u'Food Quick-tips', u'http://www.goodtoknow.co.uk/feeds/foodquicktips.rss')]

From 9908fc66325028ea80b958d6d8fe0cc0db990984 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 17 May 2011 14:19:06 -0600
Subject: [PATCH 06/11] Glamour by Anonymous

---
 recipes/glamour.recipe | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 recipes/glamour.recipe

diff --git a/recipes/glamour.recipe b/recipes/glamour.recipe
new file mode 100644
index 0000000000..40e6b6e88b
--- /dev/null
+++ b/recipes/glamour.recipe
@@ -0,0 +1,38 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1305547242(BasicNewsRecipe):
+    title          = u'Glamour (US)'
+    oldest_article = 21
+    max_articles_per_feed = 100
+    no_stylesheets         = True
+    use_embedded_content   = False
+    language = 'en'
+    remove_javascript      = True
+    __author__  = 'Anonymous'
+    # A dict literal keeps only the last of two identical keys, so list both classes under one key
+    remove_tags = [dict(name='div', attrs={'class':['articles_footer', 'printoptions']})]
+
+    def print_version(self, url):
+        return url + '?printable=true'
+
+    def preprocess_html(self, soup):
+        for alink in soup.findAll('a'):
+            if alink.string is not None:
+                # Unwrap plain-text links: keep the text, drop the <a> tag
+                alink.replaceWith(alink.string)
+        return soup
+
+    feeds  = [      (u'All Fashion', u'http://feeds.glamour.com/glamour/all_fashion'),
+                    (u'All Beauty', u'http://feeds.glamour.com/glamour/all_beauty'),
+                    (u'All Sex, Love & Life', u'http://feeds.glamour.com/glamour/sex_love_life'),
+                    (u'All Health & Fitness', u'http://feeds.glamour.com/glamour/health_fitness'),
+                    (u'Shopping', u'http://feeds.glamour.com/glamour/shopping'),
+                    (u'Slaves to Fashion blog', u'http://feeds.glamour.com/glamour/slavestofashion'),
+                    (u'The Girls in the Beauty Department', u'http://feeds.glamour.com/glamour/thegirlsinthebeautydepartment'),
+                    (u'Smitten blog', u'http://feeds.glamour.com/glamour/smitten'),
+                    (u'Save the Date', u'http://feeds.feedburner.com/glamour/save-the-date'),
+                    (u'Single-ish blog', u'http://feeds.glamour.com/glamour/glamoursingle-ish'),
+                    (u'Vitamin G blog', u'http://feeds.glamour.com/glamour/vitamin-g'),
+                    (u'Margarita Shapes Up blog', u'http://feeds.glamour.com/glamour/margaritashapesup'),
+                    (u'Little Miss Fortune blog', u'http://feeds.glamour.com/glamour/little-miss-fortune'),
+ ]

From 1f31873432f6eccaf44931dd76f4a9225f56aba1 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 17 May 2011 14:31:54 -0600
Subject: [PATCH 07/11] Add a tweak that controls what words are treated as
 suffixes when generating an author sort string from an author name. Also Fix
 #782551 (authorsort error on brackets)

---
 resources/default_tweaks.py             |  7 +++-
 src/calibre/__init__.py                 | 18 +++++++++++
 src/calibre/ebooks/metadata/__init__.py | 43 +++++++++++++++++--------
 3 files changed, 54 insertions(+), 14 deletions(-)

diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py
index e91b4a62d5..691a82fc36 100644
--- a/resources/default_tweaks.py
+++ b/resources/default_tweaks.py
@@ -41,14 +41,19 @@ authors_completer_append_separator = False
 #: Author sort name algorithm
 # The algorithm used to copy author to author_sort
 # Possible values are:
-#  invert: use "fn ln" -> "ln, fn" (the default algorithm)
+#  invert: use "fn ln" -> "ln, fn"
 #  copy  : copy author to author_sort without modification
 #  comma : use 'copy' if there is a ',' in the name, otherwise use 'invert'
 #  nocomma : "fn ln" -> "ln fn" (without the comma)
 # When this tweak is changed, the author_sort values stored with each author
 # must be recomputed by right-clicking on an author in the left-hand tags pane,
 # selecting 'manage authors', and pressing 'Recalculate all author sort values'.
+# The author name suffixes are words that are ignored when they occur at the
+# end of an author name. The case of the suffix is ignored and trailing
+# periods are automatically handled.
 author_sort_copy_method = 'comma'
+author_name_suffixes = ('Jr', 'Sr', 'Inc', 'Ph.D', 'Phd',
+                        'MD', 'M.D', 'I', 'II', 'III', 'IV')
 
 #: Use author sort in Tag Browser
 # Set which author field to display in the tags pane (the list of authors,
diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py
index bc99947345..b82ea984ec 100644
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@@ -630,6 +630,24 @@ def human_readable(size):
         size = size[:-2]
     return size + " " + suffix
 
+def remove_bracketed_text(src,
+        brackets={u'(':u')', u'[':u']', u'{':u'}'}):
+    from collections import Counter
+    counts = Counter()
+    buf = []
+    src = force_unicode(src)
+    rmap = dict([(v, k) for k, v in brackets.iteritems()])
+    for char in src:
+        if char in brackets:
+            counts[char] += 1
+        elif char in rmap:
+            idx = rmap[char]
+            if counts[idx] > 0:
+                counts[idx] -= 1
+        elif sum(counts.itervalues()) < 1:
+            buf.append(char)
+    return u''.join(buf)
+
 if isosx:
     import glob, shutil
     fdir = os.path.expanduser('~/.fonts')
diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py
index 9c7838cb2c..2c26d011b7 100644
--- a/src/calibre/ebooks/metadata/__init__.py
+++ b/src/calibre/ebooks/metadata/__init__.py
@@ -10,7 +10,7 @@ import os, sys, re
 from urllib import unquote, quote
 from urlparse import urlparse
 
-from calibre import relpath, guess_type
+from calibre import relpath, guess_type, remove_bracketed_text
 
 from calibre.utils.config import tweaks
 
@@ -27,20 +27,37 @@ def authors_to_string(authors):
     else:
         return ''
 
-_bracket_pat = re.compile(r'[\[({].*?[})\]]')
-def author_to_author_sort(author):
+def author_to_author_sort(author, method=None):
     if not author:
-        return ''
-    method = tweaks['author_sort_copy_method']
-    if method == 'copy' or (method == 'comma' and ',' in author):
+        return u''
+    sauthor = remove_bracketed_text(author).strip()
+    tokens = sauthor.split()
+    if len(tokens) < 2:
         return author
-    author = _bracket_pat.sub('', author).strip()
-    tokens = author.split()
-    if tokens and tokens[-1] not in ('Inc.', 'Inc'):
-        tokens = tokens[-1:] + tokens[:-1]
-        if len(tokens) > 1 and method != 'nocomma':
-            tokens[0] += ','
-    return ' '.join(tokens)
+    if method is None:
+        method = tweaks['author_sort_copy_method']
+    if method == u'copy':
+        return author
+    suffixes = set([x.lower() for x in tweaks['author_name_suffixes']])
+    suffixes |= set([x+u'.' for x in suffixes])
+
+    last = tokens[-1].lower()
+    suffix = None
+    if last in suffixes:
+        suffix = tokens[-1]
+        tokens = tokens[:-1]
+
+    if method == u'comma' and u',' in u''.join(tokens):
+        return author
+
+    atokens = tokens[-1:] + tokens[:-1]
+    if suffix:
+        atokens.append(suffix)
+
+    if method != u'nocomma' and len(atokens) > 1:
+        atokens[0] += u','
+
+    return u' '.join(atokens)
 
 def authors_to_sort_string(authors):
     return ' & '.join(map(author_to_author_sort, authors))

From faf5ba7d7c80b248621bd0b0a4f7f0f41d12f27d Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 17 May 2011 17:31:49 -0600
Subject: [PATCH 08/11] Add API to run arbitrary functions in worker processes

---
 src/calibre/utils/ipc/worker.py | 54 +++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/src/calibre/utils/ipc/worker.py b/src/calibre/utils/ipc/worker.py
index a891d09f3d..88e571673f 100644
--- a/src/calibre/utils/ipc/worker.py
+++ b/src/calibre/utils/ipc/worker.py
@@ -50,6 +50,12 @@ PARALLEL_FUNCS = {
 
       'save_book' :
       ('calibre.ebooks.metadata.worker', 'save_book', 'notification'),
+
+      'arbitrary' :
+      ('calibre.utils.ipc.worker', 'arbitrary', None),
+
+      'arbitrary_n' :
+      ('calibre.utils.ipc.worker', 'arbitrary_n', 'notification'),
 }
 
 class Progress(Thread):
@@ -73,7 +79,55 @@ class Progress(Thread):
             except:
                 break
 
+def arbitrary(module_name, func_name, args, kwargs={}):
+    '''
+    An entry point that allows arbitrary functions to be run in a parallel
+    process. useful for plugin developers that want to run jobs in a parallel
+    process.
 
+    To use this entry point, simply create a ParallelJob with the module and
+    function names for the real entry point.
+
+    Remember that args and kwargs must be serialized so only use basic types
+    for them.
+
+    To use this, you will do something like
+
+    from calibre.gui2 import Dispatcher
+    gui.job_manager.run_job(Dispatcher(job_done), 'arbitrary',
+        args=('calibre_plugins.myplugin.worker', 'do_work',
+                ('arg1', 'arg2', 'arg3')),
+                description='Change the world')
+
+    The function job_done will be called on completion, see the code in
+    gui2.actions.catalog for an example of using run_job and Dispatcher.
+
+    :param module_name: The fully qualified name of the module that contains
+    the actual function to be run. For example:
+    calibre_plugins.myplugin.worker
+    :param func_name: The name of the function to be run.
+    :param args: A list (or tuple) of arguments that will be passed to the
+    function ``func_name``
+    :param kwargs: A dictionary of keyword arguments to pass to func_name
+    '''
+    module = importlib.import_module(module_name)
+    func = getattr(module, func_name)
+    return func(*args, **kwargs)
+
+def arbitrary_n(module_name, func_name, args, kwargs={},
+        notification=lambda x, y: y):
+    '''
+    Same as :func:`arbitrary` above, except that func_name must support a
+    keyword argument "notification". This will be a function that accepts two
+    arguments. func_name should call it periodically with progress information.
+    The first argument is a float between 0 and 1 that represents the fraction
+    completed and the second is a string with a message (it can be an empty
+    string). The kwargs mapping passed in is copied, never modified.
+    '''
+    module = importlib.import_module(module_name)
+    func = getattr(module, func_name)
+    kwargs = dict(kwargs, notification=notification)  # copy: never mutate the shared default or caller's dict
+    return func(*args, **kwargs)
 
 def get_func(name):
     module, func, notification = PARALLEL_FUNCS[name]

From 93f8e4f7c58b5da03b596be5a3f46dc7d3073e2b Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 18 May 2011 08:36:49 -0600
Subject: [PATCH 09/11] Update Dvhn

---
 recipes/dvhn.recipe | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/recipes/dvhn.recipe b/recipes/dvhn.recipe
index 4c093aa9d2..d0330990fc 100644
--- a/recipes/dvhn.recipe
+++ b/recipes/dvhn.recipe
@@ -1,19 +1,21 @@
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class AdvancedUserRecipe1302341394(BasicNewsRecipe):
     title          = u'DvhN'
-    oldest_article = 1
+    __author__ = 'Reijndert'
+    oldest_article = 7
     max_articles_per_feed = 200
 
-    __author__ = 'Reijndert'
     no_stylesheets = True
-    cover_url = 'http://www.dvhn.nl/template/Dagblad_v2.0/gfx/logo_DvhN.gif'
+    cover_url = 'http://members.home.nl/apm.de.haas/calibre/DvhN.jpg'
     language = 'nl'
     country = 'NL'
     version = 1
     publisher = u'Dagblad van het Noorden'
     category = u'Nieuws'
     description = u'Nieuws uit Noord Nederland'
+    timefmt = ' %Y-%m-%d (%a)'
 
 
     keep_only_tags = [dict(name='div', attrs={'id':'fullPicture'})
@@ -21,11 +23,26 @@ class AdvancedUserRecipe1302341394(BasicNewsRecipe):
                   ]
 
     remove_tags = [
-                    dict(name=['object','link','iframe','base'])
-                    ,dict(name='span',attrs={'class':'copyright'})
+                    dict(name='span',attrs={'class':'location'})
                 ]
 
-    feeds          = [(u'Drenthe', u'http://www.dvhn.nl/nieuws/drenthe/index.jsp?service=rss'), (u'Groningen', u'http://www.dvhn.nl/nieuws/groningen/index.jsp?service=rss'), (u'Nederland', u'http://www.dvhn.nl/nieuws/nederland/index.jsp?service=rss'), (u'Wereld', u'http://www.dvhn.nl/nieuws/wereld/index.jsp?service=rss'), (u'Economie', u'http://www.dvhn.nl/nieuws/economie/index.jsp?service=rss'), (u'Sport', u'http://www.dvhn.nl/nieuws/sport/index.jsp?service=rss'), (u'Cultuur', u'http://www.dvhn.nl/nieuws/kunst/index.jsp?service=rss'), (u'24 Uur', u'http://www.dvhn.nl/nieuws/24uurdvhn/index.jsp?service=rss&selectiontype=last24hours')]
+    preprocess_regexps = [
+        (re.compile(r'<a.*?>'), lambda h1: '')
+       ,(re.compile(r'</a>'), lambda h2: '')
+       ,(re.compile(r'Word vriend van Dagblad van het Noorden op Facebook'), lambda h3: '')
+       ,(re.compile(r'Volg Dagblad van het Noorden op Twitter'), lambda h3: '')
+     ]
+
+
+    feeds          = [(u'Drenthe', u'http://www.dvhn.nl/nieuws/drenthe/index.jsp?service=rss')
+	, (u'Groningen', u'http://www.dvhn.nl/nieuws/groningen/index.jsp?service=rss')
+	, (u'Nederland', u'http://www.dvhn.nl/nieuws/nederland/index.jsp?service=rss')
+	, (u'Wereld', u'http://www.dvhn.nl/nieuws/wereld/index.jsp?service=rss')
+	, (u'Economie', u'http://www.dvhn.nl/nieuws/economie/index.jsp?service=rss')
+	, (u'Sport', u'http://www.dvhn.nl/nieuws/sport/index.jsp?service=rss')
+	, (u'Cultuur', u'http://www.dvhn.nl/nieuws/kunst/index.jsp?service=rss')
+	, (u'24 Uur', u'http://www.dvhn.nl/nieuws/24uurdvhn/index.jsp?service=rss&selectiontype=last24hours')
+     ]
 
     extra_css = '''
                 body {font-family: verdana, arial, helvetica, geneva, sans-serif;}

From d882c28144e28d7cbe78addb95e3c3402e1c7ada Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 18 May 2011 09:15:17 -0600
Subject: [PATCH 10/11] Updated Newsweek

---
 recipes/newsweek.recipe | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/recipes/newsweek.recipe b/recipes/newsweek.recipe
index 97abd69aac..a31706e257 100644
--- a/recipes/newsweek.recipe
+++ b/recipes/newsweek.recipe
@@ -11,6 +11,20 @@ class Newsweek(BasicNewsRecipe):
 
     BASE_URL = 'http://www.newsweek.com'
 
+    topics = {
+        'Culture' : '/tag/culture.html',
+        'Business' : '/tag/business.html',
+        'Society' : '/tag/society.html',
+        'Science' : '/tag/science.html',
+        'Education' : '/tag/education.html',
+        'Politics' : '/tag/politics.html',
+        'Health' : '/tag/health.html',
+        'World' : '/tag/world.html',
+        'Nation' : '/tag/nation.html',
+        'Technology' : '/tag/technology.html',
+        'Game Changers' : '/tag/game-changers.html',
+    }
+
     keep_only_tags = dict(name='article', attrs={'class':'article-text'})
     remove_tags = [dict(attrs={'data-dartad':True})]
     remove_attributes = ['property']
@@ -21,14 +35,10 @@ class Newsweek(BasicNewsRecipe):
         return soup
 
     def newsweek_sections(self):
-        return [
-                ('Nation', 'http://www.newsweek.com/tag/nation.html'),
-                ('Society', 'http://www.newsweek.com/tag/society.html'),
-                ('Culture', 'http://www.newsweek.com/tag/culture.html'),
-                ('World', 'http://www.newsweek.com/tag/world.html'),
-                ('Politics', 'http://www.newsweek.com/tag/politics.html'),
-                ('Business', 'http://www.newsweek.com/tag/business.html'),
-        ]
+        for topic_name, topic_url in self.topics.iteritems():
+            yield (topic_name,
+                    self.BASE_URL+topic_url)
+
 
     def newsweek_parse_section_page(self, soup):
         for article in soup.findAll('article', about=True,

From 5317f8bb9c0acd80576ca577ffc02d33fb138c1e Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 18 May 2011 14:16:41 -0600
Subject: [PATCH 11/11] Various German news sources by schuster

---
 recipes/borse_online.recipe | 33 ++++++++++++++++++++
 recipes/capital_de.recipe   | 61 +++++++++++++++++++++++++++++++++++++
 recipes/impulse_de.recipe   | 32 +++++++++++++++++++
 3 files changed, 126 insertions(+)
 create mode 100644 recipes/borse_online.recipe
 create mode 100644 recipes/capital_de.recipe
 create mode 100644 recipes/impulse_de.recipe

diff --git a/recipes/borse_online.recipe b/recipes/borse_online.recipe
new file mode 100644
index 0000000000..c192ce2b8d
--- /dev/null
+++ b/recipes/borse_online.recipe
@@ -0,0 +1,33 @@
+from calibre.web.feeds.recipes import BasicNewsRecipe
+class AdvancedUserRecipe1303841067(BasicNewsRecipe):
+
+    title          = u'Börse-online'
+    __author__  = 'schuster'
+    oldest_article = 1
+    max_articles_per_feed = 100
+    no_stylesheets         = True
+    use_embedded_content   = False
+    language               = 'de'
+    remove_javascript      = True
+    cover_url = 'http://www.dpv.de/images/1995/source.gif'
+    masthead_url = 'http://www.zeitschriften-cover.de/cover/boerse-online-cover-januar-2010-x1387.jpg'
+    extra_css = '''
+                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+                    h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+                    img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
+                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+	'''
+    remove_tags_before = [dict(name='h3')]  # was misspelled 'remove_tags_bevor', which is silently ignored
+    remove_tags_after = [dict(name='div', attrs={'class':'artikelfuss'})]
+    remove_tags = [dict(attrs={'class':['moduleTopNav', 'moduleHeaderNav', 'text', 'blau', 'poll1150']}),
+                dict(id=['newsletterlayer', 'newsletterlayerClose', 'newsletterlayer_body', 'newsletterarray_error', 'newsletterlayer_emailadress', 'newsletterlayer_submit', 'kommentar']),
+                dict(name=['h2', 'Gesamtranking', 'h3',''])]
+
+    def print_version(self, url):
+        return url.replace('.html#nv=rss', '.html?mode=print')
+
+
+
+    feeds          = [(u'Börsennachrichten', u'http://www.boerse-online.de/rss/')]
+
diff --git a/recipes/capital_de.recipe b/recipes/capital_de.recipe
new file mode 100644
index 0000000000..6826049bc9
--- /dev/null
+++ b/recipes/capital_de.recipe
@@ -0,0 +1,101 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class AdvancedUserRecipe1305470859(BasicNewsRecipe):
+    """Calibre news recipe for Capital.de.
+
+    Feed article URLs are rewritten to their ``mode=print`` variant, inline
+    styles are stripped, and ``append_page`` can splice follow-up pages of a
+    split article into the first page.
+    """
+
+    title = u'Capital.de'
+    language = 'de'
+    __author__ = 'schuster'
+    oldest_article = 7
+    max_articles_per_feed = 35
+    no_stylesheets = True
+    remove_javascript = True
+    use_embedded_content = False
+    masthead_url = 'http://www.wirtschaftsmedien-shop.de/media/stores/wirtschaftsmedien/capital/teaser_large_abo.jpg'
+    cover_url = 'http://d1kb9jvg6ylufe.cloudfront.net/WebsiteCMS/de/unternehmen/linktipps/mainColumn/08/image/DE_Capital_bis20mm_SW.jpg'
+
+    # Prefix for the pager links followed by append_page().
+    # NOTE(review): assumes those hrefs are site-relative -- confirm.
+    INDEX = 'http://www.capital.de'
+
+    extra_css = '''
+                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+                    h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+                    img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
+                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+	'''
+
+    feeds = [
+        (u'Wirtschaftsmagazin', u'http://www.capital.de/rss/'),
+        (u'Unternehmen', u'http://www.capital.de/rss/unternehmen'),
+        (u'Finanz & Geldanlage', u'http://www.capital.de/rss/finanzen/geldanlage'),
+    ]
+
+    # calibre only reads the attribute spelled ``remove_tags_before``; any
+    # other spelling is silently ignored and nothing is trimmed.
+    remove_tags_before = [dict(name='td', attrs={'class': 'textcell'})]
+    remove_tags_after = [dict(name='div', attrs={'class': 'artikelsplit'})]
+    remove_tags = [
+        dict(attrs={'class': [
+            'navSeitenAlle', 'kommentieren', 'teaserheader', 'teasercontent',
+            'info', 'zwischenhead', 'artikelsplit',
+        ]}),
+        dict(id=[
+            'topNav', 'mainNav', 'subNav', 'socialmedia', 'footerRahmen',
+            'gatrixx_marktinformationen', 'pager', 'weitere',
+        ]),
+        # NOTE(review): this filters on an attribute literally named "span",
+        # not on <span> tags -- presumably name='span' plus a class filter was
+        # intended; confirm before changing.
+        dict(span=['ratingtext', 'Gesamtranking', 'h3', '']),
+        dict(rel=['canonical']),
+    ]
+
+    def print_version(self, url):
+        """Map a feed article URL to its ``mode=print`` URL."""
+        return url.replace('nv=rss#utm_source=rss2&utm_medium=rss_feed&utm_campaign=/', 'mode=print')
+
+    def append_page(self, soup, appendtag, position):
+        """Splice the follow-up page of a split article into ``appendtag``.
+
+        Looks for the ``artikelsplit`` pager in ``soup``; when it has a link,
+        the linked page is fetched, its ``printable`` div is cleaned of inline
+        styles, further pages are appended to it recursively, and the div is
+        inserted into ``appendtag`` at index ``position``.  Does nothing when
+        the pager, its link or the ``printable`` div is missing.
+        """
+        pager = soup.find('div', attrs={'class': 'artikelsplit'})
+        if pager is None or pager.a is None:
+            return
+        nexturl = self.INDEX + pager.a['href']
+        soup2 = self.index_to_soup(nexturl)
+        texttag = soup2.find('div', attrs={'class': 'printable'})
+        if texttag is None:
+            return
+        for it in texttag.findAll(style=True):
+            del it['style']
+        newpos = len(texttag.contents)
+        self.append_page(soup2, texttag, newpos)
+        texttag.extract()
+        appendtag.insert(position, texttag)
+
+    def preprocess_html(self, soup):
+        """Strip inline styles and pager divs, then return the adeified soup."""
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('div', attrs={'class': 'artikelsplit'}):
+            item.extract()
+        # NOTE(review): every pager div was just removed, so this call cannot
+        # find one in ``soup`` and currently appends nothing.
+        self.append_page(soup, soup.body, 3)
+        pager = soup.find('div', attrs={'class': 'artikelsplit'})
+        if pager:
+            pager.extract()
+        return self.adeify_images(soup)
diff --git a/recipes/impulse_de.recipe b/recipes/impulse_de.recipe
new file mode 100644
index 0000000000..d38c0aa6a6
--- /dev/null
+++ b/recipes/impulse_de.recipe
@@ -0,0 +1,54 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class AdvancedUserRecipe1305470859(BasicNewsRecipe):
+    """Calibre news recipe for Impulse.de.
+
+    Feed article URLs are rewritten to their ``?mode=print`` variant by
+    ``print_version``.
+    """
+
+    title = u'Impulse.de'
+    language = 'de'
+    __author__ = 'schuster'
+    oldest_article = 14
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    remove_javascript = True
+    use_embedded_content = False
+    cover_url = 'http://www.bvk.de/files/image/bilder/Logo%20Impulse.jpg'
+
+    extra_css = '''
+                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+                    h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+                    img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
+                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+	'''
+
+    feeds = [(u'impulstest', u'http://www.impulse.de/rss/')]
+
+    # calibre only reads the attribute spelled ``remove_tags_before``; any
+    # other spelling is silently ignored and nothing is trimmed.
+    remove_tags_before = [dict(name='h1', attrs={'class': 'h2'})]
+    remove_tags_after = [dict(name='div', attrs={'class': 'artikelfuss'})]
+    remove_tags = [
+        dict(attrs={'class': [
+            'navSeitenAlle', 'kommentieren', 'teaserheader', 'teasercontent',
+            'info', 'zwischenhead', 'kasten_artikel',
+        ]}),
+        dict(id=[
+            'metaNav', 'impKopf', 'impTopNav', 'impSubNav', 'footerRahmen',
+            'gatrixx_marktinformationen', 'pager', 'weitere', 'socialmedia',
+            'rating_open',
+        ]),
+        # NOTE(review): this filters on an attribute literally named "span",
+        # not on <span> tags -- presumably name='span' plus a class filter was
+        # intended; confirm before changing.
+        dict(span=['ratingtext', 'Gesamtranking', 'h3', '']),
+        dict(rel=['canonical']),
+    ]
+
+    def print_version(self, url):
+        """Map a feed article URL to its ``?mode=print`` URL."""
+        return url.replace('#utm_source=rss2&utm_medium=rss_feed&utm_campaign=/', '?mode=print')