From b23b32a7ea3bbbc18edc32808852a626b6f17da4 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 12 Apr 2009 19:00:31 -0700 Subject: [PATCH 1/9] Handle periods better when reading metadata from filenames --- src/calibre/ebooks/metadata/meta.py | 26 +++++++++++++------------- upload.py | 1 + 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/calibre/ebooks/metadata/meta.py b/src/calibre/ebooks/metadata/meta.py index de7ac8eeea..a176c12c2b 100644 --- a/src/calibre/ebooks/metadata/meta.py +++ b/src/calibre/ebooks/metadata/meta.py @@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal ' import os, re, collections from calibre.utils.config import prefs - + from calibre.ebooks.metadata.opf2 import OPF from calibre.customize.ui import get_file_type_metadata, set_file_type_metadata @@ -37,18 +37,18 @@ def metadata_from_formats(formats): mi2 = opf_metadata(opf) if mi2 is not None and mi2.title: return mi2 - + for path, ext in zip(formats, extensions): with open(path, 'rb') as stream: try: - newmi = get_metadata(stream, stream_type=ext, + newmi = get_metadata(stream, stream_type=ext, use_libprs_metadata=True) mi.smart_update(newmi) except: continue if getattr(mi, 'application_id', None) is not None: return mi - + if not mi.title: mi.title = _('Unknown') if not mi.authors: @@ -64,20 +64,20 @@ def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False): stream_type = 'mobi' if stream_type in ('odt', 'ods', 'odp', 'odg', 'odf'): stream_type = 'odt' - + opf = None if hasattr(stream, 'name'): c = os.path.splitext(stream.name)[0]+'.opf' if os.access(c, os.R_OK): opf = opf_metadata(os.path.abspath(c)) - + if use_libprs_metadata and getattr(opf, 'application_id', None) is not None: return opf - + mi = MetaInformation(None, None) if prefs['read_file_metadata']: mi = get_file_type_metadata(stream, stream_type) - + name = os.path.basename(getattr(stream, 'name', '')) base = metadata_from_filename(name) if base.title == os.path.splitext(name)[0] and base.authors is None: @@ -98,17 +98,17 @@ def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False): base.smart_update(mi) if opf is not None: base.smart_update(opf) - + return base def set_metadata(stream, mi, stream_type='lrf'): if stream_type: stream_type = stream_type.lower() set_file_type_metadata(stream, mi, stream_type) - - + + def metadata_from_filename(name, pat=None): - name = os.path.splitext(name)[0] + name = name.rpartition('.')[0] mi = MetaInformation(None, None) if pat is None: pat = re.compile(prefs.get('filename_pattern')) @@ -161,7 +161,7 @@ def opf_metadata(opfpath): mi = MetaInformation(opf) if hasattr(opf, 'cover') and opf.cover: cpath = os.path.join(os.path.dirname(opfpath), opf.cover) - if os.access(cpath, os.R_OK): + if os.access(cpath, os.R_OK): fmt = cpath.rpartition('.')[-1] data = open(cpath, 'rb').read() mi.cover_data = (fmt, data) diff --git a/upload.py b/upload.py index b2fc81c8b6..6bc90aada2 100644 --- a/upload.py +++ b/upload.py @@ -530,6 +530,7 @@ class build_windows(VMInstaller): self.run_windows_install_jammer(installer) return os.path.basename(installer) + @classmethod def run_windows_install_jammer(self, installer): ibp = os.path.abspath('installer/windows') sys.path.insert(0, ibp) From 29c232c6ce699df6c8dea9342b447968b1958fe0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 13 Apr 2009 09:09:07 -0700 Subject: [PATCH 2/9] New recipe for Der Standard by Gerhard Aigner --- src/calibre/web/feeds/recipes/__init__.py | 2 +- .../web/feeds/recipes/recipe_der_standard.py | 42 +++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 src/calibre/web/feeds/recipes/recipe_der_standard.py diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index 191bf905ca..ef9f58b003 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -39,7 +39,7 @@ recipe_modules = ['recipe_' + r for r in ( 'nacional_cro', '24sata', 'dnevni_avaz', 'glas_srpske', '24sata_rs', 'krstarica', 'krstarica_en', 'tanjug', 'laprensa_ni', 'azstarnet', 'corriere_della_sera_it', 'corriere_della_sera_en', 'msdnmag_en', - 'moneynews', + 'moneynews', 'der_standard', )] import re, imp, inspect, time, os diff --git a/src/calibre/web/feeds/recipes/recipe_der_standard.py b/src/calibre/web/feeds/recipes/recipe_der_standard.py new file mode 100644 index 0000000000..eec4c4e74d --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_der_standard.py @@ -0,0 +1,42 @@ + +''' http://www.derstandard.at - Austrian Newspaper ''' +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class DerStandardRecipe(BasicNewsRecipe): + title = u'derStandard' + __author__ = 'Gerhard Aigner' + + oldest_article = 1 + max_articles_per_feed = 100 + feeds = [(u'International', u'http://derstandard.at/?page=rss&ressort=internationalpolitik'), + (u'Inland', u'http://derstandard.at/?page=rss&ressort=innenpolitik'), + (u'Wirtschaft', u'http://derstandard.at/?page=rss&ressort=investor'), + (u'Web', u'http://derstandard.at/?page=rss&ressort=webstandard'), + (u'Sport', u'http://derstandard.at/?page=rss&ressort=sport'), + (u'Panorama', u'http://derstandard.at/?page=rss&ressort=panorama'), + (u'Etat', u'http://derstandard.at/?page=rss&ressort=etat'), + (u'Kultur', u'http://derstandard.at/?page=rss&ressort=kultur'), + (u'Wissenschaft', u'http://derstandard.at/?page=rss&ressort=wissenschaft'), + (u'Gesundheit', u'http://derstandard.at/?page=rss&ressort=gesundheit'), + (u'Bildung', u'http://derstandard.at/?page=rss&ressort=subildung')] + + encoding = 'utf-8' + language = _('German') + recursions = 0 + remove_tags = [dict(name='div'), dict(name='a'), dict(name='link'), dict(name='meta'), + dict(name='form',attrs={'name':'sitesearch'}), dict(name='hr')] + preprocess_regexps = [ + (re.compile(r'\[[\d*]\]', re.DOTALL|re.IGNORECASE), lambda match: ''), + (re.compile(r'bgcolor="#\w{3,6}"', re.DOTALL|re.IGNORECASE), lambda match: '') + ] + + def print_version(self, url): + return url.replace('?id=', 'txt/?id=') + + def get_article_url(self, article): + '''if the article links to a index page (ressort) or a picture gallery + (ansichtssache), don't add it''' + if (article.link.count('ressort') > 0 or article.title.lower().count('ansichtssache') > 0): + return None + return article.link From bbcc9d4614ca6c1ecef1e2a9c9898c339c408950 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 13 Apr 2009 10:20:59 -0700 Subject: [PATCH 3/9] New recipe for Die Presse by Gerhard Aigner --- installer/windows/calibre/calibre.mpi | 49 ++----------------- src/calibre/web/feeds/recipes/__init__.py | 2 +- .../web/feeds/recipes/recipe_diepresse.py | 40 +++++++++++++++ 3 files changed, 46 insertions(+), 45 deletions(-) create mode 100644 src/calibre/web/feeds/recipes/recipe_diepresse.py diff --git a/installer/windows/calibre/calibre.mpi b/installer/windows/calibre/calibre.mpi index 8073c45f29..a519695367 100644 --- a/installer/windows/calibre/calibre.mpi +++ b/installer/windows/calibre/calibre.mpi @@ -571,9 +571,6 @@ Condition 08195201-0797-932C-4B51-E5EF9D1D41BD -active Yes -parent 710F2507-2557 Condition 2E18F4AE-F1BB-5C62-2900-73A576A49261 -active Yes -parent 710F2507-2557-652D-EA55-440D710EFDFA -title {String Is Condition} -component StringIsCondition -TreeObject::id 2E18F4AE-F1BB-5C62-2900-73A576A49261 InstallComponent 21B897C4-24BE-70D1-58EA-DE78EFA60719 -setup Install -type action -conditions 76FA3CA2-1F09-75C5-C6CF-72719A8EC4A5 -title {Message Box} -component MessageBox -command insert -active Yes -parent 8A7FD0C2-F053-8764-F204-4BAE71E05708 Condition 76FA3CA2-1F09-75C5-C6CF-72719A8EC4A5 -active Yes -parent 21B897C4-24BE-70D1-58EA-DE78EFA60719 -title {String Is Condition} -component StringIsCondition -TreeObject::id 76FA3CA2-1F09-75C5-C6CF-72719A8EC4A5 -InstallComponent 5D20DD8D-064A-9922-29E1-A7FABEF3666A -setup Install -type action -conditions {E5D227F7-E549-EFA9-1781-EFA6C5EEEC5C A8856922-E6C1-160B-E55C-5C1806A89136} -title {Launch Application Checkbutton} -component AddWidget -command insert -active Yes -parent 8A7FD0C2-F053-8764-F204-4BAE71E05708 -Condition E5D227F7-E549-EFA9-1781-EFA6C5EEEC5C -active Yes -parent 5D20DD8D-064A-9922-29E1-A7FABEF3666A -title {File Exists Condition} -component FileExistsCondition -TreeObject::id E5D227F7-E549-EFA9-1781-EFA6C5EEEC5C -Condition A8856922-E6C1-160B-E55C-5C1806A89136 -active Yes -parent 5D20DD8D-064A-9922-29E1-A7FABEF3666A -title {String Is Condition} -component StringIsCondition -TreeObject::id A8856922-E6C1-160B-E55C-5C1806A89136 InstallComponent 940F7FED-7D20-7264-3BF9-ED78205A76B3 -setup Install -type action -conditions {96440B8B-C6D0-FCCA-6D3C-7ECE1C304CC0 FBA33088-C809-DD6B-D337-EADBF1CEE966} -title {Desktop Shortcut Checkbutton} -component AddWidget -command insert -active Yes -parent 8A7FD0C2-F053-8764-F204-4BAE71E05708 Condition 96440B8B-C6D0-FCCA-6D3C-7ECE1C304CC0 -active Yes -parent 940F7FED-7D20-7264-3BF9-ED78205A76B3 -title {File Exists Condition} -component FileExistsCondition -TreeObject::id 96440B8B-C6D0-FCCA-6D3C-7ECE1C304CC0 Condition FBA33088-C809-DD6B-D337-EADBF1CEE966 -active Yes -parent 940F7FED-7D20-7264-3BF9-ED78205A76B3 -title {String Is Condition} -component StringIsCondition -TreeObject::id FBA33088-C809-DD6B-D337-EADBF1CEE966 @@ -630,7 +627,7 @@ Condition 03FA7EEF-F626-B69A-09C6-0AA7A54EE9E7 -active Yes -parent E32519F3-A540 InstallComponent D86BBA5C-4903-33BA-59F8-4266A3D45896 -setup Install -type action -conditions {C4C0A903-CF2A-D25A-27AB-A64219FB7E70 5EC7056B-6F90-311E-2C6F-76E96164CFFD} -title {Install Quick Launch Shortcut} -component InstallWindowsShortcut -command insert -active Yes -parent 28BAE662-E103-4E3F-D298-C8FBA36361FC Condition C4C0A903-CF2A-D25A-27AB-A64219FB7E70 -active Yes -parent D86BBA5C-4903-33BA-59F8-4266A3D45896 -title {String Is Condition} -component StringIsCondition -TreeObject::id C4C0A903-CF2A-D25A-27AB-A64219FB7E70 Condition 5EC7056B-6F90-311E-2C6F-76E96164CFFD -active Yes -parent D86BBA5C-4903-33BA-59F8-4266A3D45896 -title {File Exists Condition} -component FileExistsCondition -TreeObject::id 5EC7056B-6F90-311E-2C6F-76E96164CFFD -InstallComponent 2A230259-3A6F-8669-8B8B-23C3E7C1BFC2 -setup Install -type action -conditions {4E5FC4FE-5D37-B216-CFFE-E046A2D6321E E560F3A1-208D-2B4F-2C87-E08595F8E1CD 9C1E4BD9-066D-ABCE-28D0-9E194B9F8475} -title {Launch Application} -component ExecuteExternalProgram -command insert -active Yes -parent 28BAE662-E103-4E3F-D298-C8FBA36361FC +InstallComponent 2A230259-3A6F-8669-8B8B-23C3E7C1BFC2 -setup Install -type action -conditions {4E5FC4FE-5D37-B216-CFFE-E046A2D6321E E560F3A1-208D-2B4F-2C87-E08595F8E1CD 9C1E4BD9-066D-ABCE-28D0-9E194B9F8475} -title {Launch Application} -component ExecuteExternalProgram -command insert -active No -parent 28BAE662-E103-4E3F-D298-C8FBA36361FC Condition 4E5FC4FE-5D37-B216-CFFE-E046A2D6321E -active Yes -parent 2A230259-3A6F-8669-8B8B-23C3E7C1BFC2 -title {String Is Condition} -component StringIsCondition -TreeObject::id 4E5FC4FE-5D37-B216-CFFE-E046A2D6321E Condition E560F3A1-208D-2B4F-2C87-E08595F8E1CD -active Yes -parent 2A230259-3A6F-8669-8B8B-23C3E7C1BFC2 -title {String Is Condition} -component StringIsCondition -TreeObject::id E560F3A1-208D-2B4F-2C87-E08595F8E1CD Condition 9C1E4BD9-066D-ABCE-28D0-9E194B9F8475 -active Yes -parent 2A230259-3A6F-8669-8B8B-23C3E7C1BFC2 -title {File Exists Condition} -component FileExistsCondition -TreeObject::id 9C1E4BD9-066D-ABCE-28D0-9E194B9F8475 @@ -802,6 +799,9 @@ CreateQuickLaunchShortcut 28FDA3F4-B799-901F-8A27-AA04F0C022AB,Title,subst 1 +2A230259-3A6F-8669-8B8B-23C3E7C1BFC2,Active +No + 2A230259-3A6F-8669-8B8B-23C3E7C1BFC2,Conditions {3 conditions} @@ -976,27 +976,6 @@ disabled 5C66451D-6042-DBDE-0D8C-31156EE244AD,Widget {Back Button;Next Button} -5D20DD8D-064A-9922-29E1-A7FABEF3666A,Background -white - -5D20DD8D-064A-9922-29E1-A7FABEF3666A,Conditions -{2 conditions} - -5D20DD8D-064A-9922-29E1-A7FABEF3666A,Text,subst -1 - -5D20DD8D-064A-9922-29E1-A7FABEF3666A,Type -checkbutton - -5D20DD8D-064A-9922-29E1-A7FABEF3666A,VirtualText -LaunchApplication - -5D20DD8D-064A-9922-29E1-A7FABEF3666A,X -185 - -5D20DD8D-064A-9922-29E1-A7FABEF3666A,Y -130 - 5EC7056B-6F90-311E-2C6F-76E96164CFFD,CheckCondition {Before Action is Executed} @@ -1408,15 +1387,6 @@ disabled A75C97CC-01AC-C12A-D663-A54E3257F11B,Widget {Back Button;Next Button} -A8856922-E6C1-160B-E55C-5C1806A89136,CheckCondition -{Before Action is Executed} - -A8856922-E6C1-160B-E55C-5C1806A89136,Operator -false - -A8856922-E6C1-160B-E55C-5C1806A89136,String -<%InstallStopped%> - AAEC34E6-7F02-18F2-30BB-744738192A3B,Conditions {2 conditions} @@ -1730,12 +1700,6 @@ disabled E5CBB018-A89D-3145-CFF5-CFC3B62BEA97,Widget {NextButton; CancelButton} -E5D227F7-E549-EFA9-1781-EFA6C5EEEC5C,CheckCondition -{Before Action is Executed} - -E5D227F7-E549-EFA9-1781-EFA6C5EEEC5C,Filename -<%ProgramExecutable%> - E611105F-DC85-9E20-4F7B-E63C54E5DF06,Message,subst 1 @@ -2340,9 +2304,6 @@ Please make sure that calibre is not running, as this will cause the install to 48E8A9D6-B57E-C506-680D-898C65DD2A1B,Title <%InstallApplicationText%> -5D20DD8D-064A-9922-29E1-A7FABEF3666A,Text -<%LaunchApplicationText%> - 64B8D0F3-4B11-DA22-D6E7-7248872D5FA7,Message <%UninstallStartupText%> @@ -2356,7 +2317,7 @@ Please make sure that calibre is not running, as this will cause the install to {<%AppName%> Installation complete} 8A7FD0C2-F053-8764-F204-4BAE71E05708,Message -{Installation of <%AppName%> was successful. Click Finish to quit the installer.} +{Installation of <%AppName%> was successful. Click Finish to quit the installer. <%AppName%> can be launched from the start menu.} 940F7FED-7D20-7264-3BF9-ED78205A76B3,Text <%CreateDesktopShortcutText%> diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index ef9f58b003..c006501ca5 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -39,7 +39,7 @@ recipe_modules = ['recipe_' + r for r in ( 'nacional_cro', '24sata', 'dnevni_avaz', 'glas_srpske', '24sata_rs', 'krstarica', 'krstarica_en', 'tanjug', 'laprensa_ni', 'azstarnet', 'corriere_della_sera_it', 'corriere_della_sera_en', 'msdnmag_en', - 'moneynews', 'der_standard', + 'moneynews', 'der_standard', 'diepresse', )] import re, imp, inspect, time, os diff --git a/src/calibre/web/feeds/recipes/recipe_diepresse.py b/src/calibre/web/feeds/recipes/recipe_diepresse.py new file mode 100644 index 0000000000..c806575356 --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_diepresse.py @@ -0,0 +1,40 @@ +import re + +from calibre.web.feeds.news import BasicNewsRecipe + +class DiePresseRecipe(BasicNewsRecipe): + title = u'diePresse' + oldest_article = 1 + max_articles_per_feed = 100 + recursions = 0 + language = _('German') + __author__ = 'Gerhard Aigner' + + preprocess_regexps = [ + (re.compile(r'Textversion', re.DOTALL), lambda match: ''), + ] + remove_tags = [dict(name='hr'), + dict(name='br'), + dict(name='small'), + dict(name='img'), + dict(name='div', attrs={'class':'textnavi'}), + dict(name='h1', attrs={'class':'titel'}), + dict(name='a', attrs={'class':'print'}), + dict(name='div', attrs={'class':'hline'})] + feeds = [(u'Politik', u'http://diepresse.com/rss/Politik'), + (u'Wirtschaft', u'http://diepresse.com/rss/Wirtschaft'), + (u'Europa', u'http://diepresse.com/rss/EU'), + (u'Panorama', u'http://diepresse.com/rss/Panorama'), + (u'Sport', u'http://diepresse.com/rss/Sport'), + (u'Kultur', u'http://diepresse.com/rss/Kultur'), + (u'Leben', u'http://diepresse.com/rss/Leben'), + (u'Tech', u'http://diepresse.com/rss/Tech'), + (u'Science', u'http://diepresse.com/rss/Science'), + (u'Bildung', u'http://diepresse.com/rss/Bildung'), + (u'Gesundheit', u'http://diepresse.com/rss/Gesundheit'), + (u'Recht', u'http://diepresse.com/rss/Recht'), + (u'Spectrum', u'http://diepresse.com/rss/Spectrum'), + (u'Meinung', u'http://diepresse.com/rss/Meinung')] + + def print_version(self, url): + return url.replace('home','text/home') From d8ed8c0c079ce5985b4a44160a03d23040231080 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 14 Apr 2009 12:07:10 -0700 Subject: [PATCH 4/9] New recipe for NZZ Online by Darko Miletic --- src/calibre/gui2/images/news/nzz_ger.png | Bin 0 -> 811 bytes src/calibre/web/feeds/recipes/__init__.py | 2 +- .../web/feeds/recipes/recipe_nzz_ger.py | 66 ++++++++++++++++++ 3 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 src/calibre/gui2/images/news/nzz_ger.png create mode 100644 src/calibre/web/feeds/recipes/recipe_nzz_ger.py diff --git a/src/calibre/gui2/images/news/nzz_ger.png b/src/calibre/gui2/images/news/nzz_ger.png new file mode 100644 index 0000000000000000000000000000000000000000..ba9591853f174d660a8e26fcd84f9262d2c9c882 GIT binary patch literal 811 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87??slT^vI!PNz;b^pJ5BY1`j%bo1=DW!(+Y zEO$!OIW}&%;1}%b+0`QQP$7Y%aYgSztGi1%gdDG&i(v5;-17O2pW*|qCT)&)rPULX zme0QK+ZF5YyR7oep348fpV$Al$*8mWax$VN>L@3V=8OOV*414KD;gX(hCJ62;C%C6 zPa?<1#bkN1z$?`ip_}F{HB4e)yjuM;OaI9#o{aTgS^hWgoXnj6+YC2;@P?W?4sYoa0YnwLBGyJEKP@g@`py`_5u}iOKe9w1(IY(qg2&-1p z0-la*XF@X1nM;@oNGQG+Tf!<5mTBi55o^D6>%1_YEVZsHJR&P5hVMTkAi06>)H+oI zCw1LG@fo|$8MAyiF3e)Q5M`~lzbL0>o)haWcgsle zY4==8{B+i?myNUY?{I$j;LM^~jUHzm4)V002zB#6$WSPBd8be1)(Mv-+U~wLW=KgF zJE57=5S)sn{|H$znQMOBCe#o-HtxUn=c-*W!CnPxmTk2j9y+AdFLMB!t=B8VEpS?-{*&>{l5fEY^o)$5hck*sfi`2 zx+y?{!N|bCQrEyl*U%)y(8$Wb#LB=-*TCG$z`)D7<{?Z&ZhlH;S|z3iQ!672D^nwg VhG}*6TY(xFJYD@<);T3K0RTHJQ}h4; literal 0 HcmV?d00001 diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index c006501ca5..c6444ec48e 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -39,7 +39,7 @@ recipe_modules = ['recipe_' + r for r in ( 'nacional_cro', '24sata', 'dnevni_avaz', 'glas_srpske', '24sata_rs', 'krstarica', 'krstarica_en', 'tanjug', 'laprensa_ni', 'azstarnet', 'corriere_della_sera_it', 'corriere_della_sera_en', 'msdnmag_en', - 'moneynews', 'der_standard', 'diepresse', + 'moneynews', 'der_standard', 'diepresse', 'nzz_ger', )] import re, imp, inspect, time, os diff --git a/src/calibre/web/feeds/recipes/recipe_nzz_ger.py b/src/calibre/web/feeds/recipes/recipe_nzz_ger.py new file mode 100644 index 0000000000..cdd23064bb --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_nzz_ger.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' + +''' +www.nzz.ch +''' + +from calibre.web.feeds.recipes import BasicNewsRecipe + +class Nzz(BasicNewsRecipe): + title = 'NZZ Online' + __author__ = 'Darko Miletic' + description = 'Laufend aktualisierte Nachrichten, Analysen und Hintergruende zu Politik, Wirtschaft, Kultur und Sport' + publisher = 'NZZ AG' + category = 'news, politics, nachrichten, Switzerland' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + encoding = 'utf-8' + use_embedded_content = False + lang = 'de-CH' + language = _('German') + + html2lrf_options = [ + '--comment', description + , '--category', category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"' + + keep_only_tags = [dict(name='div', attrs={'class':'article'})] + + remove_tags = [ + dict(name=['object','link','base','script']) + ,dict(name='div',attrs={'class':['more','teaser','advXertXoriXals','legal']}) + ,dict(name='div',attrs={'id':['popup-src','readercomments','google-ad','advXertXoriXals']}) + ] + + feeds = [ + (u'Neuste Artikel', u'http://www.nzz.ch/feeds/recent/' ) + ,(u'International' , u'http://www.nzz.ch/nachrichten/international?rss=true') + ,(u'Schweiz' , u'http://www.nzz.ch/nachrichten/schweiz?rss=true') + ,(u'Wirtschaft' , u'http://www.nzz.ch/nachrichten/wirtschaft/aktuell?rss=true') + ,(u'Finanzmaerkte' , u'http://www.nzz.ch/finanzen/nachrichten?rss=true') + ,(u'Zuerich' , u'http://www.nzz.ch/nachrichten/zuerich?rss=true') + ,(u'Sport' , u'http://www.nzz.ch/nachrichten/sport?rss=true') + ,(u'Panorama' , u'http://www.nzz.ch/nachrichten/panorama?rss=true') + ,(u'Kultur' , u'http://www.nzz.ch/nachrichten/kultur/aktuell?rss=true') + ,(u'Wissenschaft' , u'http://www.nzz.ch/nachrichten/wissenschaft?rss=true') + ,(u'Medien' , u'http://www.nzz.ch/nachrichten/medien?rss=true') + ,(u'Reisen' , u'http://www.nzz.ch/magazin/reisen?rss=true') + ] + + def preprocess_html(self, soup): + soup.html['xml:lang'] = self.lang + soup.html['lang'] = self.lang + mtag = '' + soup.head.insert(0,mtag) + return soup + + def print_version(self, url): + return url + '?printview=true' + From ead9de400281af161d35f7ba58e88fbda6e3d82d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 14 Apr 2009 23:38:22 -0700 Subject: [PATCH 5/9] Fix #2273 (Little bug in Calibra Server) --- src/calibre/ebooks/mobi/reader.py | 32 ++++----- src/calibre/library/server.py | 113 +++++++++++++++++------------- 2 files changed, 79 insertions(+), 66 deletions(-) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 65ff86173f..2033ff11f5 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -157,35 +157,35 @@ class BookHeader(object): class MetadataHeader(BookHeader): def __init__(self, stream): self.stream = stream - + self.ident = self.identity() self.num_sections = self.section_count() - + if self.num_sections >= 2: header = self.header() - BookHeader.__init__(self, header, self.ident, None) + BookHeader.__init__(self, header, self.ident) else: self.exth = None - + def identity(self): self.stream.seek(60) ident = self.stream.read(8).upper() - + if ident not in ['BOOKMOBI', 'TEXTREAD']: raise MobiError('Unknown book type: %s' % ident) return ident - + def section_count(self): self.stream.seek(76) return struct.unpack('>H', self.stream.read(2))[0] - + def section_offset(self, number): self.stream.seek(78+number*8) return struct.unpack('>LBBBB', self.stream.read(8))[0] - + def header(self): section_headers = [] - + # First section with the metadata section_headers.append(self.section_offset(0)) # Second section used to get the lengh of the first @@ -193,20 +193,20 @@ class MetadataHeader(BookHeader): end_off = section_headers[1] off = section_headers[0] - + self.stream.seek(off) return self.stream.read(end_off - off) def section_data(self, number): start = self.section_offset(number) - + if number == self.num_sections -1: end = os.stat(self.stream.name).st_size else: end = self.section_offset(number + 1) - + self.stream.seek(start) - + return self.stream.read(end - start) @@ -618,7 +618,7 @@ class MobiReader(object): self.image_names.append(os.path.basename(path)) im.convert('RGB').save(open(path, 'wb'), format='JPEG') -def get_metadata(stream): +def get_metadata(stream): mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')]) try: mh = MetadataHeader(stream) @@ -632,7 +632,7 @@ def get_metadata(stream): mr.extract_content(tdir) if mr.embedded_mi is not None: mi = mr.embedded_mi - + if hasattr(mh.exth, 'cover_offset'): cover_index = mh.first_image_index + mh.exth.cover_offset data = mh.section_data(int(cover_index)) @@ -646,7 +646,7 @@ def get_metadata(stream): except: import traceback traceback.print_exc() - + return mi diff --git a/src/calibre/library/server.py b/src/calibre/library/server.py index 4ba6253819..8e9b6278d8 100644 --- a/src/calibre/library/server.py +++ b/src/calibre/library/server.py @@ -30,31 +30,31 @@ build_time = datetime.strptime(build_time, '%d %m %Y %H%M%S') server_resources['jquery.js'] = jquery def expose(func): - + def do(self, *args, **kwargs): dict.update(cherrypy.response.headers, {'Server':self.server_name}) return func(self, *args, **kwargs) - + return cherrypy.expose(do) log_access_file = os.path.join(config_dir, 'server_access_log.txt') log_error_file = os.path.join(config_dir, 'server_error_log.txt') - + class LibraryServer(object): - + server_name = __appname__ + '/' + __version__ BOOK = textwrap.dedent('''\ - ${r[8] if r[8] else ''} ''') - + LIBRARY = MarkupTemplate(textwrap.dedent('''\ @@ -72,7 +72,7 @@ class LibraryServer(object): ''')) - + STANZA_ENTRY=MarkupTemplate(textwrap.dedent('''\ ${record[FM['title']]} @@ -87,7 +87,7 @@ class LibraryServer(object): ''')) - + STANZA = MarkupTemplate(textwrap.dedent('''\ @@ -107,7 +107,7 @@ class LibraryServer(object): ''')) - + def __init__(self, db, opts, embedded=False, show_tracebacks=True): self.db = db for item in self.db: @@ -116,7 +116,7 @@ class LibraryServer(object): self.opts = opts self.max_cover_width, self.max_cover_height = \ map(int, self.opts.max_cover.split('x')) - + cherrypy.config.update({ 'log.screen' : opts.develop, 'engine.autoreload_on' : opts.develop, @@ -141,10 +141,10 @@ class LibraryServer(object): 'tools.digest_auth.realm' : (_('Password to access your calibre library. Username is ') + opts.username.strip()).encode('ascii', 'replace'), 'tools.digest_auth.users' : {opts.username.strip():opts.password.strip()}, } - + self.is_running = False self.exception = None - + def setup_loggers(self): access_file = log_access_file error_file = log_error_file @@ -152,20 +152,20 @@ class LibraryServer(object): maxBytes = getattr(log, "rot_maxBytes", 10000000) backupCount = getattr(log, "rot_backupCount", 1000) - + # Make a new RotatingFileHandler for the error log. h = RotatingFileHandler(error_file, 'a', maxBytes, backupCount) h.setLevel(logging.DEBUG) h.setFormatter(cherrypy._cplogging.logfmt) log.error_log.addHandler(h) - + # Make a new RotatingFileHandler for the access log. h = RotatingFileHandler(access_file, 'a', maxBytes, backupCount) h.setLevel(logging.DEBUG) h.setFormatter(cherrypy._cplogging.logfmt) log.access_log.addHandler(h) - + def start(self): self.is_running = False self.setup_loggers() @@ -173,7 +173,7 @@ class LibraryServer(object): try: cherrypy.engine.start() self.is_running = True - publish_zeroconf('Books in calibre', '_stanza._tcp', + publish_zeroconf('Books in calibre', '_stanza._tcp', self.opts.port, {'path':'/stanza'}) cherrypy.engine.block() except Exception, e: @@ -181,10 +181,10 @@ class LibraryServer(object): finally: self.is_running = False stop_zeroconf() - + def exit(self): cherrypy.engine.exit() - + def get_cover(self, id, thumbnail=False): cover = self.db.cover(id, index_is_id=True, as_file=False) if cover is None: @@ -196,14 +196,14 @@ class LibraryServer(object): try: if QApplication.instance() is None: QApplication([]) - + im = QImage() im.loadFromData(cover) if im.isNull(): raise cherrypy.HTTPError(404, 'No valid cover found') width, height = im.width(), im.height() - scaled, width, height = fit_image(width, height, - 60 if thumbnail else self.max_cover_width, + scaled, width, height = fit_image(width, height, + 60 if thumbnail else self.max_cover_width, 80 if thumbnail else self.max_cover_height) if not scaled: return cover @@ -217,7 +217,7 @@ class LibraryServer(object): import traceback traceback.print_exc() raise cherrypy.HTTPError(404, 'Failed to generate cover: %s'%err) - + def get_format(self, id, format): format = format.upper() fmt = self.db.format(id, format, index_is_id=True, as_file=True, mode='rb') @@ -232,7 +232,7 @@ class LibraryServer(object): updated = datetime.utcfromtimestamp(os.stat(path).st_mtime) cherrypy.response.headers['Last-Modified'] = self.last_modified(updated) return fmt.read() - + def sort(self, items, field, order): field = field.lower().strip() if field == 'author': @@ -243,10 +243,23 @@ class LibraryServer(object): raise cherrypy.HTTPError(400, '%s is not a valid sort field'%field) cmpf = cmp if field in ('rating', 'size', 'timestamp') else \ lambda x, y: cmp(x.lower() if x else '', y.lower() if y else '') - field = FIELD_MAP[field] - getter = operator.itemgetter(field) - items.sort(cmp=lambda x, y: cmpf(getter(x), getter(y)), reverse=not order) - + if field == 'series': + items.sort(cmp=self.seriescmp, reverse=not order) + else: + field = FIELD_MAP[field] + getter = operator.itemgetter(field) + items.sort(cmp=lambda x, y: cmpf(getter(x), getter(y)), reverse=not order) + + def seriescmp(self, x, y): + si = FIELD_MAP['series'] + try: + ans = cmp(x[si].lower(), y[si].lower()) + except AttributeError: # Some entries may be None + ans = cmp(x[si], y[si]) + if ans != 0: return ans + return cmp(x[FIELD_MAP['series_index']], y[FIELD_MAP['series_index']]) + + def last_modified(self, updated): lm = updated.strftime('day, %d month %Y %H:%M:%S GMT') day ={0:'Sun', 1:'Mon', 2:'Tue', 3:'Wed', 4:'Thu', 5:'Fri', 6:'Sat'} @@ -254,8 +267,8 @@ class LibraryServer(object): month = {1:'Jan', 2:'Feb', 3:'Mar', 4:'Apr', 5:'May', 6:'Jun', 7:'Jul', 8:'Aug', 9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'} return lm.replace('month', month[updated.month]) - - + + @expose def stanza(self): ' Feeds to read calibre books on a ipod with stanza.' @@ -264,7 +277,7 @@ class LibraryServer(object): r = record[FIELD_MAP['formats']] r = r.upper() if r else '' if 'EPUB' in r or 'PDB' in r: - authors = ' & '.join([i.replace('|', ',') for i in + authors = ' & '.join([i.replace('|', ',') for i in record[FIELD_MAP['authors']].split(',')]) extra = [] rating = record[FIELD_MAP['rating']] @@ -276,7 +289,7 @@ class LibraryServer(object): extra.append('TAGS: %s
'%', '.join(tags.split(','))) series = record[FIELD_MAP['series']] if series: - extra.append('SERIES: %s [%d]
'%(series, + extra.append('SERIES: %s [%d]
'%(series, record[FIELD_MAP['series_index']])) fmt = 'epub' if 'EPUB' in r else 'pdb' mimetype = guess_type('dummy.'+fmt)[0] @@ -288,24 +301,24 @@ class LibraryServer(object): mimetype=mimetype, fmt=fmt, ).render('xml').decode('utf8')) - + updated = self.db.last_modified() cherrypy.response.headers['Last-Modified'] = self.last_modified(updated) cherrypy.response.headers['Content-Type'] = 'text/xml' - + return self.STANZA.generate(subtitle='', data=books, FM=FIELD_MAP, updated=updated, id='urn:calibre:main').render('xml') - + @expose - def library(self, start='0', num='50', sort=None, search=None, + def library(self, start='0', num='50', sort=None, search=None, _=None, order='ascending'): ''' Serves metadata from the calibre database as XML. - + :param sort: Sort results by ``sort``. Can be one of `title,author,rating`. :param search: Filter results by ``search`` query. See :class:`SearchQueryParser` for query syntax :param start,num: Return the slice `[start:start+num]` of the sorted and filtered results - :param _: Firefox seems to sometimes send this when using XMLHttpRequest with no caching + :param _: Firefox seems to sometimes send this when using XMLHttpRequest with no caching ''' try: start = int(start) @@ -321,19 +334,19 @@ class LibraryServer(object): items = [r for r in iter(self.db) if r[0] in ids] if sort is not None: self.sort(items, sort, order) - + book, books = MarkupTemplate(self.BOOK), [] for record in items[start:start+num]: aus = record[2] if record[2] else _('Unknown') authors = '|'.join([i.replace('|', ',') for i in aus.split(',')]) books.append(book.generate(r=record, authors=authors).render('xml').decode('utf-8')) updated = self.db.last_modified() - + cherrypy.response.headers['Content-Type'] = 'text/xml' cherrypy.response.headers['Last-Modified'] = self.last_modified(updated) - return self.LIBRARY.generate(books=books, start=start, updated=updated, + return self.LIBRARY.generate(books=books, start=start, updated=updated, total=len(ids)).render('xml') - + @expose def index(self, **kwargs): 'The / URL' @@ -341,8 +354,8 @@ class LibraryServer(object): if stanza == 919: return self.static('index.html') return self.stanza() - - + + @expose def get(self, what, id): 'Serves files, covers, thumbnails from the calibre database' @@ -361,7 +374,7 @@ class LibraryServer(object): if what == 'cover': return self.get_cover(id) return self.get_format(id, what) - + @expose def static(self, name): 'Serves static content' @@ -392,11 +405,11 @@ def start_threaded_server(db, opts): server.thread.setDaemon(True) server.thread.start() return server - + def stop_threaded_server(server): server.exit() server.thread = None - + def option_parser(): return config().option_parser('%prog '+ _('[options]\n\nStart the calibre content server.')) From 9d8e8dd8b93013e84668feab5feb4399f06f4044 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 15 Apr 2009 12:53:40 -0700 Subject: [PATCH 6/9] =?UTF-8?q?New=20recipe=20for=20Hessisch=20Nieders?= =?UTF-8?q?=C3=A4chsische=20Allgemeine=20by=20Oliver=20neissner?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/calibre/gui2/images/news/hna.png | Bin 0 -> 827 bytes src/calibre/web/feeds/recipes/__init__.py | 2 +- src/calibre/web/feeds/recipes/recipe_hna.py | 40 ++++++++++++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 src/calibre/gui2/images/news/hna.png create mode 100644 src/calibre/web/feeds/recipes/recipe_hna.py diff --git a/src/calibre/gui2/images/news/hna.png b/src/calibre/gui2/images/news/hna.png new file mode 100644 index 0000000000000000000000000000000000000000..f4e1135dd560ce57312e174c27a044c6295cf7dc GIT binary patch literal 827 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?@H#T^vI!PA{G8>mBMS(>{Nu<@0;R_s^d3 zp0)i_Q%103;FgIw(VikAfe9D2UPWKo)-UkHNvUe0P6x*#ahD5=I+zw7nQ}to^V;7< z&*sd`EYy3nZPwWXJT{;I{5;G5ub7j8{arkx9D|ooHCH2pVu6|)i{y&1z-x)djAnBW zMaLX7ix>56@G+2k_<(nj@bS*__ey)pXK%RFS@%t(c*XP`6Bd`fGZ9I_;_g zW6Las@I%^L6}T%ey_HR|_fdMspUjrO$Sm~4UFn0*ick2@PfSQCP21VkmAqzBgukPR z>h$e-3-w};*;Mk_+Ro68-4U)|I9q$i+uPR|YUE{?6r4VNqE&TEQ$cp?iyMMZPB=fg zn*HMO-xq&>d+e7u>Qn0!Z^Q4j#&hAE_JHQC6F42*C!9b0rY0#OAVBBP9i}zs8z+3I zRb{lvvVP#Qm@Vj0i-SaSFqd%KjFP?^PGLtmk_-2>UkIEOHX(P}#qY-!iYw|aNIb}S z!fKt2sEt&7O0410*PQY}b{YpJFOX%?J!!w=!gp!O@SWCtf7utVeNvpRljL9B=2_nt zc3oln=I*`Ln|*w=?asB#dDUC_e|2WT*IyI5Q~A0V*{)w{u9Q|6VRhQo?%cJTd*TAL zSkBbXUU+9?XWK`O)%reXp9?o{GHRw`XBBDL=Y5u=UPb_R9T;)!`@R z?eaPwN;Q2~?mp1`d~>Jc`ICw1b^B6x-SgG_J3AufK#K;eVCIyS%2WJvSTzMVE{VQ* zF)(%Ky`G7O7xM+PrXDnF7FAj~qu@cxXPui9#FQ8vvp4(PRLc1L-P9-bC-X!1Z(rrw zq+EcBPqoA~q9nN}HL)aBHw8#A7#SE?>Kd5n8k&X}8d;edSQ(k>8kk!d7`%`AvjV0e jH$NpatrA0nff-1Hp&3L&kd+J*Py>UftDnm{r-UW|Y2a5F literal 0 HcmV?d00001 diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index c6444ec48e..9e2ef1969d 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -39,7 +39,7 @@ recipe_modules = ['recipe_' + r for r in ( 'nacional_cro', '24sata', 'dnevni_avaz', 'glas_srpske', '24sata_rs', 'krstarica', 'krstarica_en', 'tanjug', 'laprensa_ni', 'azstarnet', 'corriere_della_sera_it', 'corriere_della_sera_en', 'msdnmag_en', - 'moneynews', 'der_standard', 'diepresse', 'nzz_ger', + 'moneynews', 'der_standard', 'diepresse', 'nzz_ger', 'hna', )] import re, imp, inspect, time, os diff --git a/src/calibre/web/feeds/recipes/recipe_hna.py b/src/calibre/web/feeds/recipes/recipe_hna.py new file mode 100644 index 0000000000..40193336d1 --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_hna.py @@ -0,0 +1,40 @@ +__license__ = 'GPL v3' +__copyright__ = '2008, Kovid Goyal ' + +''' +Fetch Hessisch Niedersächsische Allgemeine. +''' + +from calibre.web.feeds.news import BasicNewsRecipe + + +class hnaDe(BasicNewsRecipe): + + title = 'HNA' + description = 'local news from Hessen/Germany' + __author__ = 'Oliver Niesner' + use_embedded_content = False + language = _('German') + use_embedded_content = False + timefmt = ' [%d %b %Y]' + max_articles_per_feed = 40 + no_stylesheets = True + encoding = 'iso-8859-1' + + remove_tags = [dict(id='topnav'), + dict(id='nav_main'), + dict(id='suchen'), + dict(id=''), + dict(name='span'), + dict(name='ul', attrs={'class':'linklist'}), + dict(name='a', attrs={'href':'#'}), + dict(name='p', attrs={'class':'breadcrumb'}), + dict(name='p', attrs={'class':'h5'})] + #remove_tags_after = [dict(name='div', attrs={'class':'rahmenbreaking'})] + remove_tags_after = [dict(name='a', attrs={'href':'#'})] + + feeds = [ ('hna_soehre', 'http://feeds2.feedburner.com/hna/soehre'), + ('hna_kassel', 'http://feeds2.feedburner.com/hna/kassel') ] + + + From 332dbf44441a775d6affb52f768fcb86fee17a04 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 15 Apr 2009 13:00:01 -0700 Subject: [PATCH 7/9] Updated recipes for Linux devices and Toms Hardware (German) --- src/calibre/web/feeds/recipes/recipe_hna.py | 2 +- .../web/feeds/recipes/recipe_linuxdevices.py | 158 +++++++++--------- .../feeds/recipes/recipe_tomshardware_de.py | 35 ++-- 3 files changed, 97 insertions(+), 98 deletions(-) diff --git a/src/calibre/web/feeds/recipes/recipe_hna.py b/src/calibre/web/feeds/recipes/recipe_hna.py index 40193336d1..c4faec94ba 100644 --- a/src/calibre/web/feeds/recipes/recipe_hna.py +++ b/src/calibre/web/feeds/recipes/recipe_hna.py @@ -2,7 +2,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' ''' -Fetch Hessisch Niedersächsische Allgemeine. +Fetch Hessisch Niedersachsische Allgemeine. ''' from calibre.web.feeds.news import BasicNewsRecipe diff --git a/src/calibre/web/feeds/recipes/recipe_linuxdevices.py b/src/calibre/web/feeds/recipes/recipe_linuxdevices.py index 04db6b02d5..cd914e96ad 100644 --- a/src/calibre/web/feeds/recipes/recipe_linuxdevices.py +++ b/src/calibre/web/feeds/recipes/recipe_linuxdevices.py @@ -1,80 +1,78 @@ -__license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' - -''' -Fetch Linuxdevices. -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Sueddeutsche(BasicNewsRecipe): - - title = u'Linuxdevices' - description = 'News about Linux driven Hardware' - __author__ = 'Oliver Niesner' - use_embedded_content = False - timefmt = ' [%a, %d %b %Y]' - language = _('English') - max_articles_per_feed = 50 - no_stylesheets = True - encoding = 'latin1' - - remove_tags_after = [dict(id='nointelliTXT')] - filter_regexps = [r'ad\.doubleclick\.net'] - - - remove_tags = [dict(name='div', attrs={'class':'bannerSuperBanner'}), - dict(name='div', attrs={'class':'bannerSky'}), - dict(name='div', attrs={'class':'footerLinks'}), - dict(name='div', attrs={'class':'seitenanfang'}), - dict(name='td', attrs={'class':'mar5'}), - dict(name='td', attrs={'class':'mar5'}), - dict(name='table', attrs={'class':'pageAktiv'}), - dict(name='table', attrs={'class':'xartable'}), - dict(name='table', attrs={'class':'wpnavi'}), - dict(name='table', attrs={'class':'bgcontent absatz'}), - dict(name='table', attrs={'class':'footer'}), - dict(name='table', attrs={'class':'artikelBox'}), - dict(name='table', attrs={'class':'kommentare'}), - dict(name='table', attrs={'class':'pageBoxBot'}), - #dict(name='table', attrs={'with':'100%'}), - dict(name='td', attrs={'nowrap':'nowrap'}), - dict(name='td', attrs={'valign':'middle'}), - dict(name='td', attrs={'align':'left'}), - dict(name='td', attrs={'align':'center'}), - dict(name='td', attrs={'height':'5'}), - dict(name='div', attrs={'class':'artikelBox navigatorBox'}), - dict(name='div', attrs={'class':'similar-article-box'}), - dict(name='div', attrs={'class':'videoBigHack'}), - dict(name='td', attrs={'class':'artikelDruckenRight'}), - dict(name='td', attrs={'class':'width="200"'}), - dict(name='a', attrs={'href':'/news'}), - dict(name='a', attrs={'href':'/'}), - dict(name='a', attrs={'href':'/articles'}), - dict(name='a', attrs={'href':'/cgi-bin/survey/survey.cgi'}), - dict(name='a', attrs={'href':'/cgi-bin/board/UltraBoard.pl'}), - dict(name='iframe'), - dict(name='form'), - #dict(name='tr', attrs={'td':'Click here to learn'}), - dict(name='span', attrs={'class':'hidePrint'}), - dict(id='headerLBox'), - dict(id='nointelliTXT'), - dict(id='rechteSpalte'), - dict(id='newsticker-list-small'), - dict(id='ntop5'), - dict(id='ntop5send'), - dict(id='ntop5commented'), - dict(id='nnav-bgheader'), - dict(id='nnav-headerteaser'), - dict(id='nnav-head'), - dict(id='nnav-top'), - dict(id='nnav-logodiv'), - dict(id='nnav-logo'), - dict(id='nnav-oly'), - dict(id='readcomment')] - - - - feeds = [ (u'Linuxdevices', u'http://www.linuxdevices.com/backend/headlines.rss') ] - +__license__ = 'GPL v3' +__copyright__ = '2008, Kovid Goyal ' + +''' +Fetch Linuxdevices. +''' + +from calibre.web.feeds.news import BasicNewsRecipe + + +class Sueddeutsche(BasicNewsRecipe): + + title = u'Linuxdevices' + description = 'News about Linux driven Hardware' + __author__ = 'Oliver Niesner' + use_embedded_content = False + timefmt = ' [%a %d %b %Y]' + max_articles_per_feed = 50 + no_stylesheets = True + html2epub_options = 'linearize_tables = True\nbase_font_size2=14' + encoding = 'latin1' + + + remove_tags_after = [dict(id='nointelliTXT')] + filter_regexps = [r'ad\.doubleclick\.net'] + + remove_tags = [dict(name='div', attrs={'class':'bannerSuperBanner'}), + dict(name='div', attrs={'class':'bannerSky'}), + dict(name='div', attrs={'class':'footerLinks'}), + dict(name='div', attrs={'class':'seitenanfang'}), + dict(name='td', attrs={'class':'mar5'}), + dict(name='td', attrs={'class':'mar5'}), + dict(name='table', attrs={'class':'pageAktiv'}), + dict(name='table', attrs={'class':'xartable'}), + dict(name='table', attrs={'class':'wpnavi'}), + dict(name='table', attrs={'class':'bgcontent absatz'}), + dict(name='table', attrs={'class':'footer'}), + dict(name='table', attrs={'class':'artikelBox'}), + dict(name='table', attrs={'class':'kommentare'}), + dict(name='table', attrs={'class':'pageBoxBot'}), + dict(name='td', attrs={'nowrap':'nowrap'}), + dict(name='td', attrs={'valign':'middle'}), + dict(name='td', attrs={'align':'left'}), + dict(name='td', attrs={'align':'center'}), + dict(name='td', attrs={'height':'5'}), + dict(name='div', attrs={'class':'artikelBox navigatorBox'}), + dict(name='div', attrs={'class':'similar-article-box'}), + dict(name='div', attrs={'class':'videoBigHack'}), + dict(name='td', attrs={'class':'artikelDruckenRight'}), + dict(name='td', attrs={'class':'width="200"'}), + dict(name='a', attrs={'href':'/news'}), + dict(name='a', attrs={'href':'/'}), + dict(name='a', attrs={'href':'/articles'}), + dict(name='a', attrs={'href':'/cgi-bin/survey/survey.cgi'}), + dict(name='a', attrs={'href':'/cgi-bin/board/UltraBoard.pl'}), + dict(name='iframe'), + dict(name='form'), + dict(name='span', attrs={'class':'hidePrint'}), + dict(id='headerLBox'), + dict(id='nointelliTXT'), + dict(id='rechteSpalte'), + dict(id='newsticker-list-small'), + dict(id='ntop5'), + dict(id='ntop5send'), + dict(id='ntop5commented'), + dict(id='nnav-bgheader'), + dict(id='nnav-headerteaser'), + dict(id='nnav-head'), + dict(id='nnav-top'), + dict(id='nnav-logodiv'), + dict(id='nnav-logo'), + dict(id='nnav-oly'), + dict(id='readcomment')] + + + + feeds = [ (u'Linuxdevices', u'http://www.linuxdevices.com/backend/headlines.rss') ] + diff --git a/src/calibre/web/feeds/recipes/recipe_tomshardware_de.py b/src/calibre/web/feeds/recipes/recipe_tomshardware_de.py index 52f1583408..7ba656e1d5 100644 --- a/src/calibre/web/feeds/recipes/recipe_tomshardware_de.py +++ b/src/calibre/web/feeds/recipes/recipe_tomshardware_de.py @@ -8,26 +8,19 @@ Fetch tomshardware. from calibre.web.feeds.news import BasicNewsRecipe -class TomsHardwareDe(BasicNewsRecipe): - - title = 'Tom\'s Hardware German' - description = 'Computer news in german' +class cdnet(BasicNewsRecipe): + + title = 'tomshardware' + description = 'computer news in german' __author__ = 'Oliver Niesner' use_embedded_content = False timefmt = ' [%d %b %Y]' max_articles_per_feed = 50 - language = _('German') no_stylesheets = True + language = _('German') encoding = 'utf-8' - #preprocess_regexps = \ -# [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in -# [ -# (r'<84>', lambda match: ''), -# (r'<93>', lambda match: ''), -# ] -# ] - + remove_tags = [dict(id='outside-advert'), dict(id='advertRightWhite'), dict(id='header-advert'), @@ -36,9 +29,15 @@ class TomsHardwareDe(BasicNewsRecipe): dict(id='header-top'), dict(id='header-tools'), dict(id='nbComment'), + dict(id='commentTools'), dict(id='internalSidebar'), dict(id='header-news-infos'), + dict(id='header-news-tools'), dict(id='breadcrumbs'), + dict(id='emailTools'), + dict(id='bookmarkTools'), + dict(id='printTools'), + dict(id='header-nextNews'), dict(id=''), dict(name='div', attrs={'class':'pyjama'}), dict(name='href', attrs={'class':'comment'}), @@ -47,8 +46,10 @@ class TomsHardwareDe(BasicNewsRecipe): dict(name='div', attrs={'class':'greyBox clearfix'}), dict(id='')] #remove_tags_before = [dict(id='header-news-title')] - remove_tags_after = [dict(name='div', attrs={'class':'news-elm'})] + remove_tags_after = [dict(name='div', attrs={'class':'btmGreyTables'})] #remove_tags_after = [dict(name='div', attrs={'class':'intelliTXT'})] - - feeds = [ ('tomshardware', 'http://www.tomshardware.com/de/feeds/rss2/tom-s-hardware-de,12-1.xml') ] - + + feeds = [ ('tomshardware', 'http://www.tomshardware.com/de/feeds/rss2/tom-s-hardware-de,12-1.xml') ] + + + From c2b79fe5d93fb1c127ba0ba2351fe6a0118b3c30 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 15 Apr 2009 13:36:37 -0700 Subject: [PATCH 8/9] Allow single click fetching of covers in the Edit metadata dialog. Now calibre will automatically try to get the ISBN needed to fetch the cover based on title and author of the book. --- src/calibre/ebooks/metadata/lit.py | 4 +- src/calibre/gui2/__init__.py | 2 + src/calibre/gui2/dialogs/metadata_single.py | 79 +++++++++++++++------ src/calibre/gui2/dialogs/password.ui | 79 +++++++++++---------- 4 files changed, 102 insertions(+), 62 deletions(-) diff --git a/src/calibre/ebooks/metadata/lit.py b/src/calibre/ebooks/metadata/lit.py index 2129af76dd..e45016f303 100644 --- a/src/calibre/ebooks/metadata/lit.py +++ b/src/calibre/ebooks/metadata/lit.py @@ -25,7 +25,7 @@ def get_metadata(stream): for item in litfile.manifest.values(): if item.path in candidates: try: - covers.append((litfile.get_file('/data/'+item.internal), + covers.append((litfile.get_file('/data/'+item.internal), ctype)) except: pass @@ -33,7 +33,7 @@ def get_metadata(stream): covers.sort(cmp=lambda x, y:cmp(len(x[0]), len(y[0])), reverse=True) idx = 0 if len(covers) > 1: - if covers[1][1] == covers[1][0]+'-standard': + if covers[1][1] == covers[0][1]+'-standard': idx = 1 mi.cover_data = ('jpg', covers[idx][0]) return mi diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index 1da5bb6851..b3a67d003e 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -69,6 +69,8 @@ def _config(): 'clicked')) c.add_opt('show_donate_button', default=True, help='Show donation button') + c.add_opt('asked_library_thing_password', default=False, + help='Asked library thing password at least once.') return ConfigProxy(c) config = _config() diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py index a9d20905c6..e3d4b5b521 100644 --- a/src/calibre/gui2/dialogs/metadata_single.py +++ b/src/calibre/gui2/dialogs/metadata_single.py @@ -25,24 +25,47 @@ from calibre import islinux from calibre.ebooks.metadata.meta import get_metadata from calibre.utils.config import prefs from calibre.customize.ui import run_plugins_on_import +from calibre.gui2 import config as gui_conf class CoverFetcher(QThread): - def __init__(self, username, password, isbn, timeout): + def __init__(self, username, password, isbn, timeout, title, author): self.username = username self.password = password self.timeout = timeout self.isbn = isbn + self.title = title + self.needs_isbn = False + self.author = author QThread.__init__(self) self.exception = self.traceback = self.cover_data = None def run(self): try: + if not self.isbn: + from calibre.ebooks.metadata.fetch import search + if not self.title: + self.needs_isbn = True + return + au = self.author if self.author else None + key = prefs['isbndb_com_key'] + if not key: + key = None + results = search(title=self.title, author=au, + isbndb_key=key)[0] + results = sorted([x.isbn for x in results if x.isbn], + cmp=lambda x,y:cmp(len(x),len(y)), reverse=True) + if not results: + self.needs_isbn = True + return + self.isbn = results[0] + login(self.username, self.password, force=False) self.cover_data = cover_from_isbn(self.isbn, timeout=self.timeout)[0] except Exception, e: self.exception = e self.traceback = traceback.format_exc() + print self.traceback @@ -64,6 +87,8 @@ class AuthorCompleter(QCompleter): class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): + COVER_FETCH_TIMEOUT = 240 # seconds + def do_reset_cover(self, *args): pix = QPixmap(':/images/book.svg') self.cover.setPixmap(pix) @@ -345,36 +370,39 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): def lt_password_dialog(self): return PasswordDialog(self, 'LibraryThing account', - _('

Enter your username and password for LibraryThing.com.
If you do not have one, you can register for free!.

')) + _('

Enter your username and password for ' + 'LibraryThing.com. This is optional. It will ' + 'make fetching of covers faster and more reliable.
If ' + 'you do not have an account, you can ' + 'register for ' + 'free.

')) def change_password(self): d = self.lt_password_dialog() d.exec_() def fetch_cover(self): - isbn = qstring_to_unicode(self.isbn.text()) - if isbn: - d = self.lt_password_dialog() - if not d.username() or not d.password(): - d.exec_() - if d.result() != PasswordDialog.Accepted: - return - self.fetch_cover_button.setEnabled(False) - self.setCursor(Qt.WaitCursor) - self.cover_fetcher = CoverFetcher(d.username(), d.password(), isbn, - self.timeout) - self.cover_fetcher.start() - self._hangcheck = QTimer(self) - self.connect(self._hangcheck, SIGNAL('timeout()'), self.hangcheck) - self.cf_start_time = time.time() - self.pi.start(_('Downloading cover...')) - self._hangcheck.start(100) - else: - error_dialog(self, _('Cannot fetch cover'), - _('You must specify the ISBN identifier for this book.')).exec_() + isbn = unicode(self.isbn.text()).strip() + d = self.lt_password_dialog() + if not gui_conf['asked_library_thing_password'] and \ + (not d.username() or not d.password()): + d.exec_() + gui_conf['asked_library_thing_password'] = True + self.fetch_cover_button.setEnabled(False) + self.setCursor(Qt.WaitCursor) + title, author = map(unicode, (self.title.text(), self.authors.text())) + self.cover_fetcher = CoverFetcher(d.username(), d.password(), isbn, + self.timeout, title, author) + self.cover_fetcher.start() + self._hangcheck = QTimer(self) + self.connect(self._hangcheck, SIGNAL('timeout()'), self.hangcheck) + self.cf_start_time = time.time() + self.pi.start(_('Downloading cover...')) + self._hangcheck.start(100) def hangcheck(self): - if not (self.cover_fetcher.isFinished() or time.time()-self.cf_start_time > 150): + if not self.cover_fetcher.isFinished() and \ + time.time()-self.cf_start_time < self.COVER_FETCH_TIMEOUT: return self._hangcheck.stop() @@ -385,6 +413,11 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): _('Could not fetch cover.
')+ _('The download timed out.')).exec_() return + if self.cover_fetcher.needs_isbn: + error_dialog(self, _('Cannot fetch cover'), + _('Could not find cover for this book. Try ' + 'specifying the ISBN first.')).exec_() + return if self.cover_fetcher.exception is not None: err = self.cover_fetcher.exception error_dialog(self, _('Cannot fetch cover'), diff --git a/src/calibre/gui2/dialogs/password.ui b/src/calibre/gui2/dialogs/password.ui index 865c065a10..3fc982371e 100644 --- a/src/calibre/gui2/dialogs/password.ui +++ b/src/calibre/gui2/dialogs/password.ui @@ -1,7 +1,8 @@ - + + Dialog - - + + 0 0 @@ -9,66 +10,70 @@ 209 - + Password needed - - :/images/mimetypes/unknown.svg + + + :/images/mimetypes/unknown.svg:/images/mimetypes/unknown.svg - - - - + + + + TextLabel - + + true + + true - - - + + + &Username: - + gui_username - - + + - - - + + + &Password: - + gui_password - - - + + + QLineEdit::Password - - - + + + Qt::Horizontal - - QDialogButtonBox::Cancel|QDialogButtonBox::NoButton|QDialogButtonBox::Ok + + QDialogButtonBox::Cancel|QDialogButtonBox::Ok - - - + + + &Show password @@ -76,7 +81,7 @@ - + @@ -85,11 +90,11 @@ Dialog accept() - + 248 254 - + 157 274 @@ -101,11 +106,11 @@ Dialog reject() - + 316 260 - + 286 274 From d8430be7c84815b6e2b2003088f0bb6cbbb3596c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 15 Apr 2009 14:37:49 -0700 Subject: [PATCH 9/9] MOBI Output: Fix regression that was preventing the creation of pagebreaks between the contents of different HTML files in the input. --- src/calibre/ebooks/mobi/writer.py | 2 +- src/calibre/gui2/dialogs/metadata_single.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index 9990fa9061..c860a9418e 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -218,7 +218,7 @@ class Serializer(object): for elem in item.data.find(XHTML('body')): self.serialize_elem(elem, item) #buffer.write('') - buffer.write('') + buffer.write('') def serialize_elem(self, elem, item, nsrmap=NSRMAP): buffer = self.buffer diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py index e3d4b5b521..c48c7c3640 100644 --- a/src/calibre/gui2/dialogs/metadata_single.py +++ b/src/calibre/gui2/dialogs/metadata_single.py @@ -30,8 +30,8 @@ from calibre.gui2 import config as gui_conf class CoverFetcher(QThread): def __init__(self, username, password, isbn, timeout, title, author): - self.username = username - self.password = password + self.username = username.strip() if username else username + self.password = password.strip() if password else password self.timeout = timeout self.isbn = isbn self.title = title @@ -60,7 +60,8 @@ class CoverFetcher(QThread): return self.isbn = results[0] - login(self.username, self.password, force=False) + if self.username and self.password: + login(self.username, self.password, force=False) self.cover_data = cover_from_isbn(self.isbn, timeout=self.timeout)[0] except Exception, e: self.exception = e