'
'''Read meta information from PDF files'''
-import sys, os
+import sys, os, re
-from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.metadata import MetaInformation, authors_to_string, get_parser
from pyPdf import PdfFileReader
def get_metadata(stream):
@@ -28,16 +28,43 @@ def get_metadata(stream):
msg = u'Couldn\'t read metadata from pdf: %s with error %s'%(mi.title, unicode(err))
print >>sys.stderr, msg.encode('utf8')
return mi
-
+
+def set_metadata(stream, mi):
+ stream.seek(0)
+ raw = stream.read()
+ if mi.title:
+ tit = mi.title.encode('utf-8') if isinstance(mi.title, unicode) else mi.title
+ raw = re.compile(r'<<.*?/Title\((.+?)\)', re.DOTALL).sub(lambda m: m.group().replace(m.group(1), tit), raw)
+ if mi.authors:
+ au = authors_to_string(mi.authors)
+ if isinstance(au, unicode):
+ au = au.encode('utf-8')
+ raw = re.compile(r'<<.*?/Author\((.+?)\)', re.DOTALL).sub(lambda m: m.group().replace(m.group(1), au), raw)
+ stream.seek(0)
+ stream.truncate()
+ stream.write(raw)
+ stream.seek(0)
+
+def option_parser():
+ p = get_parser('pdf')
+ p.remove_option('--category')
+ p.remove_option('--comment')
+ return p
def main(args=sys.argv):
+ #p = option_parser()
+ #opts, args = p.parse_args(args)
if len(args) != 2:
print >>sys.stderr, _('Usage: pdf-meta file.pdf')
print >>sys.stderr, _('No filename specified.')
return 1
- path = os.path.abspath(os.path.expanduser(args[1]))
- print get_metadata(open(path, 'rb'))
+ stream = open(os.path.abspath(os.path.expanduser(args[1])), 'r+b')
+ #mi = MetaInformation(opts.title, opts.authors)
+ #if mi.title or mi.authors:
+ # set_metadata(stream, mi)
+ print unicode(get_metadata(stream)).encode('utf-8')
+
return 0
if __name__ == '__main__':
diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py
index 3d60ffc871..f53dc04d6a 100644
--- a/src/calibre/gui2/__init__.py
+++ b/src/calibre/gui2/__init__.py
@@ -414,7 +414,8 @@ except:
class Application(QApplication):
def __init__(self, args):
- QApplication.__init__(self, args)
+ qargs = [i.encode('utf-8') if isinstance(i, unicode) else i for i in args]
+ QApplication.__init__(self, qargs)
self.translator = QTranslator(self)
lang = get_lang()
if lang:
diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py
index 77f94374f0..da12bc2715 100644
--- a/src/calibre/gui2/main.py
+++ b/src/calibre/gui2/main.py
@@ -667,9 +667,10 @@ class Main(MainWindow, Ui_MainWindow):
self.library_view.model().refresh_ids([id])
for row in rows:
- MetadataSingleDialog(self, row.row(),
+ d = MetadataSingleDialog(self, row.row(),
self.library_view.model().db,
accepted_callback=accepted)
+ d.exec_()
def edit_bulk_metadata(self, checked):
'''
diff --git a/src/calibre/library/server.py b/src/calibre/library/server.py
index e494ddee2c..9a7d30244b 100644
--- a/src/calibre/library/server.py
+++ b/src/calibre/library/server.py
@@ -252,7 +252,7 @@ class LibraryServer(object):
extra.append('RATING: %s<br />'%rating)
tags = record[FIELD_MAP['tags']]
if tags:
- extra.append('TAGS: %s<br />'%', '.join(tags))
+ extra.append('TAGS: %s<br />'%', '.join(tags.split(',')))
series = record[FIELD_MAP['series']]
if series:
extra.append('SERIES: %s [%d]<br />'%(series, record[FIELD_MAP['series_index']]))
diff --git a/src/calibre/library/static/index.html b/src/calibre/library/static/index.html
index 19d8c42c51..15f5268f05 100644
--- a/src/calibre/library/static/index.html
+++ b/src/calibre/library/static/index.html
@@ -11,7 +11,7 @@
-

+
diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py
index 3847dabb9c..451ee539f8 100644
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -500,7 +500,6 @@ class BasicNewsRecipe(object, LoggingInterface):
if self.no_stylesheets:
for link in list(soup.findAll('link', type=re.compile('css')))+list(soup.findAll('style')):
link.extract()
-
head = soup.find('head')
if not head:
head = soup.find('body')
diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py
index 1462c688c1..185587c48f 100644
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -11,7 +11,7 @@ recipes = [
'ars_technica', 'upi', 'new_yorker', 'irish_times', 'iht', 'lanacion',
'discover_magazine', 'scientific_american', 'new_york_review_of_books',
'daily_telegraph', 'guardian', 'el_pais', 'new_scientist', 'b92',
- 'politika', 'moscow_times', 'latimes'
+ 'politika', 'moscow_times', 'latimes', 'japan_times', 'san_fran_chronicle',
]
import re, imp, inspect, time, os
diff --git a/src/calibre/web/feeds/recipes/japan_times.py b/src/calibre/web/feeds/recipes/japan_times.py
new file mode 100644
index 0000000000..91d3604539
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/japan_times.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2008, Darko Miletic '
+'''
+japantimes.co.jp
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class JapanTimes(BasicNewsRecipe):
+ title = u'The Japan Times'
+ __author__ = 'Darko Miletic'
+ description = 'News from Japan'
+ oldest_article = 7
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ use_embedded_content = False
+
+ keep_only_tags = [ dict(name='div', attrs={'id':'searchresult'}) ]
+ remove_tags_after = [ dict(name='div', attrs={'id':'mainbody' }) ]
+ remove_tags = [
+ dict(name='div' , attrs={'id':'ads' })
+ ,dict(name='table', attrs={'width':470})
+ ]
+
+
+ feeds = [
+ (u'The Japan Times', u'http://feedproxy.google.com/japantimes')
+ ]
\ No newline at end of file
diff --git a/src/calibre/web/feeds/recipes/san_fran_chronicle.py b/src/calibre/web/feeds/recipes/san_fran_chronicle.py
new file mode 100644
index 0000000000..8682b743a4
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/san_fran_chronicle.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2008, Darko Miletic '
+'''
+sfgate.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class SanFranciscoChronicle(BasicNewsRecipe):
+ title = u'San Francisco Chronicle'
+ __author__ = u'Darko Miletic'
+ description = u'San Francisco news'
+ oldest_article = 7
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ use_embedded_content = False
+
+ keep_only_tags = [dict(name='td' , attrs={'class':'column1 w627'})]
+ remove_tags_after = dict(name='div', attrs={'id':'articlecontent' })
+ remove_tags = [
+ dict(name='div', attrs={'class':'tools tools_top'})
+ ,dict(name='div', attrs={'id':'articlebox' })
+ ]
+
+ feeds = [
+ (u'Top News Stories', u'http://www.sfgate.com/rss/feeds/news.xml')
+ ]
\ No newline at end of file
diff --git a/src/calibre/web/fetch/simple.py b/src/calibre/web/fetch/simple.py
index 283e9da787..57eec4d528 100644
--- a/src/calibre/web/fetch/simple.py
+++ b/src/calibre/web/fetch/simple.py
@@ -122,6 +122,7 @@ class RecursiveFetcher(object, LoggingInterface):
def get_soup(self, src):
nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE)
nmassage.extend(self.preprocess_regexps)
+ nmassage += [(re.compile(r'<!DOCTYPE .+?>', re.DOTALL), lambda m: '')] # Some websites have buggy doctype declarations that mess up beautifulsoup
soup = BeautifulSoup(xml_to_unicode(src, self.verbose, strip_encoding_pats=True)[0], markupMassage=nmassage)
if self.keep_only_tags: