diff --git a/recipes/icons/socialdiva.png b/recipes/icons/socialdiva.png new file mode 100644 index 0000000000..262a94432d Binary files /dev/null and b/recipes/icons/socialdiva.png differ diff --git a/recipes/socialdiva.recipe b/recipes/socialdiva.recipe new file mode 100644 index 0000000000..1befd03d4a --- /dev/null +++ b/recipes/socialdiva.recipe @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011' +''' +socialdiva.ro +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class SocialDiva(BasicNewsRecipe): + title = u'Social Diva' + __author__ = u'Silviu Cotoara' + description = u'When in doubt, wear red' + publisher = 'Social Diva' + oldest_article = 5 + language = 'ro' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + category = 'Ziare,Reviste,Femei' + encoding = 'utf-8' + cover_url = 'http://www.socialdiva.ro/images/logo.png' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + keep_only_tags = [ + dict(name='div', attrs={'class':'col-alpha mt5 content_articol'}), + dict(name='div', attrs={'class':'mt5'}) + ] + + remove_tags = [ + dict(name='a', attrs={'class':['comments float-left scroll mt5']}), + dict(name='a', attrs={'class':['comments float-left scroll']}), + dict(name='div', attrs={'class':['rating-container relative float-left']}), + dict(name='div', attrs={'class':['float-right social_articol']}) + ] + + remove_tags_after = [ + dict(name='a', attrs={'class':['comments float-left scroll mt5']}) + ] + + feeds = [ + (u'Feeds', u'http://www.socialdiva.ro/rss.html') + ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) diff --git a/recipes/the_marker.recipe b/recipes/the_marker.recipe index e5f1ffc761..12b2f5e2ff 100644 --- a/recipes/the_marker.recipe +++ b/recipes/the_marker.recipe @@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe 
class AdvancedUserRecipe1283848012(BasicNewsRecipe): description = 'TheMarker Financial News in Hebrew' - __author__ = 'TonyTheBookworm, Marbs' + __author__ = 'Marbs' cover_url = 'http://static.ispot.co.il/wp-content/upload/2009/09/themarker.jpg' title = u'TheMarker' language = 'he' @@ -11,42 +11,38 @@ class AdvancedUserRecipe1283848012(BasicNewsRecipe): remove_javascript = True timefmt = '[%a, %d %b, %Y]' oldest_article = 1 - remove_tags = [dict(name='tr', attrs={'bgcolor':['#738A94']}) ] - max_articles_per_feed = 10 + keep_only_tags =dict(name='div', attrs={'id':'content'}) + remove_attributes = ['width','float','margin-left'] + no_stylesheets = True + remove_tags = [dict(name='div', attrs={'class':['social-nav article-social-nav','prsnlArticleEnvelope','cb']}) , + dict(name='a', attrs={'href':['/misc/mobile']}) , + dict(name='span', attrs={'class':['post-summ']}) ] + max_articles_per_feed = 100 extra_css='body{direction: rtl;} .article_description{direction: rtl; } a.article{direction: rtl; } .calibre_feed_description{direction: rtl; }' - feeds = [(u'Head Lines', u'http://www.themarker.com/tmc/content/xml/rss/hpfeed.xml'), - (u'TA Market', u'http://www.themarker.com/tmc/content/xml/rss/sections/marketfeed.xml'), - (u'Real Estate', u'http://www.themarker.com/tmc/content/xml/rss/sections/realEstaterfeed.xml'), - (u'Wall Street & Global', u'http://www.themarker.com/tmc/content/xml/rss/sections/wallsfeed.xml'), - (u'Law', u'http://www.themarker.com/tmc/content/xml/rss/sections/lawfeed.xml'), - (u'Media', u'http://www.themarker.com/tmc/content/xml/rss/sections/mediafeed.xml'), - (u'Consumer', u'http://www.themarker.com/tmc/content/xml/rss/sections/consumerfeed.xml'), - (u'Career', u'http://www.themarker.com/tmc/content/xml/rss/sections/careerfeed.xml'), - (u'Car', u'http://www.themarker.com/tmc/content/xml/rss/sections/carfeed.xml'), - (u'High Tech', u'http://www.themarker.com/tmc/content/xml/rss/sections/hightechfeed.xml'), - (u'Investor Guide', 
u'http://www.themarker.com/tmc/content/xml/rss/sections/investorGuidefeed.xml')] + feeds = [(u'Head Lines', u'http://www.themarker.com/cmlink/1.144'), + (u'TA Market', u'http://www.themarker.com/cmlink/1.243'), + (u'Real Estate', u'http://www.themarker.com/cmlink/1.605656'), + (u'Global', u'http://www.themarker.com/cmlink/1.605658'), + (u'Wall Street', u'http://www.themarker.com/cmlink/1.613713'), + (u'SmartPhone', u'http://www.themarker.com/cmlink/1.605661'), + (u'Law', u'http://www.themarker.com/cmlink/1.605664'), + (u'Media', u'http://www.themarker.com/cmlink/1.605660'), + (u'Consumer', u'http://www.themarker.com/cmlink/1.605662'), + (u'Career', u'http://www.themarker.com/cmlink/1.605665'), + (u'Car', u'http://www.themarker.com/cmlink/1.605663'), + (u'High Tech', u'http://www.themarker.com/cmlink/1.605659'), + (u'Small Business', u'http://www.themarker.com/cmlink/1.605666')] def print_version(self, url): - split1 = url.split("=") - weblinks = url + #split1 = url.split("/") + #print_url='http://www.themarker.com/misc/article-print-page/'+split1[-1] + txt=url - if weblinks is not None: - for link in weblinks: - #--------------------------------------------------------- - #here we need some help with some regexpressions - #we are trying to find it.themarker.com in a url - #----------------------------------------------------------- - re1='.*?' # Non-greedy match on filler - re2='(it\\.themarker\\.com)' # Fully Qualified Domain Name 1 - rg = re.compile(re1+re2,re.IGNORECASE|re.DOTALL) - m = rg.search(url) + re1='.*?' 
# Non-greedy match on filler + re2='(tv)' # Word 1 - - if m: - split2 = url.split("article/") - print_url = 'http://it.themarker.com/tmit/PrintArticle/' + split2[1] - - else: - print_url = 'http://www.themarker.com/ibo/misc/printFriendly.jhtml?ElementId=%2Fibo%2Frepositories%2Fstories%2Fm1_2000%2F' + split1[1]+'.xml' - - return print_url + rg = re.compile(re1+re2,re.IGNORECASE|re.DOTALL) + m = rg.search(txt) + if m: + #print 'bad link' + return 1 diff --git a/recipes/volksrant_sub.recipe b/recipes/volksrant_sub.recipe new file mode 100644 index 0000000000..8a5f1543b5 --- /dev/null +++ b/recipes/volksrant_sub.recipe @@ -0,0 +1,115 @@ +from calibre import strftime +from calibre.web.feeds.news import BasicNewsRecipe + +class Volkskrant_full(BasicNewsRecipe): + # This recipe will download the Volkskrant newspaper, + # from the subscribers site. It requires a password. + # Known issues are: articles that are spread out over + # multiple pages will appear multiple times. Pages + # that contain only adverts will appear, but empty. + # The supplement 'Volkskrant Magazine' on saturday + # is currently not downloaded. + # You can set a manual date, to download an archived + # newspaper. Volkskrant stores over a month at the + # moment of writing. To do so I suggest you unmark + # the date on the line below, and insert it in the title. Then + # follow the instructions marked further below. + + title = 'De Volkskrant (subscription)' # [za, 13 nov 2010]' + __author__ = u'Selcal' + description = u"Volkskrant" + oldest_article = 30 + max_articles_per_feed = 100 + no_stylesheets = True + language = 'nl' + use_embedded_content = False + simultaneous_downloads = 1 + delay = 1 + needs_subscription = True + # Set RETRIEVEDATE to 'yyyymmdd' to load an older + # edition. Otherwise keep '%Y%m%d' + # When setting a manual date, unmark and add the date + # to the title above, and unmark the timefmt line to stop + # Calibre from adding today's date in addition. 
+ + # timefmt = '' + RETRIEVEDATE = strftime('%Y%m%d') + INDEX_MAIN = 'http://www.volkskrant.nl/vk-online/VK/' + RETRIEVEDATE + '___/VKN01_001/#text' + INDEX_ARTICLE = 'http://www.volkskrant.nl/vk-online/VK/' + RETRIEVEDATE + '___/VKN01_001/' + LOGIN = 'http://www.volkskrant.nl/vk/user/loggedIn.do' + remove_tags = [dict(name='address')] + cover_url = 'http://www.volkskrant.nl/vk-online/VK/' + RETRIEVEDATE + '___/VKN01_001/page.jpg' + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + + if self.username is not None and self.password is not None: + br.open(self.LOGIN) + br.select_form(nr = 0) + br['username'] = self.username + br['password'] = self.password + br.submit() + return br + + def parse_index(self): + krant = [] + def strip_title(_title): + i = 0 + while ((_title[i] <> ":") and (i <= len(_title))): + i = i + 1 + return(_title[0:i]) + for temp in range (5): + try: + soup = self.index_to_soup(self.INDEX_MAIN) + break + except: + #print '(Retrying main index load)' + continue + mainsoup = soup.find('td', attrs={'id': 'select_page_top'}) + for option in mainsoup.findAll('option'): + articles = [] + _INDEX = 'http://www.volkskrant.nl/vk-online/VK/' + self.RETRIEVEDATE + '___/' + option['value'] + '/#text' + _INDEX_ARTICLE = 'http://www.volkskrant.nl/vk-online/VK/' + self.RETRIEVEDATE + '___/' + option['value'] + '/' + #print '' + #print '<------- Processing section: ' + _INDEX + ' ------------------------->' + for temp in range (5): + try: + soup = self.index_to_soup(_INDEX) + break + except: + #print '(Retrying index load)' + continue + for item in soup.findAll('area'): + art_nr = item['class'] + attrname = art_nr[0:12] + '_section' + option['value'][0:5] + '_' + art_nr[26:len(art_nr)] + #print '==> Found: ' + attrname; + index_title = soup.find('div', attrs={'class': attrname}) + get_title = index_title['title']; + _ARTICLE = _INDEX_ARTICLE + attrname + '.html#text' + title = get_title; + #print '--> Title: ' + title; + #print '--> URL: ' + 
_ARTICLE; + for temp in range (5): + try: + souparticle = self.index_to_soup(_ARTICLE); + break + except: + print '(Retrying URL load)' + continue + headerurl = souparticle.findAll('frame')[0]['src']; + #print '--> Read frame name for header: ' + headerurl; + url = _INDEX_ARTICLE + headerurl[0:len(headerurl)-12] + '_text.html'; + #print '--> Corrected URL: ' + url; + if (get_title <> ''): + title = strip_title(get_title) + date = strftime(' %B %Y') + if (title <> ''): + articles.append({ + 'title' :title + ,'date' :date + ,'url' :url + ,'description':'' + }) + krant.append( (option.string, articles)) + return krant + diff --git a/resources/metadata_sqlite.sql b/resources/metadata_sqlite.sql index 2d95f735e2..9c4f666449 100644 --- a/resources/metadata_sqlite.sql +++ b/resources/metadata_sqlite.sql @@ -7,17 +7,30 @@ CREATE TABLE books ( id INTEGER PRIMARY KEY AUTOINCREMENT, title TEXT NOT NULL DEFAULT 'Unknown' COLLATE NOCASE, sort TEXT COLLATE NOCASE, timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - uri TEXT, - series_index INTEGER NOT NULL DEFAULT 1, + pubdate TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + series_index REAL NOT NULL DEFAULT 1.0, author_sort TEXT COLLATE NOCASE, isbn TEXT DEFAULT "" COLLATE NOCASE, - path TEXT NOT NULL DEFAULT "" - ); + lccn TEXT DEFAULT "" COLLATE NOCASE, + path TEXT NOT NULL DEFAULT "", + flags INTEGER NOT NULL DEFAULT 1 + , uuid TEXT, has_cover BOOL DEFAULT 0, last_modified TIMESTAMP NOT NULL DEFAULT "2000-01-01 00:00:00+00:00"); CREATE TABLE books_authors_link ( id INTEGER PRIMARY KEY, book INTEGER NOT NULL, author INTEGER NOT NULL, UNIQUE(book, author) ); +CREATE TABLE books_languages_link ( id INTEGER PRIMARY KEY, + book INTEGER NOT NULL, + lang_code INTEGER NOT NULL, + item_order INTEGER NOT NULL DEFAULT 0, + UNIQUE(book, lang_code) + ); +CREATE TABLE books_plugin_data(id INTEGER PRIMARY KEY, + book INTEGER NON NULL, + name TEXT NON NULL, + val TEXT NON NULL, + UNIQUE(book,name)); CREATE TABLE books_publishers_link ( id INTEGER 
PRIMARY KEY, book INTEGER NOT NULL, publisher INTEGER NOT NULL, @@ -49,11 +62,51 @@ CREATE TABLE conversion_options ( id INTEGER PRIMARY KEY, data BLOB NOT NULL, UNIQUE(format,book) ); +CREATE TABLE custom_columns ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + label TEXT NOT NULL, + name TEXT NOT NULL, + datatype TEXT NOT NULL, + mark_for_delete BOOL DEFAULT 0 NOT NULL, + editable BOOL DEFAULT 1 NOT NULL, + display TEXT DEFAULT "{}" NOT NULL, + is_multiple BOOL DEFAULT 0 NOT NULL, + normalized BOOL NOT NULL, + UNIQUE(label) + ); +CREATE TABLE data ( id INTEGER PRIMARY KEY, + book INTEGER NON NULL, + format TEXT NON NULL COLLATE NOCASE, + uncompressed_size INTEGER NON NULL, + name TEXT NON NULL, + UNIQUE(book, format) +); CREATE TABLE feeds ( id INTEGER PRIMARY KEY, title TEXT NOT NULL, script TEXT NOT NULL, UNIQUE(title) ); +CREATE TABLE identifiers ( id INTEGER PRIMARY KEY, + book INTEGER NON NULL, + type TEXT NON NULL DEFAULT "isbn" COLLATE NOCASE, + val TEXT NON NULL COLLATE NOCASE, + UNIQUE(book, type) + ); +CREATE TABLE languages ( id INTEGER PRIMARY KEY, + lang_code TEXT NON NULL COLLATE NOCASE, + UNIQUE(lang_code) + ); +CREATE TABLE library_id ( id INTEGER PRIMARY KEY, + uuid TEXT NOT NULL, + UNIQUE(uuid) + ); +CREATE TABLE metadata_dirtied(id INTEGER PRIMARY KEY, + book INTEGER NOT NULL, + UNIQUE(book)); +CREATE TABLE preferences(id INTEGER PRIMARY KEY, + key TEXT NON NULL, + val TEXT NON NULL, + UNIQUE(key)); CREATE TABLE publishers ( id INTEGER PRIMARY KEY, name TEXT NOT NULL COLLATE NOCASE, sort TEXT COLLATE NOCASE, @@ -72,34 +125,143 @@ CREATE TABLE tags ( id INTEGER PRIMARY KEY, name TEXT NOT NULL COLLATE NOCASE, UNIQUE (name) ); -CREATE TABLE data ( id INTEGER PRIMARY KEY, - book INTEGER NON NULL, - format TEXT NON NULL COLLATE NOCASE, - uncompressed_size INTEGER NON NULL, - name TEXT NON NULL, - UNIQUE(book, format) -); - CREATE VIEW meta AS - SELECT id, title, - (SELECT concat(name) FROM authors WHERE authors.id IN (SELECT author from 
books_authors_link WHERE book=books.id)) authors, - (SELECT name FROM publishers WHERE publishers.id IN (SELECT publisher from books_publishers_link WHERE book=books.id)) publisher, - (SELECT rating FROM ratings WHERE ratings.id IN (SELECT rating from books_ratings_link WHERE book=books.id)) rating, - timestamp, - (SELECT MAX(uncompressed_size) FROM data WHERE book=books.id) size, - (SELECT concat(name) FROM tags WHERE tags.id IN (SELECT tag from books_tags_link WHERE book=books.id)) tags, - (SELECT text FROM comments WHERE book=books.id) comments, - (SELECT name FROM series WHERE series.id IN (SELECT series FROM books_series_link WHERE book=books.id)) series, - series_index, - sort, - author_sort, - (SELECT concat(format) FROM data WHERE data.book=books.id) formats, - isbn - FROM books; + SELECT id, title, + (SELECT sortconcat(bal.id, name) FROM books_authors_link AS bal JOIN authors ON(author = authors.id) WHERE book = books.id) authors, + (SELECT name FROM publishers WHERE publishers.id IN (SELECT publisher from books_publishers_link WHERE book=books.id)) publisher, + (SELECT rating FROM ratings WHERE ratings.id IN (SELECT rating from books_ratings_link WHERE book=books.id)) rating, + timestamp, + (SELECT MAX(uncompressed_size) FROM data WHERE book=books.id) size, + (SELECT concat(name) FROM tags WHERE tags.id IN (SELECT tag from books_tags_link WHERE book=books.id)) tags, + (SELECT text FROM comments WHERE book=books.id) comments, + (SELECT name FROM series WHERE series.id IN (SELECT series FROM books_series_link WHERE book=books.id)) series, + series_index, + sort, + author_sort, + (SELECT concat(format) FROM data WHERE data.book=books.id) formats, + isbn, + path, + lccn, + pubdate, + flags, + uuid + FROM books; +CREATE VIEW tag_browser_authors AS SELECT + id, + name, + (SELECT COUNT(id) FROM books_authors_link WHERE author=authors.id) count, + (SELECT AVG(ratings.rating) + FROM books_authors_link AS tl, books_ratings_link AS bl, ratings + WHERE 
tl.author=authors.id AND bl.book=tl.book AND + ratings.id = bl.rating AND ratings.rating <> 0) avg_rating, + sort AS sort + FROM authors; +CREATE VIEW tag_browser_filtered_authors AS SELECT + id, + name, + (SELECT COUNT(books_authors_link.id) FROM books_authors_link WHERE + author=authors.id AND books_list_filter(book)) count, + (SELECT AVG(ratings.rating) + FROM books_authors_link AS tl, books_ratings_link AS bl, ratings + WHERE tl.author=authors.id AND bl.book=tl.book AND + ratings.id = bl.rating AND ratings.rating <> 0 AND + books_list_filter(bl.book)) avg_rating, + sort AS sort + FROM authors; +CREATE VIEW tag_browser_filtered_publishers AS SELECT + id, + name, + (SELECT COUNT(books_publishers_link.id) FROM books_publishers_link WHERE + publisher=publishers.id AND books_list_filter(book)) count, + (SELECT AVG(ratings.rating) + FROM books_publishers_link AS tl, books_ratings_link AS bl, ratings + WHERE tl.publisher=publishers.id AND bl.book=tl.book AND + ratings.id = bl.rating AND ratings.rating <> 0 AND + books_list_filter(bl.book)) avg_rating, + name AS sort + FROM publishers; +CREATE VIEW tag_browser_filtered_ratings AS SELECT + id, + rating, + (SELECT COUNT(books_ratings_link.id) FROM books_ratings_link WHERE + rating=ratings.id AND books_list_filter(book)) count, + (SELECT AVG(ratings.rating) + FROM books_ratings_link AS tl, books_ratings_link AS bl, ratings + WHERE tl.rating=ratings.id AND bl.book=tl.book AND + ratings.id = bl.rating AND ratings.rating <> 0 AND + books_list_filter(bl.book)) avg_rating, + rating AS sort + FROM ratings; +CREATE VIEW tag_browser_filtered_series AS SELECT + id, + name, + (SELECT COUNT(books_series_link.id) FROM books_series_link WHERE + series=series.id AND books_list_filter(book)) count, + (SELECT AVG(ratings.rating) + FROM books_series_link AS tl, books_ratings_link AS bl, ratings + WHERE tl.series=series.id AND bl.book=tl.book AND + ratings.id = bl.rating AND ratings.rating <> 0 AND + books_list_filter(bl.book)) avg_rating, 
+ (title_sort(name)) AS sort + FROM series; +CREATE VIEW tag_browser_filtered_tags AS SELECT + id, + name, + (SELECT COUNT(books_tags_link.id) FROM books_tags_link WHERE + tag=tags.id AND books_list_filter(book)) count, + (SELECT AVG(ratings.rating) + FROM books_tags_link AS tl, books_ratings_link AS bl, ratings + WHERE tl.tag=tags.id AND bl.book=tl.book AND + ratings.id = bl.rating AND ratings.rating <> 0 AND + books_list_filter(bl.book)) avg_rating, + name AS sort + FROM tags; +CREATE VIEW tag_browser_publishers AS SELECT + id, + name, + (SELECT COUNT(id) FROM books_publishers_link WHERE publisher=publishers.id) count, + (SELECT AVG(ratings.rating) + FROM books_publishers_link AS tl, books_ratings_link AS bl, ratings + WHERE tl.publisher=publishers.id AND bl.book=tl.book AND + ratings.id = bl.rating AND ratings.rating <> 0) avg_rating, + name AS sort + FROM publishers; +CREATE VIEW tag_browser_ratings AS SELECT + id, + rating, + (SELECT COUNT(id) FROM books_ratings_link WHERE rating=ratings.id) count, + (SELECT AVG(ratings.rating) + FROM books_ratings_link AS tl, books_ratings_link AS bl, ratings + WHERE tl.rating=ratings.id AND bl.book=tl.book AND + ratings.id = bl.rating AND ratings.rating <> 0) avg_rating, + rating AS sort + FROM ratings; +CREATE VIEW tag_browser_series AS SELECT + id, + name, + (SELECT COUNT(id) FROM books_series_link WHERE series=series.id) count, + (SELECT AVG(ratings.rating) + FROM books_series_link AS tl, books_ratings_link AS bl, ratings + WHERE tl.series=series.id AND bl.book=tl.book AND + ratings.id = bl.rating AND ratings.rating <> 0) avg_rating, + (title_sort(name)) AS sort + FROM series; +CREATE VIEW tag_browser_tags AS SELECT + id, + name, + (SELECT COUNT(id) FROM books_tags_link WHERE tag=tags.id) count, + (SELECT AVG(ratings.rating) + FROM books_tags_link AS tl, books_ratings_link AS bl, ratings + WHERE tl.tag=tags.id AND bl.book=tl.book AND + ratings.id = bl.rating AND ratings.rating <> 0) avg_rating, + name AS sort + FROM tags; 
CREATE INDEX authors_idx ON books (author_sort COLLATE NOCASE); CREATE INDEX books_authors_link_aidx ON books_authors_link (author); CREATE INDEX books_authors_link_bidx ON books_authors_link (book); CREATE INDEX books_idx ON books (sort COLLATE NOCASE); +CREATE INDEX books_languages_link_aidx ON books_languages_link (lang_code); +CREATE INDEX books_languages_link_bidx ON books_languages_link (book); CREATE INDEX books_publishers_link_aidx ON books_publishers_link (publisher); CREATE INDEX books_publishers_link_bidx ON books_publishers_link (book); CREATE INDEX books_ratings_link_aidx ON books_ratings_link (rating); @@ -111,32 +273,38 @@ CREATE INDEX books_tags_link_bidx ON books_tags_link (book); CREATE INDEX comments_idx ON comments (book); CREATE INDEX conversion_options_idx_a ON conversion_options (format COLLATE NOCASE); CREATE INDEX conversion_options_idx_b ON conversion_options (book); +CREATE INDEX custom_columns_idx ON custom_columns (label); CREATE INDEX data_idx ON data (book); +CREATE INDEX formats_idx ON data (format); +CREATE INDEX languages_idx ON languages (lang_code COLLATE NOCASE); CREATE INDEX publishers_idx ON publishers (name COLLATE NOCASE); -CREATE INDEX series_idx ON series (sort COLLATE NOCASE); +CREATE INDEX series_idx ON series (name COLLATE NOCASE); CREATE INDEX tags_idx ON tags (name COLLATE NOCASE); CREATE TRIGGER books_delete_trg - AFTER DELETE ON books - BEGIN - DELETE FROM books_authors_link WHERE book=OLD.id; - DELETE FROM books_publishers_link WHERE book=OLD.id; - DELETE FROM books_ratings_link WHERE book=OLD.id; - DELETE FROM books_series_link WHERE book=OLD.id; - DELETE FROM books_tags_link WHERE book=OLD.id; - DELETE FROM data WHERE book=OLD.id; - DELETE FROM comments WHERE book=OLD.id; - DELETE FROM conversion_options WHERE book=OLD.id; + AFTER DELETE ON books + BEGIN + DELETE FROM books_authors_link WHERE book=OLD.id; + DELETE FROM books_publishers_link WHERE book=OLD.id; + DELETE FROM books_ratings_link WHERE book=OLD.id; + 
DELETE FROM books_series_link WHERE book=OLD.id; + DELETE FROM books_tags_link WHERE book=OLD.id; + DELETE FROM books_languages_link WHERE book=OLD.id; + DELETE FROM data WHERE book=OLD.id; + DELETE FROM comments WHERE book=OLD.id; + DELETE FROM conversion_options WHERE book=OLD.id; + DELETE FROM books_plugin_data WHERE book=OLD.id; + DELETE FROM identifiers WHERE book=OLD.id; END; -CREATE TRIGGER books_insert_trg - AFTER INSERT ON books +CREATE TRIGGER books_insert_trg AFTER INSERT ON books BEGIN - UPDATE books SET sort=title_sort(NEW.title) WHERE id=NEW.id; + UPDATE books SET sort=title_sort(NEW.title),uuid=uuid4() WHERE id=NEW.id; END; CREATE TRIGGER books_update_trg - AFTER UPDATE ON books - BEGIN - UPDATE books SET sort=title_sort(NEW.title) WHERE id=NEW.id; - END; + AFTER UPDATE ON books + BEGIN + UPDATE books SET sort=title_sort(NEW.title) + WHERE id=NEW.id AND OLD.title <> NEW.title; + END; CREATE TRIGGER fkc_comments_insert BEFORE INSERT ON comments BEGIN @@ -169,23 +337,41 @@ CREATE TRIGGER fkc_data_update THEN RAISE(ABORT, 'Foreign key violation: book not in books') END; END; -CREATE TRIGGER fkc_delete_books_authors_link +CREATE TRIGGER fkc_delete_on_authors BEFORE DELETE ON authors BEGIN SELECT CASE - WHEN (SELECT COUNT(id) FROM books_authors_link WHERE book=OLD.book) > 0 - THEN RAISE(ABORT, 'Foreign key violation: author is still referenced') + WHEN (SELECT COUNT(id) FROM books_authors_link WHERE author=OLD.id) > 0 + THEN RAISE(ABORT, 'Foreign key violation: authors is still referenced') END; END; -CREATE TRIGGER fkc_delete_books_publishers_link +CREATE TRIGGER fkc_delete_on_languages + BEFORE DELETE ON languages + BEGIN + SELECT CASE + WHEN (SELECT COUNT(id) FROM books_languages_link WHERE lang_code=OLD.id) > 0 + THEN RAISE(ABORT, 'Foreign key violation: language is still referenced') + END; + END; +CREATE TRIGGER fkc_delete_on_languages_link + BEFORE INSERT ON books_languages_link + BEGIN + SELECT CASE + WHEN (SELECT id from books WHERE id=NEW.book) 
IS NULL + THEN RAISE(ABORT, 'Foreign key violation: book not in books') + WHEN (SELECT id from languages WHERE id=NEW.lang_code) IS NULL + THEN RAISE(ABORT, 'Foreign key violation: lang_code not in languages') + END; + END; +CREATE TRIGGER fkc_delete_on_publishers BEFORE DELETE ON publishers BEGIN SELECT CASE - WHEN (SELECT COUNT(id) FROM books_publishers_link WHERE book=OLD.book) > 0 - THEN RAISE(ABORT, 'Foreign key violation: publisher is still referenced') + WHEN (SELECT COUNT(id) FROM books_publishers_link WHERE publisher=OLD.id) > 0 + THEN RAISE(ABORT, 'Foreign key violation: publishers is still referenced') END; END; -CREATE TRIGGER fkc_delete_books_series_link +CREATE TRIGGER fkc_delete_on_series BEFORE DELETE ON series BEGIN SELECT CASE @@ -193,12 +379,12 @@ CREATE TRIGGER fkc_delete_books_series_link THEN RAISE(ABORT, 'Foreign key violation: series is still referenced') END; END; -CREATE TRIGGER fkc_delete_books_tags_link +CREATE TRIGGER fkc_delete_on_tags BEFORE DELETE ON tags BEGIN SELECT CASE WHEN (SELECT COUNT(id) FROM books_tags_link WHERE tag=OLD.id) > 0 - THEN RAISE(ABORT, 'Foreign key violation: tag is still referenced') + THEN RAISE(ABORT, 'Foreign key violation: tags is still referenced') END; END; CREATE TRIGGER fkc_insert_books_authors_link @@ -267,6 +453,22 @@ CREATE TRIGGER fkc_update_books_authors_link_b THEN RAISE(ABORT, 'Foreign key violation: author not in authors') END; END; +CREATE TRIGGER fkc_update_books_languages_link_a + BEFORE UPDATE OF book ON books_languages_link + BEGIN + SELECT CASE + WHEN (SELECT id from books WHERE id=NEW.book) IS NULL + THEN RAISE(ABORT, 'Foreign key violation: book not in books') + END; + END; +CREATE TRIGGER fkc_update_books_languages_link_b + BEFORE UPDATE OF lang_code ON books_languages_link + BEGIN + SELECT CASE + WHEN (SELECT id from languages WHERE id=NEW.lang_code) IS NULL + THEN RAISE(ABORT, 'Foreign key violation: lang_code not in languages') + END; + END; CREATE TRIGGER 
fkc_update_books_publishers_link_a BEFORE UPDATE OF book ON books_publishers_link BEGIN @@ -341,3 +543,4 @@ CREATE TRIGGER series_update_trg BEGIN UPDATE series SET sort=NEW.name WHERE id=NEW.id; END; +pragma user_version=20; diff --git a/resources/quick_start.epub b/resources/quick_start.epub index 2d590ebef2..3b289537a6 100644 Binary files a/resources/quick_start.epub and b/resources/quick_start.epub differ diff --git a/src/calibre/ebooks/chm/input.py b/src/calibre/ebooks/chm/input.py index fce07c2359..b5074e8a72 100644 --- a/src/calibre/ebooks/chm/input.py +++ b/src/calibre/ebooks/chm/input.py @@ -19,12 +19,12 @@ class CHMInput(InputFormatPlugin): description = 'Convert CHM files to OEB' file_types = set(['chm']) - def _chmtohtml(self, output_dir, chm_path, no_images, log): + def _chmtohtml(self, output_dir, chm_path, no_images, log, debug_dump=False): from calibre.ebooks.chm.reader import CHMReader log.debug('Opening CHM file') rdr = CHMReader(chm_path, log, self.opts) log.debug('Extracting CHM to %s' % output_dir) - rdr.extract_content(output_dir) + rdr.extract_content(output_dir, debug_dump=debug_dump) self._chm_reader = rdr return rdr.hhc_path @@ -47,7 +47,12 @@ class CHMInput(InputFormatPlugin): stream.close() log.debug('tdir=%s' % tdir) log.debug('stream.name=%s' % stream.name) - mainname = self._chmtohtml(tdir, chm_name, no_images, log) + debug_dump = False + odi = options.debug_pipeline + if odi: + debug_dump = os.path.join(odi, 'input') + mainname = self._chmtohtml(tdir, chm_name, no_images, log, + debug_dump=debug_dump) mainpath = os.path.join(tdir, mainname) metadata = get_metadata_from_reader(self._chm_reader) @@ -56,7 +61,6 @@ class CHMInput(InputFormatPlugin): #from calibre import ipython #ipython() - odi = options.debug_pipeline options.debug_pipeline = None options.input_encoding = 'utf-8' # try a custom conversion: diff --git a/src/calibre/ebooks/chm/reader.py b/src/calibre/ebooks/chm/reader.py index 24814a34f9..5f23ad0241 100644 --- 
a/src/calibre/ebooks/chm/reader.py +++ b/src/calibre/ebooks/chm/reader.py @@ -97,7 +97,7 @@ class CHMReader(CHMFile): raise CHMError("'%s' is zero bytes in length!"%(path,)) return data - def ExtractFiles(self, output_dir=os.getcwdu()): + def ExtractFiles(self, output_dir=os.getcwdu(), debug_dump=False): html_files = set([]) for path in self.Contents(): lpath = os.path.join(output_dir, path) @@ -123,6 +123,9 @@ class CHMReader(CHMFile): self.log.warn('%r filename too long, skipping'%path) continue raise + if debug_dump: + import shutil + shutil.copytree(output_dir, os.path.join(debug_dump, 'debug_dump')) for lpath in html_files: with open(lpath, 'r+b') as f: data = f.read() @@ -249,8 +252,8 @@ class CHMReader(CHMFile): if not os.path.isdir(dir): os.makedirs(dir) - def extract_content(self, output_dir=os.getcwdu()): - self.ExtractFiles(output_dir=output_dir) + def extract_content(self, output_dir=os.getcwdu(), debug_dump=False): + self.ExtractFiles(output_dir=output_dir, debug_dump=debug_dump) diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 96ea3e5884..3eb59a21b9 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -854,7 +854,8 @@ OptionRecommendation(name='sr3_replace', if isinstance(ret, basestring): shutil.copytree(output_dir, out_dir) else: - os.makedirs(out_dir) + if not os.path.exists(out_dir): + os.makedirs(out_dir) self.dump_oeb(ret, out_dir) if self.input_fmt == 'recipe': zf = ZipFile(os.path.join(self.opts.debug_pipeline, diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index 079e990de3..b22f7d2791 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -309,9 +309,9 @@ class HTMLInput(InputFormatPlugin): def create_oebbook(self, htmlpath, basedir, opts, log, mi): from calibre.ebooks.conversion.plumber import create_oebbook - from calibre.ebooks.oeb.base import DirContainer, \ - 
rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES, \ - xpath + from calibre.ebooks.oeb.base import (DirContainer, + rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES, + xpath) from calibre import guess_type from calibre.ebooks.oeb.transforms.metadata import \ meta_info_to_oeb_metadata @@ -345,7 +345,8 @@ class HTMLInput(InputFormatPlugin): htmlfile_map = {} for f in filelist: path = f.path - oeb.container = DirContainer(os.path.dirname(path), log) + oeb.container = DirContainer(os.path.dirname(path), log, + ignore_opf=True) bname = os.path.basename(path) id, href = oeb.manifest.generate(id='html', href=ascii_filename(bname)) @@ -369,7 +370,7 @@ class HTMLInput(InputFormatPlugin): for f in filelist: path = f.path dpath = os.path.dirname(path) - oeb.container = DirContainer(dpath, log) + oeb.container = DirContainer(dpath, log, ignore_opf=True) item = oeb.manifest.hrefs[htmlfile_map[path]] rewrite_links(item.data, partial(self.resource_adder, base=dpath)) @@ -409,7 +410,7 @@ class HTMLInput(InputFormatPlugin): if not item.linear: continue toc.add(title, item.href) - oeb.container = DirContainer(os.getcwdu(), oeb.log) + oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True) return oeb def link_to_local_path(self, link_, base=None): @@ -456,7 +457,7 @@ class HTMLInput(InputFormatPlugin): href=bhref) self.oeb.log.debug('Added', link) self.oeb.container = self.DirContainer(os.path.dirname(link), - self.oeb.log) + self.oeb.log, ignore_opf=True) # Load into memory guessed = self.guess_type(href)[0] media_type = guessed or self.BINARY_MIME diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py index 3099de12e4..a031c4886d 100644 --- a/src/calibre/ebooks/metadata/book/base.py +++ b/src/calibre/ebooks/metadata/book/base.py @@ -68,7 +68,19 @@ composite_formatter = SafeFormat() class Metadata(object): ''' - A class representing all the metadata for a book. 
+ A class representing all the metadata for a book. The various standard metadata + fields are available as attributes of this object. You can also stick + arbitrary attributes onto this object. + + Metadata from custom columns should be accessed via the get() method, + passing in the lookup name for the column, for example: "#mytags". + + Use the :meth:`is_null` method to test if a field is null. + + This object also has functions to format fields into strings. + + The list of standard metadata fields grows with time and is in + :data:`STANDARD_METADATA_FIELDS`. Please keep the method based API of this class to a minimum. Every method becomes a reserved field name. @@ -88,11 +100,19 @@ class Metadata(object): if title: self.title = title if authors: - #: List of strings or [] + # List of strings or [] self.author = list(authors) if authors else []# Needed for backward compatibility self.authors = list(authors) if authors else [] def is_null(self, field): + ''' + Return True if the value of field is null in this object. + 'null' means it is unknown or evaluates to False. So a title of + _('Unknown') is null or a language of 'und' is null. + + Be careful with numeric fields since this will return True for zero as + well as None. 
+ ''' null_val = NULL_VALUES.get(field, None) val = getattr(self, field, None) return not val or val == null_val @@ -547,13 +567,16 @@ class Metadata(object): return unicode(self.rating) def format_field(self, key, series_with_index=True): + ''' + Returns the tuple (display_name, formatted_value) + ''' name, val, ign, ign = self.format_field_extended(key, series_with_index) return (name, val) def format_field_extended(self, key, series_with_index=True): from calibre.ebooks.metadata import authors_to_string ''' - returns the tuple (field_name, formatted_value, original_value, + returns the tuple (display_name, formatted_value, original_value, field_metadata) ''' @@ -637,6 +660,10 @@ class Metadata(object): return (None, None, None, None) def __unicode__(self): + ''' + A string representation of this object, suitable for printing to + console + ''' from calibre.ebooks.metadata import authors_to_string ans = [] def fmt(x, y): @@ -680,6 +707,9 @@ class Metadata(object): return u'\n'.join(ans) def to_html(self): + ''' + A HTML representation of this object. + ''' from calibre.ebooks.metadata import authors_to_string ans = [(_('Title'), unicode(self.title))] ans += [(_('Author(s)'), (authors_to_string(self.authors) if self.authors else _('Unknown')))] diff --git a/src/calibre/ebooks/metadata/mobi.py b/src/calibre/ebooks/metadata/mobi.py index 15fdceff15..74db3b3a58 100644 --- a/src/calibre/ebooks/metadata/mobi.py +++ b/src/calibre/ebooks/metadata/mobi.py @@ -400,7 +400,8 @@ class MetadataUpdater(object): if getattr(self, 'exth', None) is None: raise MobiError('No existing EXTH record. 
Cannot update metadata.') - self.record0[92:96] = iana2mobi(mi.language) + if not mi.is_null('language'): + self.record0[92:96] = iana2mobi(mi.language) self.create_exth(exth=exth, new_title=mi.title) # Fetch updated timestamp, cover_record, thumbnail_record diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index f2c9696976..c07386e1fd 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -446,22 +446,23 @@ class NullContainer(object): class DirContainer(object): """Filesystem directory container.""" - def __init__(self, path, log): + def __init__(self, path, log, ignore_opf=False): self.log = log if isbytestring(path): path = path.decode(filesystem_encoding) + self.opfname = None ext = os.path.splitext(path)[1].lower() if ext == '.opf': self.opfname = os.path.basename(path) self.rootdir = os.path.dirname(path) return self.rootdir = path - for path in self.namelist(): - ext = os.path.splitext(path)[1].lower() - if ext == '.opf': - self.opfname = path - return - self.opfname = None + if not ignore_opf: + for path in self.namelist(): + ext = os.path.splitext(path)[1].lower() + if ext == '.opf': + self.opfname = path + return def read(self, path): if path is None: diff --git a/src/calibre/gui2/book_details.py b/src/calibre/gui2/book_details.py index 80d3c1636e..f94e179166 100644 --- a/src/calibre/gui2/book_details.py +++ b/src/calibre/gui2/book_details.py @@ -118,8 +118,9 @@ def render_data(mi, use_roman_numbers=True, all_fields=False): links = [u'%s' % (url, id_typ, id_val, name) for name, id_typ, id_val, url in urls] links = u', '.join(links) - ans.append((field, u'%s%s'%( - _('Ids')+':', links))) + if links: + ans.append((field, u'%s%s'%( + _('Ids')+':', links))) else: val = mi.format_field(field)[-1] if val is None: diff --git a/src/calibre/gui2/metadata/basic_widgets.py b/src/calibre/gui2/metadata/basic_widgets.py index 35d50c3d66..1620734209 100644 --- a/src/calibre/gui2/metadata/basic_widgets.py +++ 
b/src/calibre/gui2/metadata/basic_widgets.py @@ -942,7 +942,11 @@ class IdentifiersEdit(QLineEdit): # {{{ ans = {} for x in parts: c = x.split(':') - if len(c) == 2: + if len(c) > 1: + if c[0] == 'isbn': + v = check_isbn(c[1]) + if v is not None: + c[1] = v ans[c[0]] = c[1] return ans def fset(self, val): @@ -953,6 +957,11 @@ class IdentifiersEdit(QLineEdit): # {{{ if x == 'isbn': x = '00isbn' return x + for k in list(val): + if k == 'isbn': + v = check_isbn(k) + if v is not None: + val[k] = v ids = sorted(val.iteritems(), key=keygen) txt = ', '.join(['%s:%s'%(k, v) for k, v in ids]) self.setText(txt.strip()) @@ -960,8 +969,8 @@ class IdentifiersEdit(QLineEdit): # {{{ return property(fget=fget, fset=fset) def initialize(self, db, id_): - self.current_val = db.get_identifiers(id_, index_is_id=True) - self.original_val = self.current_val + self.original_val = db.get_identifiers(id_, index_is_id=True) + self.current_val = self.original_val def commit(self, db, id_): if self.original_val != self.current_val: diff --git a/src/calibre/gui2/metadata/config.py b/src/calibre/gui2/metadata/config.py index abb45faa46..cb51324ae0 100644 --- a/src/calibre/gui2/metadata/config.py +++ b/src/calibre/gui2/metadata/config.py @@ -41,8 +41,11 @@ class FieldsModel(FM): # {{{ self.reset() def commit(self): - val = [k for k, v in self.overrides.iteritems() if v == Qt.Unchecked] - self.prefs['ignore_fields'] = val + ignored_fields = set([x for x in self.prefs['ignore_fields'] if x not in + self.overrides]) + changed = set([k for k, v in self.overrides.iteritems() if v == + Qt.Unchecked]) + self.prefs['ignore_fields'] = list(ignored_fields.union(changed)) # }}} diff --git a/src/calibre/gui2/preferences/metadata_sources.py b/src/calibre/gui2/preferences/metadata_sources.py index 4a6c7cfc06..f487051d07 100644 --- a/src/calibre/gui2/preferences/metadata_sources.py +++ b/src/calibre/gui2/preferences/metadata_sources.py @@ -209,8 +209,11 @@ class FieldsModel(QAbstractListModel): # {{{ return 
ret def commit(self): - val = [k for k, v in self.overrides.iteritems() if v == Qt.Unchecked] - msprefs['ignore_fields'] = val + ignored_fields = set([x for x in msprefs['ignore_fields'] if x not in + self.overrides]) + changed = set([k for k, v in self.overrides.iteritems() if v == + Qt.Unchecked]) + msprefs['ignore_fields'] = list(ignored_fields.union(changed)) # }}} diff --git a/src/calibre/gui2/preferences/template_functions.py b/src/calibre/gui2/preferences/template_functions.py index 20724b7667..fcb4c87372 100644 --- a/src/calibre/gui2/preferences/template_functions.py +++ b/src/calibre/gui2/preferences/template_functions.py @@ -74,9 +74,10 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): def initialize(self): try: - with open(P('template-functions.json'), 'rb') as f: - self.builtin_source_dict = json.load(f, encoding='utf-8') + self.builtin_source_dict = json.loads(P('template-functions.json', data=True, + allow_user_override=False).decode('utf-8')) except: + traceback.print_exc() self.builtin_source_dict = {} self.funcs = formatter_functions.get_functions() diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py index 9a4e0ca70a..4885f7b2db 100644 --- a/src/calibre/gui2/ui.py +++ b/src/calibre/gui2/ui.py @@ -278,11 +278,8 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{ self.library_view.model().count_changed_signal.connect( self.iactions['Choose Library'].count_changed) if not gprefs.get('quick_start_guide_added', False): - from calibre.ebooks.metadata import MetaInformation - mi = MetaInformation(_('Calibre Quick Start Guide'), ['John Schember']) - mi.author_sort = 'Schember, John' - mi.comments = "A guide to get you up and running with calibre" - mi.publisher = 'calibre' + from calibre.ebooks.metadata.meta import get_metadata + mi = get_metadata(open(P('quick_start.epub'), 'rb'), 'epub') self.library_view.model().add_books([P('quick_start.epub')], ['epub'], [mi]) gprefs['quick_start_guide_added'] = True diff --git 
a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 3702de45c5..9d58ae4456 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -464,9 +464,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): self.refresh_ondevice = None def initialize_database(self): - metadata_sqlite = open(P('metadata_sqlite.sql'), 'rb').read() + metadata_sqlite = P('metadata_sqlite.sql', data=True, + allow_user_override=False).decode('utf-8') self.conn.executescript(metadata_sqlite) - self.user_version = 1 + self.conn.commit() + if self.user_version == 0: + self.user_version = 1 def last_modified(self): ''' Return last modified time as a UTC datetime object''' diff --git a/src/calibre/manual/template_lang.rst b/src/calibre/manual/template_lang.rst index a77f0d1697..b4bf7473e7 100644 --- a/src/calibre/manual/template_lang.rst +++ b/src/calibre/manual/template_lang.rst @@ -396,3 +396,19 @@ You might find the following tips useful. * In a plugboard, you can set a field to empty (or whatever is equivalent to empty) by using the special template ``{null}``. This template will always evaluate to an empty string. * The technique described above to show numbers even if they have a zero value works with the standard field series_index. +API of the Metadata objects +---------------------------- + +.. module:: calibre.ebooks.metadata.book.base + +.. autoclass:: Metadata + :members: + :member-order: bysource + +.. data:: STANDARD_METADATA_FIELDS + + The set of standard metadata fields. + +.. 
literalinclude:: ../ebooks/metadata/book/__init__.py + :lines: 7- + diff --git a/src/calibre/utils/resources.py b/src/calibre/utils/resources.py index 97c14926e4..00777973bb 100644 --- a/src/calibre/utils/resources.py +++ b/src/calibre/utils/resources.py @@ -65,7 +65,8 @@ _resolver = PathResolver() def get_path(path, data=False, allow_user_override=True): fpath = _resolver(path, allow_user_override=allow_user_override) if data: - return open(fpath, 'rb').read() + with open(fpath, 'rb') as f: + return f.read() return fpath def get_image_path(path, data=False, allow_user_override=True):