GRiker 2013-05-10 07:09:52 -06:00
commit b61e295fd5
165 changed files with 43043 additions and 35672 deletions

View File

@@ -20,6 +20,60 @@
# new recipes:
# - title:
- version: 0.9.30
date: 2013-05-10
new features:
- title: "Kobo driver: Add support for showing 'Archived' books on the device. Also up the supported firmware version to 2.5.3."
tickets: [1177677]
- title: "Driver for Blackberry 9790"
tickets: [1176607]
- title: "Add a tweak to turn off the highlighting of the book count when using a virtual library (Preferences->Tweaks)"
- title: "Add a button to clear the viewer search history in the viewer Preferences, under Miscellaneous"
- title: "Add keyboard shortcuts to clear the virtual Library and the additional restriction (Ctrl+Esc and Alt+Esc). Also use Shift+Esc to bring keyboard focus back tot he book list. Can be changed under Preferences->Keyboard"
- title: "Docx metadata: Read the language of the file, if present"
bug fixes:
- title: "Kobo driver: Fix unable to read SD card on OS X/Linux"
tickets: [1174815]
- title: "Content server: Fix unable to download ORIGINAL_* formats"
tickets: [1177158]
- title: "Fix regression that broke searching for terms containing a quote mark"
tickets: [1177114]
- title: "Fix regression that broke conversion of txt files when no input encoding is specified"
tickets: [1176622]
- title: "When changing to a virtual library, refresh the Book Details panel."
tickets: [1176296]
- title: "Fix regression that caused searching for user categories to break."
tickets: [1176187]
- title: "Fix error when downloading only covers and reviewing downloaded metadata."
tickets: [1176253]
- title: "MOBI metadata: Strip XML unsafe unicode codepoints when reading metadata from MOBI files."
tickets: [1175965]
- title: "Txt Input: Use the gbk encoding for txt files with detected encoding of gb2312."
tickets: [1175974]
- title: "When pressing Ctrl+Home/End preserve the horizontal scroll position in the book list"
improved recipes:
- NSFW
- Go Comics
- Various Polish news sources
- The Sun
- version: 0.9.29
date: 2013-05-03

View File

@@ -582,6 +582,12 @@ Calibre has several keyboard shortcuts to save you time and mouse movement. Thes
- Open the advanced search dialog
* - :kbd:`Esc`
- Clear the current search
* - :kbd:`Shift+Esc`
- Focus the book list
* - :kbd:`Ctrl+Esc`
- Clear the virtual library
* - :kbd:`Alt+Esc`
- Clear the additional restriction
* - :kbd:`N or F3`
- Find the next book that matches the current search (only works if the highlight checkbox next to the search bar is checked)
* - :kbd:`Shift+N or Shift+F3`

View File

@@ -12,12 +12,15 @@ class BenchmarkPl(BasicNewsRecipe):
language = 'pl'
oldest_article = 8
max_articles_per_feed = 100
extra_css = 'ul {list-style-type: none;}'
no_stylesheets = True
remove_attributes = ['style']
#remove_attributes = ['style']
preprocess_regexps = [(re.compile(ur'<h3><span style="font-size: small;">&nbsp;Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'), (re.compile(ur'Więcej o .*?</ul>', re.DOTALL|re.IGNORECASE), lambda match: '')]
keep_only_tags = [dict(name='div', attrs={'class':['m_zwykly', 'gallery']}), dict(id='article')]
keep_only_tags = [dict(id=['articleHeader', 'articleGallery']), dict(name='div', attrs={'class':['m_zwykly', 'gallery']}), dict(id='article')]
remove_tags_after = dict(id='article')
remove_tags = [dict(name='div', attrs={'class':['comments', 'body', 'kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery', 'breadcrumb', 'footer', 'moreTopics']}), dict(name='table', attrs = {'background':'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width':'210', 'cellspacing':'1', 'cellpadding':'4', 'border':'0', 'align':'right'})]
INDEX = 'http://www.benchmark.pl'
feeds = [(u'Aktualności', u'http://www.benchmark.pl/rss/aktualnosci-pliki.xml'),
(u'Testy i recenzje', u'http://www.benchmark.pl/rss/testy-recenzje-minirecenzje.xml')]
@@ -42,46 +45,16 @@ class BenchmarkPl(BasicNewsRecipe):
for r in appendtag.findAll(attrs={'class':'changePage'}):
r.extract()
def image_article(self, soup, appendtag):
nexturl = soup.find('div', attrs={'class':'preview'})
if nexturl:
nexturl = nexturl.find('a', attrs={'class':'move_next'})
image = appendtag.find('div', attrs={'class':'preview'}).div['style'][16:]
image = self.INDEX + image[:image.find("')")]
appendtag.find(attrs={'class':'preview'}).name='img'
appendtag.find(attrs={'class':'preview'})['src']=image
appendtag.find('a', attrs={'class':'move_next'}).extract()
while nexturl:
nexturl = self.INDEX + nexturl['href']
soup2 = self.index_to_soup(nexturl)
nexturl = soup2.find('a', attrs={'class':'move_next'})
image = soup2.find('div', attrs={'class':'preview'}).div['style'][16:]
image = self.INDEX + image[:image.find("')")]
soup2.find(attrs={'class':'preview'}).name='img'
soup2.find(attrs={'class':'preview'})['src']=image
pagetext = soup2.find('div', attrs={'class':'gallery'})
pagetext.find('div', attrs={'class':'title'}).extract()
pagetext.find('div', attrs={'class':'thumb'}).extract()
pagetext.find('div', attrs={'class':'panelOcenaObserwowane'}).extract()
if nexturl:
pagetext.find('a', attrs={'class':'move_next'}).extract()
pagetext.find('a', attrs={'class':'move_back'}).extract()
comments = pagetext.findAll(text=lambda text:isinstance(text, Comment))
for comment in comments:
comment.extract()
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
def preprocess_html(self, soup):
if soup.find('div', attrs={'class':'preview'}):
self.image_article(soup, soup.body)
else:
self.append_page(soup, soup.body)
self.append_page(soup, soup.body)
for a in soup('a'):
if a.has_key('href') and not a['href'].startswith('http'):
a['href'] = self.INDEX + a['href']
for r in soup.findAll(attrs={'class':['comments', 'body']}):
r.extract()
tag1 = soup.find(attrs={'class':'inlineGallery'})
if tag1:
for tag in tag1.findAll('li'):
tag['style'] = 'float: left; margin-right: 10px;'
tag1.findNext('p')['style'] = 'clear: both;'
return soup
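
The rewritten preprocess_html now routes every article, gallery or not, through the recipe's append_page helper, whose body is truncated in this hunk. For reference, a minimal sketch of what such a calibre pagination helper typically does; the 'changePage' and 'article' selectors match those visible elsewhere in this recipe, but the full body here is an assumption for illustration, not the committed code:

def append_page(self, soup, appendtag):
    # Sketch only: follow the 'next page' link, pull each page's article
    # body, and splice it into the first page. The exact selectors in the
    # real recipe may differ.
    seen = set()
    nexturl = soup.find('a', attrs={'class': 'changePage'})
    while nexturl is not None and nexturl['href'] not in seen:
        seen.add(nexturl['href'])
        soup2 = self.index_to_soup(self.INDEX + nexturl['href'])
        pagetext = soup2.find(id='article')
        if pagetext is None:
            break
        appendtag.insert(len(appendtag.contents), pagetext)
        nexturl = soup2.find('a', attrs={'class': 'changePage'})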

View File

@@ -1,224 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Comics(BasicNewsRecipe):
title = 'Comics.com'
__author__ = 'Starson17'
description = 'Comics from comics.com. You should customize this recipe to fetch only the comics you are interested in'
language = 'en'
use_embedded_content= False
no_stylesheets = True
oldest_article = 24
remove_javascript = True
cover_url = 'http://www.bsb.lib.tx.us/images/comics.com.gif'
recursions = 0
max_articles_per_feed = 10
num_comics_to_get = 7
simultaneous_downloads = 1
# delay = 3
keep_only_tags = [dict(name='a', attrs={'class':'STR_StripImage'}),
dict(name='div', attrs={'class':'STR_Date'})
]
def parse_index(self):
feeds = []
for title, url in [
("9 Chickweed Lane", "http://comics.com/9_chickweed_lane"),
("Agnes", "http://comics.com/agnes"),
("Alley Oop", "http://comics.com/alley_oop"),
("Andy Capp", "http://comics.com/andy_capp"),
("Arlo & Janis", "http://comics.com/arlo&janis"),
("B.C.", "http://comics.com/bc"),
("Ballard Street", "http://comics.com/ballard_street"),
# ("Ben", "http://comics.com/ben"),
# ("Betty", "http://comics.com/betty"),
# ("Big Nate", "http://comics.com/big_nate"),
# ("Brevity", "http://comics.com/brevity"),
# ("Candorville", "http://comics.com/candorville"),
# ("Cheap Thrills", "http://comics.com/cheap_thrills"),
# ("Committed", "http://comics.com/committed"),
# ("Cow & Boy", "http://comics.com/cow&boy"),
# ("Daddy's Home", "http://comics.com/daddys_home"),
# ("Dog eat Doug", "http://comics.com/dog_eat_doug"),
# ("Drabble", "http://comics.com/drabble"),
# ("F Minus", "http://comics.com/f_minus"),
# ("Family Tree", "http://comics.com/family_tree"),
# ("Farcus", "http://comics.com/farcus"),
# ("Fat Cats Classics", "http://comics.com/fat_cats_classics"),
# ("Ferd'nand", "http://comics.com/ferdnand"),
# ("Flight Deck", "http://comics.com/flight_deck"),
# ("Flo & Friends", "http://comics.com/flo&friends"),
# ("Fort Knox", "http://comics.com/fort_knox"),
# ("Frank & Ernest", "http://comics.com/frank&ernest"),
# ("Frazz", "http://comics.com/frazz"),
# ("Free Range", "http://comics.com/free_range"),
# ("Geech Classics", "http://comics.com/geech_classics"),
# ("Get Fuzzy", "http://comics.com/get_fuzzy"),
# ("Girls & Sports", "http://comics.com/girls&sports"),
# ("Graffiti", "http://comics.com/graffiti"),
# ("Grand Avenue", "http://comics.com/grand_avenue"),
# ("Heathcliff", "http://comics.com/heathcliff"),
# "Heathcliff, a street-smart and mischievous cat with many adventures."
# ("Herb and Jamaal", "http://comics.com/herb_and_jamaal"),
# ("Herman", "http://comics.com/herman"),
# ("Home and Away", "http://comics.com/home_and_away"),
# ("It's All About You", "http://comics.com/its_all_about_you"),
# ("Jane's World", "http://comics.com/janes_world"),
# ("Jump Start", "http://comics.com/jump_start"),
# ("Kit 'N' Carlyle", "http://comics.com/kit_n_carlyle"),
# ("Li'l Abner Classics", "http://comics.com/lil_abner_classics"),
# ("Liberty Meadows", "http://comics.com/liberty_meadows"),
# ("Little Dog Lost", "http://comics.com/little_dog_lost"),
# ("Lola", "http://comics.com/lola"),
# ("Luann", "http://comics.com/luann"),
# ("Marmaduke", "http://comics.com/marmaduke"),
# ("Meg! Classics", "http://comics.com/meg_classics"),
# ("Minimum Security", "http://comics.com/minimum_security"),
# ("Moderately Confused", "http://comics.com/moderately_confused"),
# ("Momma", "http://comics.com/momma"),
# ("Monty", "http://comics.com/monty"),
# ("Motley Classics", "http://comics.com/motley_classics"),
# ("Nancy", "http://comics.com/nancy"),
# ("Natural Selection", "http://comics.com/natural_selection"),
# ("Nest Heads", "http://comics.com/nest_heads"),
# ("Off The Mark", "http://comics.com/off_the_mark"),
# ("On a Claire Day", "http://comics.com/on_a_claire_day"),
# ("One Big Happy Classics", "http://comics.com/one_big_happy_classics"),
# ("Over the Hedge", "http://comics.com/over_the_hedge"),
# ("PC and Pixel", "http://comics.com/pc_and_pixel"),
# ("Peanuts", "http://comics.com/peanuts"),
# ("Pearls Before Swine", "http://comics.com/pearls_before_swine"),
# ("Pickles", "http://comics.com/pickles"),
# ("Prickly City", "http://comics.com/prickly_city"),
# ("Raising Duncan Classics", "http://comics.com/raising_duncan_classics"),
# ("Reality Check", "http://comics.com/reality_check"),
# ("Red & Rover", "http://comics.com/red&rover"),
# ("Rip Haywire", "http://comics.com/rip_haywire"),
# ("Ripley's Believe It or Not!", "http://comics.com/ripleys_believe_it_or_not"),
# ("Rose Is Rose", "http://comics.com/rose_is_rose"),
# ("Rubes", "http://comics.com/rubes"),
# ("Rudy Park", "http://comics.com/rudy_park"),
# ("Scary Gary", "http://comics.com/scary_gary"),
# ("Shirley and Son Classics", "http://comics.com/shirley_and_son_classics"),
# ("Soup To Nutz", "http://comics.com/soup_to_nutz"),
# ("Speed Bump", "http://comics.com/speed_bump"),
# ("Spot The Frog", "http://comics.com/spot_the_frog"),
# ("State of the Union", "http://comics.com/state_of_the_union"),
# ("Strange Brew", "http://comics.com/strange_brew"),
# ("Tarzan Classics", "http://comics.com/tarzan_classics"),
# ("That's Life", "http://comics.com/thats_life"),
# ("The Barn", "http://comics.com/the_barn"),
# ("The Born Loser", "http://comics.com/the_born_loser"),
# ("The Buckets", "http://comics.com/the_buckets"),
# ("The Dinette Set", "http://comics.com/the_dinette_set"),
# ("The Grizzwells", "http://comics.com/the_grizzwells"),
# ("The Humble Stumble", "http://comics.com/the_humble_stumble"),
# ("The Knight Life", "http://comics.com/the_knight_life"),
# ("The Meaning of Lila", "http://comics.com/the_meaning_of_lila"),
# ("The Other Coast", "http://comics.com/the_other_coast"),
# ("The Sunshine Club", "http://comics.com/the_sunshine_club"),
# ("Unstrange Phenomena", "http://comics.com/unstrange_phenomena"),
# ("Watch Your Head", "http://comics.com/watch_your_head"),
# ("Wizard of Id", "http://comics.com/wizard_of_id"),
# ("Working Daze", "http://comics.com/working_daze"),
# ("Working It Out", "http://comics.com/working_it_out"),
# ("Zack Hill", "http://comics.com/zack_hill"),
# ("(Th)ink", "http://comics.com/think"),
# "Tackling the political and social issues impacting communities of color."
# ("Adam Zyglis", "http://comics.com/adam_zyglis"),
# "Known for his excellent caricatures, as well as independent and incisive imagery. "
# ("Andy Singer", "http://comics.com/andy_singer"),
# ("Bill Day", "http://comics.com/bill_day"),
# "Powerful images on sensitive issues."
# ("Bill Schorr", "http://comics.com/bill_schorr"),
# ("Bob Englehart", "http://comics.com/bob_englehart"),
# ("Brian Fairrington", "http://comics.com/brian_fairrington"),
# ("Bruce Beattie", "http://comics.com/bruce_beattie"),
# ("Cam Cardow", "http://comics.com/cam_cardow"),
# ("Chip Bok", "http://comics.com/chip_bok"),
# ("Chris Britt", "http://comics.com/chris_britt"),
# ("Chuck Asay", "http://comics.com/chuck_asay"),
# ("Clay Bennett", "http://comics.com/clay_bennett"),
# ("Daryl Cagle", "http://comics.com/daryl_cagle"),
# ("David Fitzsimmons", "http://comics.com/david_fitzsimmons"),
# "David Fitzsimmons is a new editorial cartoons on comics.com. He is also a staff writer and editorial cartoonist for the Arizona Daily Star. "
# ("Drew Litton", "http://comics.com/drew_litton"),
# "Drew Litton is an artist who is probably best known for his sports cartoons. He received the National Cartoonist Society Sports Cartoon Award for 1993. "
# ("Ed Stein", "http://comics.com/ed_stein"),
# "Winner of the Fischetti Award in 2006 and the Scripps Howard National Journalism Award, 1999, Ed Stein has been the editorial cartoonist for the Rocky Mountain News since 1978. "
# ("Eric Allie", "http://comics.com/eric_allie"),
# "Eric Allie is an editorial cartoonist with the Pioneer Press and CNS News. "
# ("Gary Markstein", "http://comics.com/gary_markstein"),
# ("Gary McCoy", "http://comics.com/gary_mccoy"),
# "Gary McCoy is known for his editorial cartoons, humor and inane ramblings. He is a 2 time nominee for Best Magazine Cartoonist of the Year by the National Cartoonists Society. He resides in Belleville, IL. "
# ("Gary Varvel", "http://comics.com/gary_varvel"),
# ("Henry Payne", "http://comics.com/henry_payne"),
# ("JD Crowe", "http://comics.com/jd_crowe"),
# ("Jeff Parker", "http://comics.com/jeff_parker"),
# ("Jeff Stahler", "http://comics.com/jeff_stahler"),
# ("Jerry Holbert", "http://comics.com/jerry_holbert"),
# ("John Cole", "http://comics.com/john_cole"),
# ("John Darkow", "http://comics.com/john_darkow"),
# "John Darkow is a contributing editorial cartoonist for the Humor Times as well as editoiral cartoonist for the Columbia Daily Tribune, Missouri"
# ("John Sherffius", "http://comics.com/john_sherffius"),
# ("Larry Wright", "http://comics.com/larry_wright"),
# ("Lisa Benson", "http://comics.com/lisa_benson"),
# ("Marshall Ramsey", "http://comics.com/marshall_ramsey"),
# ("Matt Bors", "http://comics.com/matt_bors"),
# ("Michael Ramirez", "http://comics.com/michael_ramirez"),
# ("Mike Keefe", "http://comics.com/mike_keefe"),
# ("Mike Luckovich", "http://comics.com/mike_luckovich"),
# ("MIke Thompson", "http://comics.com/mike_thompson"),
# ("Monte Wolverton", "http://comics.com/monte_wolverton"),
# "Unique mix of perspectives"
# ("Mr. Fish", "http://comics.com/mr_fish"),
# "Side effects may include swelling"
# ("Nate Beeler", "http://comics.com/nate_beeler"),
# "Middle America meets the Beltway."
# ("Nick Anderson", "http://comics.com/nick_anderson"),
# ("Pat Bagley", "http://comics.com/pat_bagley"),
# "Unfair and Totally Unbalanced."
# ("Paul Szep", "http://comics.com/paul_szep"),
# ("RJ Matson", "http://comics.com/rj_matson"),
# "Power cartoons from NYC and Capitol Hill"
# ("Rob Rogers", "http://comics.com/rob_rogers"),
# "Humorous slant on current events"
# ("Robert Ariail", "http://comics.com/robert_ariail"),
# "Clever and unpredictable"
# ("Scott Stantis", "http://comics.com/scott_stantis"),
# ("Signe Wilkinson", "http://comics.com/signe_wilkinson"),
# ("Steve Benson", "http://comics.com/steve_benson"),
# ("Steve Breen", "http://comics.com/steve_breen"),
# ("Steve Kelley", "http://comics.com/steve_kelley"),
# ("Steve Sack", "http://comics.com/steve_sack"),
]:
articles = self.make_links(url)
if articles:
feeds.append((title, articles))
return feeds
def make_links(self, url):
soup = self.index_to_soup(url)
# print 'soup: ', soup
title = ''
current_articles = []
pages = range(1, self.num_comics_to_get+1)
for page in pages:
page_url = url + '/?Page=' + str(page)
soup = self.index_to_soup(page_url)
if soup:
strip_tag = soup.find('a', attrs={'class': 'STR_StripImage'})
if strip_tag:
print 'strip_tag: ', strip_tag
title = strip_tag['title']
print 'title: ', title
current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':''})
current_articles.reverse()
return current_articles
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

View File

@@ -16,7 +16,7 @@ class Computerworld_pl(BasicNewsRecipe):
preprocess_regexps = [(re.compile(u'Zobacz również:', re.IGNORECASE), lambda m: ''), (re.compile(ur'[*]+reklama[*]+', re.IGNORECASE), lambda m: ''),]
keep_only_tags = [dict(id=['szpaltaL', 's2011'])]
remove_tags_after = dict(name='div', attrs={'class':'tresc'})
remove_tags = [dict(attrs={'class':['nnav', 'rMobi', 'tagi', 'rec']}),]
remove_tags = [dict(attrs={'class':['nnav', 'rMobi', 'tagi', 'rec']}), dict(name='a', attrs={'target':'_blank'})]
feeds = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')]
def skip_ad_pages(self, soup):
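
skip_ad_pages, whose body is truncated in this hunk, is the hook calibre calls with the soup of each downloaded page before cleanup: returning markup replaces the page, returning None keeps it. A minimal sketch of one common pattern, skipping a meta-refresh interstitial; the refresh check is an assumption for illustration, not this recipe's actual logic:

def skip_ad_pages(self, soup):
    # Sketch only: if the fetched page is just an interstitial that
    # redirects via <meta http-equiv="refresh" content="0;url=...">,
    # fetch the target and hand its raw markup back to calibre.
    meta = soup.find('meta', attrs={'http-equiv': 'refresh'})
    if meta is not None and 'url=' in meta.get('content', '').lower():
        target = meta['content'].split('=', 1)[1]
        return self.index_to_soup(target, raw=True)
    return None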

View File

@@ -15,6 +15,7 @@ class CoNowegoPl(BasicNewsRecipe):
no_stylesheets = True
remove_empty_feeds = True
use_embedded_content = False
ignore_duplicate_articles = {'title', 'url'}
keep_only_tags = [dict(name='div', attrs={'class':'news_list single_view'})]
remove_tags = [dict(name='div', attrs={'class':['ni_bottom', 'ni_rank', 'ni_date']})]
feeds = [(u'Aktualno\u015bci', u'http://www.conowego.pl/rss/aktualnosci-5/?type=100'), (u'Gaming', u'http://www.conowego.pl/rss/gaming-6/?type=100'), (u'Porady', u'http://www.conowego.pl/rss/porady-3/?type=100'), (u'Testy', u'http://www.conowego.pl/rss/testy-2/?type=100')]

View File

@@ -1,4 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8
__license__ = 'GPL v3'
__author__ = 'Mori'
@@ -14,7 +15,7 @@ class DziennikInternautowRecipe(BasicNewsRecipe):
__author__ = 'Mori'
language = 'pl'
title = u'Dziennik Internautow'
title = u'Dziennik Internautów'
publisher = u'Dziennik Internaut\u00f3w Sp. z o.o.'
description = u'Internet w \u017cyciu i biznesie. Porady, wywiady, interwencje, bezpiecze\u0144stwo w Sieci, technologia.'

View File

@@ -16,7 +16,7 @@ class DziennikLodzki(BasicNewsRecipe):
ignore_duplicate_articles = {'title', 'url'}
#preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})]
remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}), dict(name='a', attrs={'href':'http://www.dzienniklodzki.pl/piano'})]
feeds = [(u'Na sygnale', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_nasygnale.xml?201302'), (u'\u0141\xf3d\u017a', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_lodz.xml?201302'), (u'Opinie', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_opinie.xml?201302'), (u'Pieni\u0105dze', u'http://dzienniklodzki.feedsportal.com/c/32980/f/533763/index.rss?201302'), (u'Kultura', u'http://dzienniklodzki.feedsportal.com/c/32980/f/533762/index.rss?201302'), (u'Sport', u'http://dzienniklodzki.feedsportal.com/c/32980/f/533761/index.rss?201302'), (u'Akcje', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_akcje.xml?201302'), (u'M\xf3j Reporter', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_mojreporter.xml?201302'), (u'Studni\xf3wki', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_studniowki.xml?201302'), (u'Kraj', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_kraj.xml?201302'), (u'Zdrowie', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_zdrowie.xml?201302')]

View File

@@ -16,7 +16,7 @@ class DziennikZachodni(BasicNewsRecipe):
ignore_duplicate_articles = {'title', 'url'}
#preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}), dict(attrs={'href':'http://www.dziennikzachodni.pl/piano'})]
remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}), dict(attrs={'href':'http://www.dziennikzachodni.pl/piano'}), dict(name='aside')]
feeds = [(u'Wszystkie', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533764/index.rss?201302'), (u'Wiadomo\u015bci', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533765/index.rss?201302'), (u'Regiony', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_regiony.xml?201302'), (u'Opinie', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_regiony.xml?201302'), (u'Blogi', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_blogi.xml?201302'), (u'Serwisy', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_serwisy.xml?201302'), (u'Sport', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533766/index.rss?201302'), (u'M\xf3j Reporter', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_mojreporter.xml?201302'), (u'Na narty', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_nanarty.xml?201302'), (u'Drogi', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_drogi.xml?201302'), (u'Pieni\u0105dze', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533768/index.rss?201302')]

View File

@@ -16,6 +16,7 @@ class EchoDnia(BasicNewsRecipe):
max_articles_per_feed = 100
remove_empty_feeds = True
no_stylesheets = True
use_embedded_content = False
ignore_duplicate_articles = {'title', 'url'}
preprocess_regexps = [(re.compile(ur'Czytaj:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),

View File

@@ -12,7 +12,7 @@ class swiatczytnikow(BasicNewsRecipe):
__author__ = u'Artur Stachecki'
oldest_article = 7
max_articles_per_feed = 100
remove_empty_feeds = True
remove_tags = [dict(name = 'div', attrs = {'class' : 'feedflare'})]
feeds = [(u'Wpisy', u'http://feeds.feedburner.com/Ekundelekpl?format=xml')]

View File

@@ -11,6 +11,7 @@ class eMuzyka(BasicNewsRecipe):
cover_url='http://s.emuzyka.pl/img/emuzyka_invert_small.jpg'
no_stylesheets = True
oldest_article = 7
remove_empty_feeds = True
max_articles_per_feed = 100
remove_attributes = ['style']
keep_only_tags=[dict(name='div', attrs={'id':'news_container'}), dict(name='h3'), dict(name='div', attrs={'class':'review_text'})]

View File

@@ -9,6 +9,7 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
description = 'Wiadomości z Polski i ze świata. Serwisy tematyczne i lokalne w 20 miastach.'
category = 'newspaper'
publication_type = 'newspaper'
#encoding = 'iso-8859-2'
masthead_url = 'http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
INDEX = 'http://wyborcza.pl'
remove_empty_feeds = True
@@ -16,6 +17,7 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
max_articles_per_feed = 100
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
ignore_duplicate_articles = {'title', 'url'}
remove_tags_before = dict(id='k0')
remove_tags_after = dict(id='banP4')
@@ -24,7 +26,19 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
(u'Wyborcza.biz', u'http://wyborcza.biz/pub/rss/wyborcza_biz_wiadomosci.htm'),
(u'Komentarze', u'http://rss.feedsportal.com/c/32739/f/530312/index.rss'),
(u'Kultura', u'http://rss.gazeta.pl/pub/rss/gazetawyborcza_kultura.xml'),
(u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'), (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'), (u'Gazeta \u015awi\u0105teczna', u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'), (u'Du\u017cy Format', u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'), (u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'), (u'M\u0119ska Muzyka', u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'), (u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'), (u'Solidarni z Tybetem', u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'), (u'W pon. - \u017bakowski', u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'), (u'We wt. - Kolenda-Zalewska', u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'), (u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'), (u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss'), (u'Nekrologi', u'http://rss.feedsportal.com/c/32739/f/530358/index.rss')
(u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'),
(u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'),
(u'Gazeta \u015awi\u0105teczna', u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'),
(u'Du\u017cy Format', u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'),
(u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'),
(u'M\u0119ska Muzyka', u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'),
(u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'),
(u'Solidarni z Tybetem', u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'),
(u'W pon. - \u017bakowski', u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'),
(u'We wt. - Kolenda-Zalewska', u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'),
(u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'),
(u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss'),
(u'Nekrologi', u'http://rss.feedsportal.com/c/32739/f/530358/index.rss')
]
def skip_ad_pages(self, soup):

View File

@@ -31,6 +31,14 @@ class Gildia(BasicNewsRecipe):
for link in content.findAll(name='a'):
if 'fragment' in link['href']:
return self.index_to_soup(link['href'], raw=True)
if 'relacj' in soup.title.string.lower():
for link in content.findAll(name='a'):
if 'relacj' in link['href']:
return self.index_to_soup(link['href'], raw=True)
if 'wywiad' in soup.title.string.lower():
for link in content.findAll(name='a'):
if 'wywiad' in link['href']:
return self.index_to_soup(link['href'], raw=True)
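
The two added branches repeat the same link scan, once per keyword. An equivalent, slightly more compact form of the same check (an illustrative sketch, not the committed code):

for keyword in ('relacj', 'wywiad'):
    # Sketch only: same behavior as the two branches above, folded
    # into a single loop over the title keywords.
    if keyword in soup.title.string.lower():
        for link in content.findAll(name='a'):
            if keyword in link['href']:
                return self.index_to_soup(link['href'], raw=True)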
def preprocess_html(self, soup):

View File

@@ -16,7 +16,7 @@ class GlosWielkopolski(BasicNewsRecipe):
ignore_duplicate_articles = {'title', 'url'}
#preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})]
remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}), dict(name='a', attrs={'href': 'http://www.gloswielkopolski.pl/newsletter/'})]
feeds = [(u'Wszystkie', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533779/index.rss?201302'), (u'Wiadomo\u015bci', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533780/index.rss?201302'), (u'Sport', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533781/index.rss?201302'), (u'Kultura', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533782/index.rss?201302'), (u'Porady', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_porady.xml?201302'), (u'Blogi', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_blogi.xml?201302'), (u'Nasze akcje', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_naszeakcje.xml?201302'), (u'Opinie', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_opinie.xml?201302'), (u'Magazyn', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_magazyn.xml?201302')]

View File

@@ -1,229 +1,443 @@
__license__ = 'GPL v3'
__copyright__ = 'Copyright 2010 Starson17'
'''
www.gocomics.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Comics(BasicNewsRecipe):
title = 'Comics.com'
import re
class GoComics(BasicNewsRecipe):
title = 'Go Comics'
__author__ = 'Starson17'
description = 'Comics from comics.com. You should customize this recipe to fetch only the comics you are interested in'
__version__ = '1.06'
__date__ = '07 June 2011'
description = u'200+ Comics - Customize for more days/comics: Defaults to 7 days, 25 comics - 20 general, 5 editorial.'
category = 'news, comics'
language = 'en'
use_embedded_content= False
no_stylesheets = True
oldest_article = 24
remove_javascript = True
cover_url = 'http://www.bsb.lib.tx.us/images/comics.com.gif'
recursions = 0
max_articles_per_feed = 10
num_comics_to_get = 7
simultaneous_downloads = 1
# delay = 3
keep_only_tags = [dict(name='h1'),
dict(name='p', attrs={'class':'feature_item'})
remove_attributes = ['style']
####### USER PREFERENCES - COMICS, IMAGE SIZE AND NUMBER OF COMICS TO RETRIEVE ########
# num_comics_to_get - I've tried up to 99 on Calvin&Hobbes
num_comics_to_get = 1
# comic_size 300 is small, 600 is medium, 900 is large, 1500 is extra-large
comic_size = 900
# CHOOSE COMIC STRIPS BELOW - REMOVE COMMENT '# ' FROM IN FRONT OF DESIRED STRIPS
# Please do not overload their servers by selecting all comics and 1000 strips from each!
conversion_options = {'linearize_tables' : True
, 'comment' : description
, 'tags' : category
, 'language' : language
}
keep_only_tags = [dict(name='div', attrs={'class':['feature','banner']}),
]
remove_tags = [dict(name='a', attrs={'class':['beginning','prev','cal','next','newest']}),
dict(name='div', attrs={'class':['tag-wrapper']}),
dict(name='a', attrs={'href':re.compile(r'.*mutable_[0-9]+', re.IGNORECASE)}),
dict(name='img', attrs={'src':re.compile(r'.*mutable_[0-9]+', re.IGNORECASE)}),
dict(name='ul', attrs={'class':['share-nav','feature-nav']}),
]
def get_browser(self):
br = BasicNewsRecipe.get_browser(self)
br.addheaders = [('Referer','http://www.gocomics.com/')]
return br
def parse_index(self):
feeds = []
for title, url in [
("9 Chickweed Lane", "http://gocomics.com/9_chickweed_lane"),
("Agnes", "http://gocomics.com/agnes"),
("Alley Oop", "http://gocomics.com/alley_oop"),
("Andy Capp", "http://gocomics.com/andy_capp"),
("Arlo & Janis", "http://gocomics.com/arlo&janis"),
("B.C.", "http://gocomics.com/bc"),
("Ballard Street", "http://gocomics.com/ballard_street"),
#(u"2 Cows and a Chicken", u"http://www.gocomics.com/2cowsandachicken"),
#(u"9 Chickweed Lane", u"http://www.gocomics.com/9chickweedlane"),
#(u"Adam At Home", u"http://www.gocomics.com/adamathome"),
#(u"Agnes", u"http://www.gocomics.com/agnes"),
#(u"Alley Oop", u"http://www.gocomics.com/alleyoop"),
#(u"Andy Capp", u"http://www.gocomics.com/andycapp"),
(u"Animal Crackers", u"http://www.gocomics.com/animalcrackers"),
#(u"Annie", u"http://www.gocomics.com/annie"),
#(u"Arlo & Janis", u"http://www.gocomics.com/arloandjanis"),
#(u"Ask Shagg", u"http://www.gocomics.com/askshagg"),
(u"B.C.", u"http://www.gocomics.com/bc"),
#(u"Back in the Day", u"http://www.gocomics.com/backintheday"),
#(u"Bad Reporter", u"http://www.gocomics.com/badreporter"),
(u"Baldo", u"http://www.gocomics.com/baldo"),
#(u"Ballard Street", u"http://www.gocomics.com/ballardstreet"),
#(u"Barkeater Lake", u"http://www.gocomics.com/barkeaterlake"),
#(u"Basic Instructions", u"http://www.gocomics.com/basicinstructions"),
#(u"Ben", u"http://www.gocomics.com/ben"),
#(u"Betty", u"http://www.gocomics.com/betty"),
#(u"Bewley", u"http://www.gocomics.com/bewley"),
#(u"Big Nate", u"http://www.gocomics.com/bignate"),
#(u"Big Top", u"http://www.gocomics.com/bigtop"),
#(u"Biographic", u"http://www.gocomics.com/biographic"),
#(u"Birdbrains", u"http://www.gocomics.com/birdbrains"),
#(u"Bleeker: The Rechargeable Dog", u"http://www.gocomics.com/bleeker"),
#(u"Bliss", u"http://www.gocomics.com/bliss"),
#(u"Bloom County", u"http://www.gocomics.com/bloomcounty"),
#(u"Bo Nanas", u"http://www.gocomics.com/bonanas"),
#(u"Bob the Squirrel", u"http://www.gocomics.com/bobthesquirrel"),
#(u"Boomerangs", u"http://www.gocomics.com/boomerangs"),
#(u"Bottomliners", u"http://www.gocomics.com/bottomliners"),
(u"Bound and Gagged", u"http://www.gocomics.com/boundandgagged"),
#(u"Brainwaves", u"http://www.gocomics.com/brainwaves"),
#(u"Brenda Starr", u"http://www.gocomics.com/brendastarr"),
#(u"Brevity", u"http://www.gocomics.com/brevity"),
#(u"Brewster Rockit", u"http://www.gocomics.com/brewsterrockit"),
(u"Broom Hilda", u"http://www.gocomics.com/broomhilda"),
(u"Calvin and Hobbes", u"http://www.gocomics.com/calvinandhobbes"),
#(u"Candorville", u"http://www.gocomics.com/candorville"),
#(u"Cathy", u"http://www.gocomics.com/cathy"),
#(u"C'est la Vie", u"http://www.gocomics.com/cestlavie"),
#(u"Cheap Thrills", u"http://www.gocomics.com/cheapthrills"),
#(u"Chuckle Bros", u"http://www.gocomics.com/chucklebros"),
#(u"Citizen Dog", u"http://www.gocomics.com/citizendog"),
#(u"Cleats", u"http://www.gocomics.com/cleats"),
#(u"Close to Home", u"http://www.gocomics.com/closetohome"),
#(u"Committed", u"http://www.gocomics.com/committed"),
#(u"Compu-toon", u"http://www.gocomics.com/compu-toon"),
#(u"Cornered", u"http://www.gocomics.com/cornered"),
#(u"Cow & Boy", u"http://www.gocomics.com/cow&boy"),
#(u"Cul de Sac", u"http://www.gocomics.com/culdesac"),
#(u"Daddy's Home", u"http://www.gocomics.com/daddyshome"),
#(u"Deep Cover", u"http://www.gocomics.com/deepcover"),
#(u"Dick Tracy", u"http://www.gocomics.com/dicktracy"),
#(u"Dog Eat Doug", u"http://www.gocomics.com/dogeatdoug"),
#(u"Domestic Abuse", u"http://www.gocomics.com/domesticabuse"),
#(u"Doodles", u"http://www.gocomics.com/doodles"),
#(u"Doonesbury", u"http://www.gocomics.com/doonesbury"),
#(u"Drabble", u"http://www.gocomics.com/drabble"),
#(u"Eek!", u"http://www.gocomics.com/eek"),
#(u"F Minus", u"http://www.gocomics.com/fminus"),
#(u"Family Tree", u"http://www.gocomics.com/familytree"),
#(u"Farcus", u"http://www.gocomics.com/farcus"),
#(u"Fat Cats Classics", u"http://www.gocomics.com/fatcatsclassics"),
#(u"Ferd'nand", u"http://www.gocomics.com/ferdnand"),
#(u"Flight Deck", u"http://www.gocomics.com/flightdeck"),
#(u"Flo and Friends", u"http://www.gocomics.com/floandfriends"),
(u"For Better or For Worse", u"http://www.gocomics.com/forbetterorforworse"),
#(u"For Heaven's Sake", u"http://www.gocomics.com/forheavenssake"),
#(u"Fort Knox", u"http://www.gocomics.com/fortknox"),
#(u"FoxTrot Classics", u"http://www.gocomics.com/foxtrotclassics"),
#(u"FoxTrot", u"http://www.gocomics.com/foxtrot"),
(u"Frank & Ernest", u"http://www.gocomics.com/frankandernest"),
#(u"Frazz", u"http://www.gocomics.com/frazz"),
#(u"Fred Basset", u"http://www.gocomics.com/fredbasset"),
#(u"Free Range", u"http://www.gocomics.com/freerange"),
#(u"Frog Applause", u"http://www.gocomics.com/frogapplause"),
#(u"Garfield Minus Garfield", u"http://www.gocomics.com/garfieldminusgarfield"),
(u"Garfield", u"http://www.gocomics.com/garfield"),
#(u"Gasoline Alley", u"http://www.gocomics.com/gasolinealley"),
#(u"Geech Classics", u"http://www.gocomics.com/geechclassics"),
(u"Get Fuzzy", u"http://www.gocomics.com/getfuzzy"),
#(u"Gil Thorp", u"http://www.gocomics.com/gilthorp"),
#(u"Ginger Meggs", u"http://www.gocomics.com/gingermeggs"),
#(u"Girls & Sports", u"http://www.gocomics.com/girlsandsports"),
#(u"Graffiti", u"http://www.gocomics.com/graffiti"),
#(u"Grand Avenue", u"http://www.gocomics.com/grandavenue"),
#(u"Haiku Ewe", u"http://www.gocomics.com/haikuewe"),
#(u"Heart of the City", u"http://www.gocomics.com/heartofthecity"),
#(u"Herb and Jamaal", u"http://www.gocomics.com/herbandjamaal"),
#(u"Home and Away", u"http://www.gocomics.com/homeandaway"),
#(u"Housebroken", u"http://www.gocomics.com/housebroken"),
#(u"Hubert and Abby", u"http://www.gocomics.com/hubertandabby"),
#(u"Imagine This", u"http://www.gocomics.com/imaginethis"),
#(u"In the Bleachers", u"http://www.gocomics.com/inthebleachers"),
#(u"In the Sticks", u"http://www.gocomics.com/inthesticks"),
#(u"Ink Pen", u"http://www.gocomics.com/inkpen"),
#(u"It's All About You", u"http://www.gocomics.com/itsallaboutyou"),
#(u"Jane's World", u"http://www.gocomics.com/janesworld"),
#(u"Joe Vanilla", u"http://www.gocomics.com/joevanilla"),
#(u"Jump Start", u"http://www.gocomics.com/jumpstart"),
#(u"Kit 'N' Carlyle", u"http://www.gocomics.com/kitandcarlyle"),
#(u"La Cucaracha", u"http://www.gocomics.com/lacucaracha"),
#(u"Last Kiss", u"http://www.gocomics.com/lastkiss"),
#(u"Legend of Bill", u"http://www.gocomics.com/legendofbill"),
#(u"Liberty Meadows", u"http://www.gocomics.com/libertymeadows"),
#(u"Li'l Abner Classics", u"http://www.gocomics.com/lilabnerclassics"),
#(u"Lio", u"http://www.gocomics.com/lio"),
#(u"Little Dog Lost", u"http://www.gocomics.com/littledoglost"),
#(u"Little Otto", u"http://www.gocomics.com/littleotto"),
#(u"Lola", u"http://www.gocomics.com/lola"),
#(u"Love Is...", u"http://www.gocomics.com/loveis"),
(u"Luann", u"http://www.gocomics.com/luann"),
#(u"Maintaining", u"http://www.gocomics.com/maintaining"),
#(u"Meg! Classics", u"http://www.gocomics.com/megclassics"),
#(u"Middle-Aged White Guy", u"http://www.gocomics.com/middleagedwhiteguy"),
#(u"Minimum Security", u"http://www.gocomics.com/minimumsecurity"),
#(u"Moderately Confused", u"http://www.gocomics.com/moderatelyconfused"),
(u"Momma", u"http://www.gocomics.com/momma"),
#(u"Monty", u"http://www.gocomics.com/monty"),
#(u"Motley Classics", u"http://www.gocomics.com/motleyclassics"),
#(u"Mutt & Jeff", u"http://www.gocomics.com/muttandjeff"),
#(u"Mythtickle", u"http://www.gocomics.com/mythtickle"),
#(u"Nancy", u"http://www.gocomics.com/nancy"),
#(u"Natural Selection", u"http://www.gocomics.com/naturalselection"),
#(u"Nest Heads", u"http://www.gocomics.com/nestheads"),
#(u"NEUROTICA", u"http://www.gocomics.com/neurotica"),
#(u"New Adventures of Queen Victoria", u"http://www.gocomics.com/thenewadventuresofqueenvictoria"),
(u"Non Sequitur", u"http://www.gocomics.com/nonsequitur"),
#(u"Off The Mark", u"http://www.gocomics.com/offthemark"),
#(u"On A Claire Day", u"http://www.gocomics.com/onaclaireday"),
#(u"One Big Happy Classics", u"http://www.gocomics.com/onebighappyclassics"),
#(u"One Big Happy", u"http://www.gocomics.com/onebighappy"),
#(u"Out of the Gene Pool Re-Runs", u"http://www.gocomics.com/outofthegenepool"),
#(u"Over the Hedge", u"http://www.gocomics.com/overthehedge"),
#(u"Overboard", u"http://www.gocomics.com/overboard"),
#(u"PC and Pixel", u"http://www.gocomics.com/pcandpixel"),
(u"Peanuts", u"http://www.gocomics.com/peanuts"),
(u"Pearls Before Swine", u"http://www.gocomics.com/pearlsbeforeswine"),
#(u"Pibgorn Sketches", u"http://www.gocomics.com/pibgornsketches"),
#(u"Pibgorn", u"http://www.gocomics.com/pibgorn"),
#(u"Pickles", u"http://www.gocomics.com/pickles"),
#(u"Pinkerton", u"http://www.gocomics.com/pinkerton"),
#(u"Pluggers", u"http://www.gocomics.com/pluggers"),
(u"Pooch Cafe", u"http://www.gocomics.com/poochcafe"),
#(u"PreTeena", u"http://www.gocomics.com/preteena"),
#(u"Prickly City", u"http://www.gocomics.com/pricklycity"),
#(u"Rabbits Against Magic", u"http://www.gocomics.com/rabbitsagainstmagic"),
#(u"Raising Duncan Classics", u"http://www.gocomics.com/raisingduncanclassics"),
#(u"Real Life Adventures", u"http://www.gocomics.com/reallifeadventures"),
#(u"Reality Check", u"http://www.gocomics.com/realitycheck"),
#(u"Red and Rover", u"http://www.gocomics.com/redandrover"),
#(u"Red Meat", u"http://www.gocomics.com/redmeat"),
#(u"Reynolds Unwrapped", u"http://www.gocomics.com/reynoldsunwrapped"),
# "Unfair and Totally Unbalanced." #(u"Rip Haywire", u"http://www.gocomics.com/riphaywire"),
# ("Paul Szep", "http://comics.com/paul_szep"), #(u"Ronaldinho Gaucho", u"http://www.gocomics.com/ronaldinhogaucho"),
# ("RJ Matson", "http://comics.com/rj_matson"), (u"Rose Is Rose", u"http://www.gocomics.com/roseisrose"),
# "Power cartoons from NYC and Capitol Hill" #(u"Rudy Park", u"http://www.gocomics.com/rudypark"),
# ("Rob Rogers", "http://comics.com/rob_rogers"), #(u"Scary Gary", u"http://www.gocomics.com/scarygary"),
# "Humorous slant on current events" #(u"Shirley and Son Classics", u"http://www.gocomics.com/shirleyandsonclassics"),
# ("Robert Ariail", "http://comics.com/robert_ariail"), (u"Shoe", u"http://www.gocomics.com/shoe"),
# "Clever and unpredictable" #(u"Shoecabbage", u"http://www.gocomics.com/shoecabbage"),
# ("Scott Stantis", "http://comics.com/scott_stantis"), #(u"Skin Horse", u"http://www.gocomics.com/skinhorse"),
# ("Signe Wilkinson", "http://comics.com/signe_wilkinson"), #(u"Slowpoke", u"http://www.gocomics.com/slowpoke"),
# ("Steve Benson", "http://comics.com/steve_benson"), #(u"Soup To Nutz", u"http://www.gocomics.com/souptonutz"),
# ("Steve Breen", "http://comics.com/steve_breen"), #(u"Spot The Frog", u"http://www.gocomics.com/spotthefrog"),
# ("Steve Kelley", "http://comics.com/steve_kelley"), #(u"State of the Union", u"http://www.gocomics.com/stateoftheunion"),
# ("Steve Sack", "http://comics.com/steve_sack"), #(u"Stone Soup", u"http://www.gocomics.com/stonesoup"),
]: #(u"Sylvia", u"http://www.gocomics.com/sylvia"),
#(u"Tank McNamara", u"http://www.gocomics.com/tankmcnamara"),
#(u"Tarzan Classics", u"http://www.gocomics.com/tarzanclassics"),
#(u"That's Life", u"http://www.gocomics.com/thatslife"),
#(u"The Academia Waltz", u"http://www.gocomics.com/academiawaltz"),
#(u"The Barn", u"http://www.gocomics.com/thebarn"),
#(u"The Boiling Point", u"http://www.gocomics.com/theboilingpoint"),
#(u"The Boondocks", u"http://www.gocomics.com/boondocks"),
(u"The Born Loser", u"http://www.gocomics.com/thebornloser"),
#(u"The Buckets", u"http://www.gocomics.com/thebuckets"),
#(u"The City", u"http://www.gocomics.com/thecity"),
#(u"The Dinette Set", u"http://www.gocomics.com/dinetteset"),
#(u"The Doozies", u"http://www.gocomics.com/thedoozies"),
#(u"The Duplex", u"http://www.gocomics.com/duplex"),
#(u"The Elderberries", u"http://www.gocomics.com/theelderberries"),
#(u"The Flying McCoys", u"http://www.gocomics.com/theflyingmccoys"),
#(u"The Fusco Brothers", u"http://www.gocomics.com/thefuscobrothers"),
#(u"The Grizzwells", u"http://www.gocomics.com/thegrizzwells"),
#(u"The Humble Stumble", u"http://www.gocomics.com/thehumblestumble"),
#(u"The Knight Life", u"http://www.gocomics.com/theknightlife"),
#(u"The Meaning of Lila", u"http://www.gocomics.com/meaningoflila"),
(u"The Middletons", u"http://www.gocomics.com/themiddletons"),
#(u"The Norm", u"http://www.gocomics.com/thenorm"),
#(u"The Other Coast", u"http://www.gocomics.com/theothercoast"),
#(u"The Quigmans", u"http://www.gocomics.com/thequigmans"),
#(u"The Sunshine Club", u"http://www.gocomics.com/thesunshineclub"),
#(u"Tiny Sepuk", u"http://www.gocomics.com/tinysepuk"),
#(u"TOBY", u"http://www.gocomics.com/toby"),
#(u"Tom the Dancing Bug", u"http://www.gocomics.com/tomthedancingbug"),
#(u"Too Much Coffee Man", u"http://www.gocomics.com/toomuchcoffeeman"),
#(u"Unstrange Phenomena", u"http://www.gocomics.com/unstrangephenomena"),
#(u"W.T. Duck", u"http://www.gocomics.com/wtduck"),
#(u"Watch Your Head", u"http://www.gocomics.com/watchyourhead"),
#(u"Wee Pals", u"http://www.gocomics.com/weepals"),
#(u"Winnie the Pooh", u"http://www.gocomics.com/winniethepooh"),
(u"Wizard of Id", u"http://www.gocomics.com/wizardofid"),
#(u"Working Daze", u"http://www.gocomics.com/workingdaze"),
#(u"Working It Out", u"http://www.gocomics.com/workingitout"),
#(u"Yenny", u"http://www.gocomics.com/yenny"),
#(u"Zack Hill", u"http://www.gocomics.com/zackhill"),
#(u"Ziggy", u"http://www.gocomics.com/ziggy"),
(u"9 to 5", u"http://www.gocomics.com/9to5"),
(u"Heathcliff", u"http://www.gocomics.com/heathcliff"),
(u"Herman", u"http://www.gocomics.com/herman"),
(u"Loose Parts", u"http://www.gocomics.com/looseparts"),
(u"Marmaduke", u"http://www.gocomics.com/marmaduke"),
(u"Ripley's Believe It or Not!", u"http://www.gocomics.com/ripleysbelieveitornot"),
(u"Rubes", u"http://www.gocomics.com/rubes"),
(u"Speed Bump", u"http://www.gocomics.com/speedbump"),
(u"Strange Brew", u"http://www.gocomics.com/strangebrew"),
(u"The Argyle Sweater", u"http://www.gocomics.com/theargylesweater"),
#
######## EDITORIAL CARTOONS #####################
#(u"Adam Zyglis", u"http://www.gocomics.com/adamzyglis"),
#(u"Andy Singer", u"http://www.gocomics.com/andysinger"),
#(u"Ben Sargent",u"http://www.gocomics.com/bensargent"),
#(u"Bill Day", u"http://www.gocomics.com/billday"),
#(u"Bill Schorr", u"http://www.gocomics.com/billschorr"),
#(u"Bob Englehart", u"http://www.gocomics.com/bobenglehart"),
#(u"Bob Gorrell",u"http://www.gocomics.com/bobgorrell"),
#(u"Brian Fairrington", u"http://www.gocomics.com/brianfairrington"),
#(u"Bruce Beattie", u"http://www.gocomics.com/brucebeattie"),
#(u"Cam Cardow", u"http://www.gocomics.com/camcardow"),
#(u"Chan Lowe",u"http://www.gocomics.com/chanlowe"),
#(u"Chip Bok",u"http://www.gocomics.com/chipbok"),
#(u"Chris Britt",u"http://www.gocomics.com/chrisbritt"),
#(u"Chuck Asay",u"http://www.gocomics.com/chuckasay"),
#(u"Clay Bennett",u"http://www.gocomics.com/claybennett"),
#(u"Clay Jones",u"http://www.gocomics.com/clayjones"),
#(u"Dan Wasserman",u"http://www.gocomics.com/danwasserman"),
#(u"Dana Summers",u"http://www.gocomics.com/danasummers"),
#(u"Daryl Cagle", u"http://www.gocomics.com/darylcagle"),
#(u"David Fitzsimmons", u"http://www.gocomics.com/davidfitzsimmons"),
#(u"Dick Locher",u"http://www.gocomics.com/dicklocher"),
#(u"Don Wright",u"http://www.gocomics.com/donwright"),
#(u"Donna Barstow",u"http://www.gocomics.com/donnabarstow"),
#(u"Drew Litton", u"http://www.gocomics.com/drewlitton"),
#(u"Drew Sheneman",u"http://www.gocomics.com/drewsheneman"),
#(u"Ed Stein", u"http://www.gocomics.com/edstein"),
#(u"Eric Allie", u"http://www.gocomics.com/ericallie"),
#(u"Gary Markstein", u"http://www.gocomics.com/garymarkstein"),
#(u"Gary McCoy", u"http://www.gocomics.com/garymccoy"),
#(u"Gary Varvel", u"http://www.gocomics.com/garyvarvel"),
#(u"Glenn McCoy",u"http://www.gocomics.com/glennmccoy"),
#(u"Henry Payne", u"http://www.gocomics.com/henrypayne"),
#(u"Jack Ohman",u"http://www.gocomics.com/jackohman"),
#(u"JD Crowe", u"http://www.gocomics.com/jdcrowe"),
#(u"Jeff Danziger",u"http://www.gocomics.com/jeffdanziger"),
#(u"Jeff Parker", u"http://www.gocomics.com/jeffparker"),
#(u"Jeff Stahler", u"http://www.gocomics.com/jeffstahler"),
#(u"Jerry Holbert", u"http://www.gocomics.com/jerryholbert"),
#(u"Jim Morin",u"http://www.gocomics.com/jimmorin"),
#(u"Joel Pett",u"http://www.gocomics.com/joelpett"),
#(u"John Cole", u"http://www.gocomics.com/johncole"),
#(u"John Darkow", u"http://www.gocomics.com/johndarkow"),
#(u"John Deering",u"http://www.gocomics.com/johndeering"),
#(u"John Sherffius", u"http://www.gocomics.com/johnsherffius"),
#(u"Ken Catalino",u"http://www.gocomics.com/kencatalino"),
#(u"Kerry Waghorn",u"http://www.gocomics.com/facesinthenews"),
#(u"Kevin Kallaugher",u"http://www.gocomics.com/kevinkallaugher"),
#(u"Lalo Alcaraz",u"http://www.gocomics.com/laloalcaraz"),
#(u"Larry Wright", u"http://www.gocomics.com/larrywright"),
#(u"Lisa Benson", u"http://www.gocomics.com/lisabenson"),
#(u"Marshall Ramsey", u"http://www.gocomics.com/marshallramsey"),
#(u"Matt Bors", u"http://www.gocomics.com/mattbors"),
#(u"Matt Davies",u"http://www.gocomics.com/mattdavies"),
#(u"Michael Ramirez", u"http://www.gocomics.com/michaelramirez"),
#(u"Mike Keefe", u"http://www.gocomics.com/mikekeefe"),
#(u"Mike Luckovich", u"http://www.gocomics.com/mikeluckovich"),
#(u"MIke Thompson", u"http://www.gocomics.com/mikethompson"),
#(u"Monte Wolverton", u"http://www.gocomics.com/montewolverton"),
#(u"Mr. Fish", u"http://www.gocomics.com/mrfish"),
#(u"Nate Beeler", u"http://www.gocomics.com/natebeeler"),
#(u"Nick Anderson", u"http://www.gocomics.com/nickanderson"),
#(u"Pat Bagley", u"http://www.gocomics.com/patbagley"),
#(u"Pat Oliphant",u"http://www.gocomics.com/patoliphant"),
#(u"Paul Conrad",u"http://www.gocomics.com/paulconrad"),
#(u"Paul Szep", u"http://www.gocomics.com/paulszep"),
#(u"RJ Matson", u"http://www.gocomics.com/rjmatson"),
#(u"Rob Rogers", u"http://www.gocomics.com/robrogers"),
#(u"Robert Ariail", u"http://www.gocomics.com/robertariail"),
#(u"Scott Stantis", u"http://www.gocomics.com/scottstantis"),
#(u"Signe Wilkinson", u"http://www.gocomics.com/signewilkinson"),
#(u"Small World",u"http://www.gocomics.com/smallworld"),
#(u"Steve Benson", u"http://www.gocomics.com/stevebenson"),
#(u"Steve Breen", u"http://www.gocomics.com/stevebreen"),
#(u"Steve Kelley", u"http://www.gocomics.com/stevekelley"),
#(u"Steve Sack", u"http://www.gocomics.com/stevesack"),
#(u"Stuart Carlson",u"http://www.gocomics.com/stuartcarlson"),
#(u"Ted Rall",u"http://www.gocomics.com/tedrall"),
#(u"(Th)ink", u"http://www.gocomics.com/think"),
#(u"Tom Toles",u"http://www.gocomics.com/tomtoles"),
#(u"Tony Auth",u"http://www.gocomics.com/tonyauth"),
#(u"Views of the World",u"http://www.gocomics.com/viewsoftheworld"),
#(u"ViewsAfrica",u"http://www.gocomics.com/viewsafrica"),
#(u"ViewsAmerica",u"http://www.gocomics.com/viewsamerica"),
#(u"ViewsAsia",u"http://www.gocomics.com/viewsasia"),
#(u"ViewsBusiness",u"http://www.gocomics.com/viewsbusiness"),
#(u"ViewsEurope",u"http://www.gocomics.com/viewseurope"),
#(u"ViewsLatinAmerica",u"http://www.gocomics.com/viewslatinamerica"),
#(u"ViewsMidEast",u"http://www.gocomics.com/viewsmideast"),
#(u"Walt Handelsman",u"http://www.gocomics.com/walthandelsman"),
#(u"Wayne Stayskal",u"http://www.gocomics.com/waynestayskal"),
#(u"Wit of the World",u"http://www.gocomics.com/witoftheworld"),
]:
            print 'Working on: ', title
            articles = self.make_links(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def make_links(self, url):
        title = 'Temp'
        current_articles = []
        pages = range(1, self.num_comics_to_get+1)
        for page in pages:
            page_soup = self.index_to_soup(url)
            if page_soup:
                try:
                    strip_title = page_soup.find(name='div', attrs={'class':'top'}).h1.a.string
                except:
                    strip_title = 'Error - no Title found'
                try:
                    date_title = page_soup.find('ul', attrs={'class': 'feature-nav'}).li.string
                    if not date_title:
                        date_title = page_soup.find('ul', attrs={'class': 'feature-nav'}).li.string
                except:
                    date_title = 'Error - no Date found'
                title = strip_title + ' - ' + date_title
                for i in range(2):
                    try:
                        strip_url_date = page_soup.find(name='div', attrs={'class':'top'}).h1.a['href']
                        break  # success - this is normal exit
                    except:
                        strip_url_date = None
                        continue  # try to get strip_url_date again
                for i in range(2):
                    try:
                        prev_strip_url_date = page_soup.find('a', attrs={'class': 'prev'})['href']
                        break  # success - this is normal exit
                    except:
                        prev_strip_url_date = None
                        continue  # try to get prev_strip_url_date again
                if strip_url_date:
                    page_url = 'http://www.gocomics.com' + strip_url_date
                else:
                    continue
                if prev_strip_url_date:
                    prev_page_url = 'http://www.gocomics.com' + prev_strip_url_date
                else:
                    continue
                current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':''})
                url = prev_page_url
        current_articles.reverse()
        return current_articles

    def preprocess_html(self, soup):
        if soup.title:
            title_string = soup.title.string.strip()
            _cd = title_string.split(',',1)[1]
            comic_date = ' '.join(_cd.split(' ', 4)[0:-1])
            if soup.h1.span:
                artist = soup.h1.span.string
                soup.h1.span.string.replaceWith(comic_date + artist)
        feature_item = soup.find('p',attrs={'class':'feature_item'})
        if feature_item.a:
            a_tag = feature_item.a
            a_href = a_tag["href"]
            img_tag = a_tag.img
            img_tag["src"] = a_href
            img_tag["width"] = self.comic_size
            img_tag["height"] = None
        return self.adeify_images(soup)

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        img {max-width:100%; min-width:100%;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''
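# A sketch (not part of the recipe) of the backward walk make_links() performs:
# fetch a page, record its URL, then follow the 'prev' anchor to the next older
# strip, self.num_comics_to_get times; the final reverse() puts the collected
# strips oldest-first.
#
#   current = start_url                      # hypothetical comic landing page
#   pages = []
#   for _ in range(self.num_comics_to_get):
#       soup = self.index_to_soup(current)
#       if soup is None:
#           break
#       pages.append(current)
#       prev = soup.find('a', attrs={'class': 'prev'})
#       if prev is None:
#           break                            # no older strip available
#       current = 'http://www.gocomics.com' + prev['href']
#   pages.reverse()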
View File

@ -12,5 +12,6 @@ class KDEFamilyPl(BasicNewsRecipe):

    max_articles_per_feed = 100
    preprocess_regexps = [(re.compile(r"Podobne wpisy.*", re.IGNORECASE|re.DOTALL), lambda m: '')]
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = True
    feeds = [(u'Wszystko', u'http://kdefamily.pl/feed/')]
View File

@ -21,7 +21,7 @@ class LegeArtisRecipe(BasicNewsRecipe):

    no_stylesheets = True
    remove_javascript = True
    remove_empty_feeds = True

    extra_css = '''
        img{clear: both;}
    '''
View File

@ -8,6 +8,7 @@ class Lomza(BasicNewsRecipe):

    language = 'pl'
    oldest_article = 15
    no_stylesheets = True
    extra_css = '#foto {float: right; max-width: 200px; margin-left: 10px;} #fotogaleria > div {float:left;} .br {clear: both;}'
    max_articles_per_feed = 100
    remove_tags=[dict(name='div', attrs={'class':['bxbanner', 'drukuj', 'wyslijznajomemu']})]
    keep_only_tags=[dict(name='div', attrs={'class':'wiadomosc'})]
View File

@ -1,11 +1,9 @@

__license__ = 'GPL v3'
__copyright__ = '2012-2013, Darko Miletic <darko.miletic at gmail.com>'
'''
www.nsfwcorp.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class NotSafeForWork(BasicNewsRecipe):

@ -20,8 +18,8 @@ class NotSafeForWork(BasicNewsRecipe):

    needs_subscription = True
    auto_cleanup = False
    INDEX = 'https://www.nsfwcorp.com'
    LOGIN = INDEX + '/account/login/?next=%2F'
    SETTINGS = INDEX + '/account/settings/'
    use_embedded_content = True
    language = 'en'
    publication_type = 'magazine'

@ -48,19 +46,20 @@ class NotSafeForWork(BasicNewsRecipe):

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.INDEX)
        if self.username is not None and self.password is not None:
            br.open(self.LOGIN)
            br.select_form(nr=0)
            br['email' ] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def get_feeds(self):
        self.feeds = []
        soup = self.index_to_soup(self.SETTINGS)
        for item in soup.findAll('input', attrs={'type':'text'}):
            if item.has_key('value') and item['value'].startswith('https://www.nsfwcorp.com/feed/'):
                self.feeds.append(item['value'])
        return self.feeds
View File

@ -1,6 +1,6 @@

#!/usr/bin/env python
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Comment

class PCLab(BasicNewsRecipe):
    cover_url = 'http://pclab.pl/img/logo.png'
    title = u"PC Lab"

@ -52,6 +52,9 @@ class PCLab(BasicNewsRecipe):

            pager = soup2.find('div', attrs={'class':'next'})
            pagetext = soup2.find('div', attrs={'class':'substance'})
            pagetext = pagetext.find('div', attrs={'class':'data'})
            comments = pagetext.findAll(text=lambda text:isinstance(text, Comment))
            for comment in comments:
                comment.extract()

            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
View File

@ -10,7 +10,7 @@ import re

from calibre.web.feeds.news import BasicNewsRecipe

class swiatczytnikow(BasicNewsRecipe):
    title = u'Świat Czytników'
    description = u'Czytniki e-książek w Polsce. Jak wybrać, kupić i korzystać z Amazon Kindle i innych'
    language = 'pl'
    __author__ = u'Tomasz D\u0142ugosz'
View File

@ -1,4 +1,4 @@

import random

from calibre import browser
from calibre.web.feeds.recipes import BasicNewsRecipe

@ -8,7 +8,7 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):

    title = u'The Sun UK'
    description = 'Articles from The Sun tabloid UK'
    __author__ = 'Dave Asbury'
    # last updated 5/5/13 better cover fetch
    language = 'en_GB'
    oldest_article = 1
    max_articles_per_feed = 15

@ -29,16 +29,12 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):

        dict(name='div',attrs={'class' : 'intro'}),
        dict(name='h3'),
        dict(name='div',attrs={'id' : 'articlebody'}),
    ]
    remove_tags_after = [dict(id='bodyText')]
    remove_tags=[
        dict(name='li'),
        dict(attrs={'class' : 'grid-4 right-hand-column'}),
    ]

    feeds = [

@ -47,40 +43,24 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):

        (u'Showbiz', u'http://www.thesun.co.uk/sol/homepage/showbiz/rss'),
        (u'Woman', u'http://www.thesun.co.uk/sol/homepage/woman/rss'),
    ]

    # starsons code
    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
            for article in feed.articles[:]:
                if 'Try out The Sun' in article.title.upper() or 'Try-out-The-Suns' in article.url:
                    feed.articles.remove(article)
                if 'Web porn harms kids' in article.title.upper() or 'Sun-says-Web-porn' in article.url:
                    feed.articles.remove(article)
        return feeds

    def get_cover_url(self):
        br = browser()
        br.set_handle_redirect(False)
        cover_url = 'http://www.thepaperboy.com/frontpages/current/The_Sun_newspaper_front_page.jpg'
        try:
            br.open_novisit('http://www.thepaperboy.com/frontpages/current/The_Sun_newspaper_front_page.jpg')
        except:
            cover_url = random.choice([
                'http://img.thesun.co.uk/multimedia/archive/00905/errorpage6_677961a_905507a.jpg'

@ -88,6 +68,6 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):

                ,'http://img.thesun.co.uk/multimedia/archive/00905/errorpage5_677960a_905512a.jpg'
                ,'http://img.thesun.co.uk/multimedia/archive/00905/errorpage2_677957a_905502a.jpg'
                ,'http://img.thesun.co.uk/multimedia/archive/00905/errorpage3_677958a_905503a.jpg'
            ])
        return cover_url
View File

@ -531,3 +531,9 @@ numeric_collation = False

# number here. The default is ten libraries.
many_libraries = 10

#: Highlight the count of books when using a Virtual Library
# The count of books next to the Virtual Library button is highlighted in
# yellow when using a Virtual Library. By setting this to False, you can turn
# that off.
highlight_virtual_library_book_count = True
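# For example, to turn the highlight off, the only change needed under
# Preferences->Tweaks is:
#
#   highlight_virtual_library_book_count = False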
View File

@ -116,7 +116,9 @@ tarball. Edit setup.py and set zip_safe=False. Then run::

Run the following command to install python dependencies::

    easy_install --always-unzip -U mechanize python-dateutil dnspython cssutils clientform pycrypto cssselect

Install pyreadline from https://pypi.python.org/pypi/pyreadline/2.0

Install pywin32 and edit win32com\__init__.py setting _frozen = True and
__gen_path__ to a temp dir (otherwise it tries to set it to a dir in the
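A sketch of that edit (the paths shown are illustrative; the file lives in
your Python site-packages directory)::

    # In win32com\__init__.py
    _frozen = True
    __gen_path__ = 'c:\\temp\\gen_py'  # any writable temporary directory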
View File

@ -12,14 +12,14 @@ msgstr ""

"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2013-05-06 09:36+0000\n"
"Last-Translator: Ferran Rius <frius64@hotmail.com>\n"
"Language-Team: Catalan <linux@softcatala.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2013-05-07 05:28+0000\n"
"X-Generator: Launchpad (build 16598)\n"
"Language: ca\n"

#. name for aaa

@ -2024,7 +2024,7 @@ msgstr "Àzeri meridional"

#. name for aze
msgid "Azerbaijani"
msgstr ""

#. name for azg
msgid "Amuzgo; San Pedro Amuzgos"

@ -7288,7 +7288,7 @@ msgstr "Epie"

#. name for epo
msgid "Esperanto"
msgstr "Esperanto"

#. name for era
msgid "Eravallan"

@ -21816,7 +21816,7 @@ msgstr "Ramoaaina"

#. name for raj
msgid "Rajasthani"
msgstr ""

#. name for rak
msgid "Tulu-Bohuai"
View File

@ -13762,7 +13762,7 @@ msgstr ""

#. name for lav
msgid "Latvian"
msgstr ""

#. name for law
msgid "Lauje"
View File

@ -1429,7 +1429,7 @@ msgstr ""

#. name for arg
msgid "Aragonese"
msgstr ""

#. name for arh
msgid "Arhuaco"
View File

@ -18,14 +18,14 @@ msgstr ""

"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2013-05-06 09:41+0000\n"
"Last-Translator: Simon Schütte <simonschuette@arcor.de>\n"
"Language-Team: Ubuntu German Translators\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2013-05-07 05:29+0000\n"
"X-Generator: Launchpad (build 16598)\n"
"Language: de\n"

#. name for aaa

@ -319,7 +319,7 @@ msgstr "Adangme"

#. name for adb
msgid "Adabe"
msgstr "Adabe"

#. name for add
msgid "Dzodinka"

@ -367,7 +367,7 @@ msgstr "Adap"

#. name for adq
msgid "Adangbe"
msgstr "Adangbe"

#. name for adr
msgid "Adonara"
View File

@ -2022,7 +2022,7 @@ msgstr ""

#. name for aze
msgid "Azerbaijani"
msgstr ""

#. name for azg
msgid "Amuzgo; San Pedro Amuzgos"

@ -13126,7 +13126,7 @@ msgstr ""

#. name for kur
msgid "Kurdish"
msgstr ""

#. name for kus
msgid "Kusaal"

@ -16190,7 +16190,7 @@ msgstr ""

#. name for mlt
msgid "Maltese"
msgstr ""

#. name for mlu
msgid "To'abaita"
View File

@ -13764,7 +13764,7 @@ msgstr "Laba"

#. name for lav
msgid "Latvian"
msgstr ""

#. name for law
msgid "Lauje"

@ -22212,7 +22212,7 @@ msgstr "Roglai do norte"

#. name for roh
msgid "Romansh"
msgstr ""

#. name for rol
msgid "Romblomanon"
View File

@ -20538,7 +20538,7 @@ msgstr ""

#. name for peo
msgid "Persian; Old (ca. 600-400 B.C.)"
msgstr ""

#. name for pep
msgid "Kunja"
View File

@ -15049,7 +15049,7 @@ msgstr "Magahi"

#. name for mah
msgid "Marshallese"
msgstr ""

#. name for mai
msgid "Maithili"
View File

@ -3742,7 +3742,7 @@ msgstr ""

#. name for bre
msgid "Breton"
msgstr ""

#. name for brf
msgid "Bera"
View File

@ -6804,7 +6804,7 @@ msgstr "डोगोन; तेबुल उरे"

#. name for dua
msgid "Duala"
msgstr ""

#. name for dub
msgid "Dubli"
View File

@ -27790,7 +27790,7 @@ msgstr ""

#. name for wln
msgid "Walloon"
msgstr ""

#. name for wlo
msgid "Wolio"
View File

@ -9862,7 +9862,7 @@ msgstr "Hya"

#. name for hye
msgid "Armenian"
msgstr ""

#. name for iai
msgid "Iaai"

@ -13762,7 +13762,7 @@ msgstr "Laba"

#. name for lav
msgid "Latvian"
msgstr ""

#. name for law
msgid "Lauje"
View File

@ -2089,7 +2089,7 @@ msgstr "Башкирский"

#. name for bal
msgid "Baluchi"
msgstr ""

#. name for bam
msgid "Bambara"
View File

@ -13763,7 +13763,7 @@ msgstr ""

#. name for lav
msgid "Latvian"
msgstr ""

#. name for law
msgid "Lauje"
View File

@ -1016,7 +1016,7 @@ msgstr ""

#. name for amh
msgid "Amharic"
msgstr ""

#. name for ami
msgid "Amis"
View File

@ -18,7 +18,7 @@ def qt_sources():

    'src/gui/widgets/qdialogbuttonbox.cpp',
    ]))

class POT(Command):  # {{{

    description = 'Update the .pot translation template'
    PATH = os.path.join(Command.SRC, __appname__, 'translations')

@ -63,7 +63,6 @@ class POT(Command):  # {{{

        return '\n'.join(ans)

    def run(self, opts):
        pot_header = textwrap.dedent('''\
        # Translation template file..

@ -117,11 +116,10 @@ class POT(Command):  # {{{

            f.write(src)
        self.info('Translations template:', os.path.abspath(pot))
        return pot

# }}}

class Translations(POT):  # {{{

    description='''Compile the translations'''
    DEST = os.path.join(os.path.dirname(POT.SRC), 'resources', 'localization',
            'locales')

@ -134,6 +132,7 @@ class Translations(POT):  # {{{

        return locale, os.path.join(self.DEST, locale, 'messages.mo')

    def run(self, opts):
        self.iso639_errors = []
        for f in self.po_files():
            locale, dest = self.mo_file(f)
            base = os.path.dirname(dest)

@ -146,18 +145,46 @@ class Translations(POT):  # {{{

                    '%s.po'%iscpo)
            if os.path.exists(iso639):
                self.check_iso639(iso639)
                dest = self.j(self.d(dest), 'iso639.mo')
                if self.newer(dest, iso639):
                    self.info('\tCopying ISO 639 translations for %s' % iscpo)
                    subprocess.check_call(['msgfmt', '-o', dest, iso639])
            elif locale not in ('en_GB', 'en_CA', 'en_AU', 'si', 'ur', 'sc',
                    'ltg', 'nds', 'te', 'yi', 'fo', 'sq', 'ast', 'ml', 'ku',
                    'fr_CA', 'him', 'jv', 'ka', 'fur', 'ber'):
                self.warn('No ISO 639 translations for locale:', locale)

        if self.iso639_errors:
            for err in self.iso639_errors:
                print (err)
            raise SystemExit(1)

        self.write_stats()
        self.freeze_locales()

    def check_iso639(self, path):
        from calibre.utils.localization import langnames_to_langcodes
        with open(path, 'rb') as f:
            raw = f.read()
        rmap = {}
        msgid = None
        for match in re.finditer(r'^(msgid|msgstr)\s+"(.*?)"', raw, re.M):
            if match.group(1) == 'msgid':
                msgid = match.group(2)
            else:
                msgstr = match.group(2)
                if not msgstr:
                    continue
                omsgid = rmap.get(msgstr, None)
                if omsgid is not None:
                    cm = langnames_to_langcodes([omsgid, msgid])
                    if cm[msgid] and cm[omsgid] and cm[msgid] != cm[omsgid]:
                        self.iso639_errors.append('In file %s the name %s is used as translation for both %s and %s' % (
                            os.path.basename(path), msgstr, msgid, rmap[msgstr]))
                        # raise SystemExit(1)
                rmap[msgstr] = msgid
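    # A quick sketch (made-up .po fragment) of what check_iso639() flags; the
    # real check also requires both names to map to distinct language codes
    # via langnames_to_langcodes():
    #
    #   raw = 'msgid "Azerbaijani"\nmsgstr "Serbi"\nmsgid "Serbian"\nmsgstr "Serbi"\n'
    #   rmap, msgid = {}, None
    #   for match in re.finditer(r'^(msgid|msgstr)\s+"(.*?)"', raw, re.M):
    #       if match.group(1) == 'msgid':
    #           msgid = match.group(2)
    #       elif match.group(2):
    #           omsgid = rmap.get(match.group(2))
    #           if omsgid is not None and omsgid != msgid:
    #               print '%s used for both %s and %s' % (match.group(2), omsgid, msgid)
    #           rmap[match.group(2)] = msgid
    #
    # which reports: Serbi used for both Azerbaijani and Serbian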
    def freeze_locales(self):
        zf = self.DEST + '.zip'
        from calibre import CurrentDir

@ -191,7 +218,6 @@ class Translations(POT):  # {{{

            locale = self.mo_file(f)[0]
            stats[locale] = min(1.0, float(trans)/total)

        import cPickle
        cPickle.dump(stats, open(dest, 'wb'), -1)

@ -211,7 +237,7 @@ class Translations(POT):  # {{{

# }}}

class GetTranslations(Translations):  # {{{

    description = 'Get updated translations from Launchpad'
    BRANCH = 'lp:~kovid/calibre/translations'

@ -286,7 +312,7 @@ class GetTranslations(Translations):  # {{{

# }}}

class ISO639(Command):  # {{{

    description = 'Compile translations for ISO 639 codes'
    DEST = os.path.join(os.path.dirname(POT.SRC), 'resources', 'localization',
View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'

__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 9, 30)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
View File

@ -1476,7 +1476,6 @@ class StoreKoobeStore(StoreBase):

    drm_free_only = True
    headquarters = 'PL'
    formats = ['EPUB', 'MOBI', 'PDF']

class StoreLegimiStore(StoreBase):
    name = 'Legimi'

@ -1660,7 +1659,6 @@ class StoreWoblinkStore(StoreBase):

    headquarters = 'PL'
    formats = ['EPUB', 'MOBI', 'PDF', 'WOBLINK']

class XinXiiStore(StoreBase):
    name = 'XinXii'
View File

@ -19,10 +19,10 @@ class BLACKBERRY(USBMS):

    VENDOR_ID = [0x0fca]
    PRODUCT_ID = [0x8004, 0x0004]
    BCD = [0x0200, 0x0107, 0x0210, 0x0201, 0x0211, 0x0220, 0x232]

    VENDOR_NAME = 'RIM'
    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['BLACKBERRY_SD', 'BLACKBERRY']

    MAIN_MEMORY_VOLUME_LABEL = 'Blackberry SD Card'
View File

@ -35,7 +35,7 @@ class KOBO(USBMS):

    gui_name = 'Kobo Reader'
    description = _('Communicate with the Kobo Reader')
    author = 'Timothy Legge and David Forrester'
    version = (2, 0, 10)

    dbversion = 0
    fwversion = 0

@ -45,6 +45,7 @@ class KOBO(USBMS):

    supported_platforms = ['windows', 'osx', 'linux']

    booklist_class = CollectionsBookList
    book_class = Book

    # Ordered list of supported formats
    FORMATS = ['epub', 'pdf', 'txt', 'cbz', 'cbr']

@ -115,7 +116,6 @@ class KOBO(USBMS):

    def initialize(self):
        USBMS.initialize(self)
        self.dbversion = 7

    def books(self, oncard=None, end_session=True):
@ -1213,7 +1213,7 @@ class KOBOTOUCH(KOBO):

    min_dbversion_archive = 71
    min_dbversion_images_on_sdcard = 77

    max_supported_fwversion = (2,5,3)
    min_fwversion_images_on_sdcard = (2,4,1)

    has_kepubs = True

@ -1237,11 +1237,9 @@ class KOBOTOUCH(KOBO):

          _('Keep cover aspect ratio') +
          ':::'+_('When uploading covers, do not change the aspect ratio when resizing for the device.'
                  ' This is for firmware versions 2.3.1 and later.'),
          _('Show archived books') +
          ':::'+_('Archived books are listed on the device but need to be downloaded to read.'
                  ' Use this option to show these books and match them with books in the calibre library.'),
          _('Show Previews') +
          ':::'+_('Kobo previews are included on the Touch and some other versions'
                  ' by default they are no longer displayed as there is no good reason to '

@ -1289,7 +1287,7 @@ class KOBOTOUCH(KOBO):

    OPT_UPLOAD_COVERS = 3
    OPT_UPLOAD_GRAYSCALE_COVERS = 4
    OPT_KEEP_COVER_ASPECT_RATIO = 5
    OPT_SHOW_ARCHIVED_BOOK_RECORDS = 6
    OPT_SHOW_PREVIEWS = 7
    OPT_SHOW_RECOMMENDATIONS = 8
    OPT_UPDATE_SERIES_DETAILS = 9

@ -1347,6 +1345,10 @@ class KOBOTOUCH(KOBO):

        self.set_device_name()
        return super(KOBOTOUCH, self).get_device_information(end_session)

    def device_database_path(self):
        return self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite')

    def books(self, oncard=None, end_session=True):
        debug_print("KoboTouch:books - oncard='%s'"%oncard)
        from calibre.ebooks.metadata.meta import path_to_ext

@ -1599,9 +1601,7 @@ class KOBOTOUCH(KOBO):

        self.debug_index = 0
        import sqlite3 as sqlite
        with closing(sqlite.connect(self.device_database_path())) as connection:

            debug_print("KoboTouch:books - reading device database")

            # return bytestrings if the content cannot be decoded as unicode

@ -1618,7 +1618,21 @@ class KOBOTOUCH(KOBO):

            debug_print("KoboTouch:books - shelf list:", self.bookshelvelist)

            opts = self.settings()
            if self.supports_kobo_archive():
                query= ("select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, " \
                    "ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, Accessibility, " \
                    "IsDownloaded, Series, SeriesNumber, ___UserID " \
                    " from content " \
                    " where BookID is Null " \
                    " and ((Accessibility = -1 and IsDownloaded in ('true', 1 )) or (Accessibility in (1,2) %(expiry)s) " \
                    " %(previews)s %(recomendations)s )" \
                    " and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) and ContentType = 6)") % \
                    dict(\
                        expiry="" if opts.extra_customization[self.OPT_SHOW_ARCHIVED_BOOK_RECORDS] else "and IsDownloaded in ('true', 1)", \
                        previews=" or (Accessibility in (6) and ___UserID <> '')" if opts.extra_customization[self.OPT_SHOW_PREVIEWS] else "", \
                        recomendations=" or (Accessibility in (-1, 4, 6) and ___UserId = '')" if opts.extra_customization[self.OPT_SHOW_RECOMMENDATIONS] else "" \
                    )
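            # Sketch: with the default options (archived books hidden, previews
            # and recommendations off) the substitutions above reduce the WHERE
            # clause of this query to:
            #   where BookID is Null
            #     and ((Accessibility = -1 and IsDownloaded in ('true', 1 ))
            #          or (Accessibility in (1,2) and IsDownloaded in ('true', 1)))
            #     and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null)
            #              and ContentType = 6)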
            elif self.supports_series():
                query= ("select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, " \
                    "ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, Accessibility, " \
                    "IsDownloaded, Series, SeriesNumber, ___UserID " \

@ -1627,7 +1641,7 @@ class KOBOTOUCH(KOBO):

                    " and ((Accessibility = -1 and IsDownloaded in ('true', 1)) or (Accessibility in (1,2)) %(previews)s %(recomendations)s )" \
                    " and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) %(expiry)s") % \
                    dict(\
                        expiry=" and ContentType = 6)" if opts.extra_customization[self.OPT_SHOW_ARCHIVED_BOOK_RECORDS] else ")", \
                        previews=" or (Accessibility in (6) and ___UserID <> '')" if opts.extra_customization[self.OPT_SHOW_PREVIEWS] else "", \
                        recomendations=" or (Accessibility in (-1, 4, 6) and ___UserId = '')" if opts.extra_customization[self.OPT_SHOW_RECOMMENDATIONS] else "" \
                    )

@ -1638,7 +1652,7 @@ class KOBOTOUCH(KOBO):

                    ' from content ' \
                    ' where BookID is Null %(previews)s %(recomendations)s and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) %(expiry)s') % \
                    dict(\
                        expiry=' and ContentType = 6)' if opts.extra_customization[self.OPT_SHOW_ARCHIVED_BOOK_RECORDS] else ')', \
                        previews=' and Accessibility <> 6' if opts.extra_customization[self.OPT_SHOW_PREVIEWS] == False else '', \
                        recomendations=' and IsDownloaded in (\'true\', 1)' if opts.extra_customization[self.OPT_SHOW_RECOMMENDATIONS] == False else ''\
                    )

@ -1648,7 +1662,7 @@ class KOBOTOUCH(KOBO):

                    '"1" as IsDownloaded, null as Series, null as SeriesNumber, ___UserID' \
                    ' from content where ' \
                    'BookID is Null and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) %(expiry)s') % dict(expiry=' and ContentType = 6)' \
                    if opts.extra_customization[self.OPT_SHOW_ARCHIVED_BOOK_RECORDS] else ')')
            else:
                query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
                    'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility, ' \

@ -2586,7 +2600,7 @@ class KOBOTOUCH(KOBO):

    def modify_database_check(self, function):
        # Checks to see whether the database version is supported
        # and whether the user has chosen to support the firmware version
        # debug_print("KoboTouch:modify_database_check - self.fwversion > self.max_supported_fwversion=", self.fwversion > self.max_supported_fwversion)
        if self.dbversion > self.supported_dbversion or self.fwversion > self.max_supported_fwversion:
            # Unsupported database
            opts = self.settings()
View File

@ -63,7 +63,6 @@ class TXTInput(InputFormatPlugin):

            normalize_line_endings, convert_textile, remove_indents,
            block_to_single_line, separate_hard_scene_breaks)

        self.log = log
        txt = ''
        log.debug('Reading text from file...')

@ -92,8 +91,15 @@ class TXTInput(InputFormatPlugin):

            log.debug('Using user specified input encoding of %s' % ienc)
        else:
            det_encoding = detect(txt)
            det_encoding, confidence = det_encoding['encoding'], det_encoding['confidence']
            if det_encoding and det_encoding.lower().replace('_', '-').strip() in (
                    'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
                    'eucgb2312-cn', 'gb2312-1980', 'gb2312-80', 'iso-ir-58'):
                # Microsoft Word exports to HTML with encoding incorrectly set to
                # gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
                det_encoding = 'gbk'
            ienc = det_encoding
            log.debug('Detected input encoding as %s with a confidence of %s%%' % (ienc, confidence * 100))
        if not ienc:
            ienc = 'utf-8'
            log.debug('No input encoding specified and could not auto detect using %s' % ienc)
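        # Sketch of the normalization above: a hypothetical detect() result of
        # 'GB2312' lowercases to 'gb2312', matches the alias list, and the text
        # is then decoded with the superset codec 'gbk' instead.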
View File
@ -0,0 +1,11 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
class InvalidDOCX(ValueError):
pass
View File
@ -0,0 +1,267 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
from collections import OrderedDict
from calibre.ebooks.docx.names import XPath, get
class Inherit:
pass
inherit = Inherit()
def binary_property(parent, name):
vals = XPath('./w:%s' % name)(parent)
if not vals:
return inherit
val = get(vals[0], 'w:val', 'on')
return True if val in {'on', '1', 'true'} else False
def simple_color(col, auto='black'):
if not col or col == 'auto' or len(col) != 6:
return auto
return '#'+col
def simple_float(val, mult=1.0):
try:
return float(val) * mult
except (ValueError, TypeError, AttributeError, KeyError):
return None
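# Usage sketch for the two helpers above (illustrative values):
#   simple_color('1F497D')    -> '#1F497D'
#   simple_color('auto')      -> 'black' (the auto default)
#   simple_float('720', 0.05) -> 36.0
#   simple_float(None)        -> None (bad input is swallowed)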
LINE_STYLES = { # {{{
'basicBlackDashes': 'dashed',
'basicBlackDots': 'dotted',
'basicBlackSquares': 'dashed',
'basicThinLines': 'solid',
'dashDotStroked': 'groove',
'dashed': 'dashed',
'dashSmallGap': 'dashed',
'dotDash': 'dashed',
'dotDotDash': 'dashed',
'dotted': 'dotted',
'double': 'double',
'inset': 'inset',
'nil': 'none',
'none': 'none',
'outset': 'outset',
'single': 'solid',
'thick': 'solid',
'thickThinLargeGap': 'double',
'thickThinMediumGap': 'double',
'thickThinSmallGap' : 'double',
'thinThickLargeGap': 'double',
'thinThickMediumGap': 'double',
'thinThickSmallGap': 'double',
'thinThickThinLargeGap': 'double',
'thinThickThinMediumGap': 'double',
'thinThickThinSmallGap': 'double',
'threeDEmboss': 'ridge',
'threeDEngrave': 'groove',
'triple': 'double',
} # }}}
# Read from XML {{{
def read_border(parent, dest):
tvals = {'padding_%s':inherit, 'border_%s_width':inherit,
'border_%s_style':inherit, 'border_%s_color':inherit}
vals = {}
for edge in ('left', 'top', 'right', 'bottom'):
vals.update({k % edge:v for k, v in tvals.iteritems()})
for border in XPath('./w:pBdr')(parent):
for edge in ('left', 'top', 'right', 'bottom'):
for elem in XPath('./w:%s' % edge)(border):
color = get(elem, 'w:color')
if color is not None:
vals['border_%s_color' % edge] = simple_color(color)
style = get(elem, 'w:val')
if style is not None:
vals['border_%s_style' % edge] = LINE_STYLES.get(style, 'solid')
space = get(elem, 'w:space')
if space is not None:
try:
vals['padding_%s' % edge] = float(space)
except (ValueError, TypeError):
pass
sz = get(elem, 'w:sz')
if sz is not None:
# we don't care about art borders (they are only used for page borders)
try:
vals['border_%s_width' % edge] = min(96, max(2, float(sz))) / 8
except (ValueError, TypeError):
pass
for key, val in vals.iteritems():
setattr(dest, key, val)
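# Width sketch: w:sz is measured in eighths of a point, so sz="24" gives
# min(96, max(2, 24.0)) / 8 = 3.0, i.e. a 3pt border; the clamp keeps
# widths between 0.25pt and 12pt.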
def read_indent(parent, dest):
padding_left = padding_right = text_indent = inherit
for indent in XPath('./w:ind')(parent):
l, lc = get(indent, 'w:left'), get(indent, 'w:leftChars')
pl = simple_float(lc, 0.01) if lc is not None else simple_float(l, 0.05) if l is not None else None
if pl is not None:
padding_left = '%.3g%s' % (pl, 'em' if lc is not None else 'pt')
r, rc = get(indent, 'w:right'), get(indent, 'w:rightChars')
pr = simple_float(rc, 0.01) if rc is not None else simple_float(r, 0.05) if r is not None else None
if pr is not None:
padding_right = '%.3g%s' % (pr, 'em' if rc is not None else 'pt')
h, hc = get(indent, 'w:hanging'), get(indent, 'w:hangingChars')
fl, flc = get(indent, 'w:firstLine'), get(indent, 'w:firstLineChars')
h = h if h is None else '-'+h
hc = hc if hc is None else '-'+hc
ti = (simple_float(hc, 0.01) if hc is not None else simple_float(h, 0.05) if h is not None else
simple_float(flc, 0.01) if flc is not None else simple_float(fl, 0.05) if fl is not None else None)
if ti is not None:
text_indent = '%.3g%s' % (ti, 'em' if hc is not None or (h is None and flc is not None) else 'pt')
setattr(dest, 'margin_left', padding_left)
setattr(dest, 'margin_right', padding_right)
setattr(dest, 'text_indent', text_indent)
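# Unit sketch (illustrative values): w:left="720" (twentieths of a point)
# -> simple_float('720', 0.05) = 36 -> margin_left = '36pt', while
# w:leftChars="200" (hundredths of a character) -> 2 -> margin_left = '2em';
# the *Chars attributes win when both are present.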
def read_justification(parent, dest):
ans = inherit
for jc in XPath('./w:jc[@w:val]')(parent):
val = get(jc, 'w:val')
if not val:
continue
if val in {'both', 'distribute'} or 'thai' in val or 'kashida' in val:
ans = 'justify'
if val in {'left', 'center', 'right',}:
ans = val
setattr(dest, 'text_align', ans)
def read_spacing(parent, dest):
padding_top = padding_bottom = line_height = inherit
for s in XPath('./w:spacing')(parent):
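# w:before/w:after are in twips (* 0.05 -> pt); the *Lines variants are in hundredths of a
# line, approximated here as 2ex per line (* 0.02); *Autospacing 'on' suppresses the explicit value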
a, al, aa = get(s, 'w:after'), get(s, 'w:afterLines'), get(s, 'w:afterAutospacing')
pb = None if aa in {'on', '1', 'true'} else simple_float(al, 0.02) if al is not None else simple_float(a, 0.05) if a is not None else None
if pb is not None:
padding_bottom = '%.3g%s' % (pb, 'ex' if al is not None else 'pt')
b, bl, bb = get(s, 'w:before'), get(s, 'w:beforeLines'), get(s, 'w:beforeAutospacing')
pt = None if bb in {'on', '1', 'true'} else simple_float(bl, 0.02) if bl is not None else simple_float(b, 0.05) if b is not None else None
if pt is not None:
padding_top = '%.3g%s' % (pt, 'ex' if bl is not None else 'pt')
l, lr = get(s, 'w:line'), get(s, 'w:lineRule', 'auto')
if l is not None:
    lh = simple_float(l, 0.05) if lr in {'exactly', 'atLeast'} else simple_float(l, 1/240.0)
    if lh is not None:
        line_height = '%.3g%s' % (lh, 'pt' if lr in {'exactly', 'atLeast'} else '')
setattr(dest, 'margin_top', padding_top)
setattr(dest, 'margin_bottom', padding_bottom)
setattr(dest, 'line_height', line_height)
def read_direction(parent, dest):
ans = inherit
for jc in XPath('./w:textFlow[@w:val]')(parent):
val = get(jc, 'w:val')
if not val:
continue
if 'rl' in val.lower():
ans = 'rtl'
setattr(dest, 'direction', ans)
def read_shd(parent, dest):
ans = inherit
for shd in XPath('./w:shd[@w:fill]')(parent):
val = get(shd, 'w:fill')
if val:
ans = simple_color(val, auto='transparent')
setattr(dest, 'background_color', ans)
# }}}
class ParagraphStyle(object):
all_properties = (
'adjustRightInd', 'autoSpaceDE', 'autoSpaceDN', 'bidi',
'contextualSpacing', 'keepLines', 'keepNext', 'mirrorIndents',
'pageBreakBefore', 'snapToGrid', 'suppressLineNumbers',
'suppressOverlap', 'topLinePunct', 'widowControl', 'wordWrap',
# Border margins padding
'border_left_width', 'border_left_style', 'border_left_color', 'padding_left',
'border_top_width', 'border_top_style', 'border_top_color', 'padding_top',
'border_right_width', 'border_right_style', 'border_right_color', 'padding_right',
'border_bottom_width', 'border_bottom_style', 'border_bottom_color', 'padding_bottom',
'margin_left', 'margin_top', 'margin_right', 'margin_bottom',
# Misc.
'text_indent', 'text_align', 'line_height', 'direction', 'background_color',
)
def __init__(self, pPr=None):
self.linked_style = None
if pPr is None:
for p in self.all_properties:
setattr(self, p, inherit)
else:
for p in (
'adjustRightInd', 'autoSpaceDE', 'autoSpaceDN', 'bidi',
'contextualSpacing', 'keepLines', 'keepNext', 'mirrorIndents',
'pageBreakBefore', 'snapToGrid', 'suppressLineNumbers',
'suppressOverlap', 'topLinePunct', 'widowControl', 'wordWrap',
):
setattr(self, p, binary_property(pPr, p))
for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd'):
f = globals()['read_%s' % x]
f(pPr, self)
for s in XPath('./w:pStyle[@w:val]')(pPr):
self.linked_style = get(s, 'w:val')
self._css = None
def update(self, other):
for prop in self.all_properties:
nval = getattr(other, prop)
if nval is not inherit:
setattr(self, prop, nval)
if other.linked_style is not None:
self.linked_style = other.linked_style
def resolve_based_on(self, parent):
for p in self.all_properties:
val = getattr(self, p)
if val is inherit:
setattr(self, p, getattr(parent, p))
@property
def css(self):
if self._css is None:
self._css = c = OrderedDict()
if self.keepLines is True:
c['page-break-inside'] = 'avoid'
if self.pageBreakBefore is True:
c['page-break-before'] = 'always'
for edge in ('left', 'top', 'right', 'bottom'):
val = getattr(self, 'border_%s_width' % edge)
if val is not inherit:
c['border-%s-width' % edge] = '%.3gpt' % val
for x in ('style', 'color'):
val = getattr(self, 'border_%s_%s' % (edge, x))
if val is not inherit:
c['border-%s-%s' % (edge, x)] = val
val = getattr(self, 'padding_%s' % edge)
if val is not inherit:
c['padding-%s' % edge] = '%.3gpt' % val
val = getattr(self, 'margin_%s' % edge)
if val is not inherit:
c['margin-%s' % edge] = val
for x in ('text_indent', 'text_align', 'line_height', 'background_color'):
val = getattr(self, x)
if val is not inherit:
c[x.replace('_', '-')] = val
return self._css
# TODO: keepNext must be done at markup level

View File

@ -0,0 +1,230 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
from collections import OrderedDict
from calibre.ebooks.docx.block_styles import ( # noqa
inherit, simple_color, LINE_STYLES, simple_float, binary_property, read_shd)
from calibre.ebooks.docx.names import XPath, get
# Read from XML {{{
def read_text_border(parent, dest):
border_color = border_style = border_width = padding = inherit
elems = XPath('./w:bdr')(parent)
if elems:
border_color = simple_color('auto')
border_style = 'solid'
border_width = 1
for elem in elems:
color = get(elem, 'w:color')
if color is not None:
border_color = simple_color(color)
style = get(elem, 'w:val')
if style is not None:
border_style = LINE_STYLES.get(style, 'solid')
space = get(elem, 'w:space')
if space is not None:
try:
padding = float(space)
except (ValueError, TypeError):
pass
sz = get(elem, 'w:sz')
if sz is not None:
# we don't care about art borders (they are only used for page borders)
try:
border_width = min(96, max(2, float(sz))) / 8
except (ValueError, TypeError):
pass
setattr(dest, 'border_color', border_color)
setattr(dest, 'border_style', border_style)
setattr(dest, 'border_width', border_width)
setattr(dest, 'padding', padding)
def read_color(parent, dest):
ans = inherit
for col in XPath('./w:color[@w:val]')(parent):
val = get(col, 'w:val')
if not val:
continue
ans = simple_color(val)
setattr(dest, 'color', ans)
def read_highlight(parent, dest):
ans = inherit
for col in XPath('./w:highlight[@w:val]')(parent):
val = get(col, 'w:val')
if not val:
continue
if val == 'none':
val = 'transparent'
ans = val
setattr(dest, 'highlight', ans)
def read_lang(parent, dest):
ans = inherit
for col in XPath('./w:lang[@w:val]')(parent):
val = get(col, 'w:val')
if not val:
continue
try:
code = int(val, 16)
except (ValueError, TypeError):
ans = val
else:
from calibre.ebooks.docx.lcid import lcid
val = lcid.get(code, None)
if val:
ans = val
setattr(dest, 'lang', ans)
def read_letter_spacing(parent, dest):
ans = inherit
for col in XPath('./w:spacing[@w:val]')(parent):
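# run-level w:spacing is in twips; * 0.05 converts to points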
val = simple_float(get(col, 'w:val'), 0.05)
if val is not None:
ans = val
setattr(dest, 'letter_spacing', ans)
def read_sz(parent, dest):
ans = inherit
for col in XPath('./w:sz[@w:val]')(parent):
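# w:sz is in half-points; * 0.5 converts to the font size in points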
val = simple_float(get(col, 'w:val'), 0.5)
if val is not None:
ans = val
setattr(dest, 'font_size', ans)
def read_underline(parent, dest):
ans = inherit
for col in XPath('./w:u[@w:val]')(parent):
val = get(col, 'w:val')
if val:
ans = 'underline'
setattr(dest, 'text_decoration', ans)
def read_vert_align(parent, dest):
ans = inherit
for col in XPath('./w:vertAlign[@w:val]')(parent):
val = get(col, 'w:val')
if val and val in {'baseline', 'subscript', 'superscript'}:
ans = val
setattr(dest, 'vert_align', ans)
# }}}
class RunStyle(object):
all_properties = {
'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint',
'rtl', 'shadow', 'smallCaps', 'strike', 'vanish',
'border_color', 'border_style', 'border_width', 'padding', 'color', 'highlight', 'background_color',
'letter_spacing', 'font_size', 'text_decoration', 'vert_align', 'lang',
}
toggle_properties = {
'b', 'bCs', 'caps', 'emboss', 'i', 'iCs', 'imprint', 'shadow', 'smallCaps', 'strike', 'dstrike', 'vanish',
}
def __init__(self, rPr=None):
self.linked_style = None
if rPr is None:
for p in self.all_properties:
setattr(self, p, inherit)
else:
for p in (
'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint', 'rtl', 'shadow',
'smallCaps', 'strike', 'vanish',
):
setattr(self, p, binary_property(rPr, p))
for x in ('text_border', 'color', 'highlight', 'shd', 'letter_spacing', 'sz', 'underline', 'vert_align', 'lang'):
f = globals()['read_%s' % x]
f(rPr, self)
for s in XPath('./w:rStyle[@w:val]')(rPr):
self.linked_style = get(s, 'w:val')
self._css = None
def update(self, other):
for prop in self.all_properties:
nval = getattr(other, prop)
if nval is not inherit:
setattr(self, prop, nval)
if other.linked_style is not None:
self.linked_style = other.linked_style
def resolve_based_on(self, parent):
for p in self.all_properties:
val = getattr(self, p)
if val is inherit:
setattr(self, p, getattr(parent, p))
@property
def css(self):
if self._css is None:
c = self._css = OrderedDict()
td = set()
if self.text_decoration is not inherit:
td.add(self.text_decoration)
if self.strike or self.dstrike:
td.add('line-through')
if td:
c['text-decoration'] = ' '.join(td)
if self.caps is True:
c['text-transform'] = 'uppercase'
if self.i is True:
c['font-style'] = 'italic'
if self.shadow:
c['text-shadow'] = '2px 2px'
if self.smallCaps is True:
c['font-variant'] = 'small-caps'
if self.vanish is True:
c['display'] = 'none'
for x in ('color', 'style', 'width'):
val = getattr(self, 'border_'+x)
if x == 'width' and val is not inherit:
val = '%.3gpt' % val
if val is not inherit:
c['border-%s' % x] = val
if self.padding is not inherit:
c['padding'] = '%.3gpt' % self.padding
for x in ('color', 'background_color'):
val = getattr(self, x)
if val is not inherit:
c[x.replace('_', '-')] = val
for x in ('letter_spacing', 'font_size'):
val = getattr(self, x)
if val is not inherit:
c[x.replace('_', '-')] = '%.3gpt' % val
if self.highlight is not inherit and self.highlight != 'transparent':
c['background-color'] = self.highlight
if self.b:
c['font-weight'] = 'bold'
return self._css
def same_border(self, other):
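# Two runs can share a border wrapper only if both actually define a border and all of color/style/width match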
for x in (self, other):
has_border = False
for y in ('color', 'style', 'width'):
if ('border-%s' % y) in x.css:
has_border = True
break
if not has_border:
return False
s = tuple(self.css.get('border-%s' % y, None) for y in ('color', 'style', 'width'))
o = tuple(other.css.get('border-%s' % y, None) for y in ('color', 'style', 'width'))
return s == o

View File

@ -0,0 +1,231 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import os, sys, shutil
from lxml import etree
from calibre import walk, guess_type
from calibre.ebooks.metadata import string_to_authors
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.docx import InvalidDOCX
from calibre.ebooks.docx.names import DOCUMENT, DOCPROPS, XPath, APPPROPS
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.localization import canonicalize_lang
from calibre.utils.logging import default_log
from calibre.utils.zipfile import ZipFile
from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
def fromstring(raw, parser=RECOVER_PARSER):
return etree.fromstring(raw, parser=parser)
# Read metadata {{{
def read_doc_props(raw, mi):
root = fromstring(raw)
titles = XPath('//dc:title')(root)
if titles:
title = titles[0].text
if title and title.strip():
mi.title = title.strip()
tags = []
for subject in XPath('//dc:subject')(root):
if subject.text and subject.text.strip():
tags.append(subject.text.strip().replace(',', '_'))
for keywords in XPath('//cp:keywords')(root):
if keywords.text and keywords.text.strip():
for x in keywords.text.split():
tags.extend(y.strip() for y in x.split(','))
if tags:
mi.tags = tags
authors = XPath('//dc:creator')(root)
aut = []
for author in authors:
if author.text and author.text.strip():
aut.extend(string_to_authors(author.text))
if aut:
mi.authors = aut
desc = XPath('//dc:description')(root)
if desc:
raw = etree.tostring(desc[0], method='text', encoding=unicode)
mi.comments = raw
langs = []
for lang in XPath('//dc:language')(root):
if lang.text and lang.text.strip():
l = canonicalize_lang(lang.text)
if l:
langs.append(l)
if langs:
mi.languages = langs
def read_app_props(raw, mi):
root = fromstring(raw)
company = root.xpath('//*[local-name()="Company"]')
if company and company[0].text and company[0].text.strip():
mi.publisher = company[0].text.strip()
# }}}
class DOCX(object):
def __init__(self, path_or_stream, log=None, extract=True):
stream = path_or_stream if hasattr(path_or_stream, 'read') else open(path_or_stream, 'rb')
self.name = getattr(stream, 'name', None) or '<stream>'
self.log = log or default_log
if extract:
self.extract(stream)
else:
self.init_zipfile(stream)
self.read_content_types()
self.read_package_relationships()
def init_zipfile(self, stream):
self.zipf = ZipFile(stream)
self.names = frozenset(self.zipf.namelist())
def extract(self, stream):
self.tdir = PersistentTemporaryDirectory('docx_container')
try:
zf = ZipFile(stream)
zf.extractall(self.tdir)
except:
self.log.exception('DOCX appears to be invalid ZIP file, trying a'
' more forgiving ZIP parser')
from calibre.utils.localunzip import extractall
stream.seek(0)
extractall(stream, self.tdir)
self.names = {}
for f in walk(self.tdir):
name = os.path.relpath(f, self.tdir).replace(os.sep, '/')
self.names[name] = f
def exists(self, name):
return name in self.names
def read(self, name):
if hasattr(self, 'zipf'):
return self.zipf.open(name).read()
path = self.names[name]
with open(path, 'rb') as f:
return f.read()
def read_content_types(self):
try:
raw = self.read('[Content_Types].xml')
except KeyError:
raise InvalidDOCX('The DOCX file %s has no [Content_Types].xml' % self.name)
root = fromstring(raw)
self.content_types = {}
self.default_content_types = {}
for item in root.xpath('//*[local-name()="Types"]/*[local-name()="Default" and @Extension and @ContentType]'):
self.default_content_types[item.get('Extension').lower()] = item.get('ContentType')
for item in root.xpath('//*[local-name()="Types"]/*[local-name()="Override" and @PartName and @ContentType]'):
name = item.get('PartName').lstrip('/')
self.content_types[name] = item.get('ContentType')
def content_type(self, name):
if name in self.content_types:
return self.content_types[name]
ext = name.rpartition('.')[-1].lower()
if ext in self.default_content_types:
return self.default_content_types[ext]
return guess_type(name)[0]
def read_package_relationships(self):
try:
raw = self.read('_rels/.rels')
except KeyError:
raise InvalidDOCX('The DOCX file %s has no _rels/.rels' % self.name)
root = fromstring(raw)
self.relationships = {}
self.relationships_rmap = {}
for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
target = item.get('Target').lstrip('/')
typ = item.get('Type')
self.relationships[typ] = target
self.relationships_rmap[target] = typ
@property
def document_name(self):
name = self.relationships.get(DOCUMENT, None)
if name is None:
names = tuple(n for n in self.names if n == 'document.xml' or n.endswith('/document.xml'))
if not names:
raise InvalidDOCX('The DOCX file %s has no main document' % self.name)
name = names[0]
return name
@property
def document(self):
return fromstring(self.read(self.document_name))
@property
def document_relationships(self):
name = self.document_name
base = '/'.join(name.split('/')[:-1])
by_id, by_type = {}, {}
parts = name.split('/')
name = '/'.join(parts[:-1] + ['_rels', parts[-1] + '.rels'])
try:
raw = self.read(name)
except KeyError:
pass
else:
root = fromstring(raw)
for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
target = '/'.join((base, item.get('Target').lstrip('/')))
typ = item.get('Type')
Id = item.get('Id')
by_id[Id] = by_type[typ] = target
return by_id, by_type
@property
def metadata(self):
mi = Metadata(_('Unknown'))
name = self.relationships.get(DOCPROPS, None)
if name is None:
names = tuple(n for n in self.names if n.lower() == 'docprops/core.xml')
if names:
name = names[0]
if name:
try:
raw = self.read(name)
except KeyError:
pass
else:
read_doc_props(raw, mi)
name = self.relationships.get(APPPROPS, None)
if name is None:
names = tuple(n for n in self.names if n.lower() == 'docprops/app.xml')
if names:
name = names[0]
if name:
try:
raw = self.read(name)
except KeyError:
pass
else:
read_app_props(raw, mi)
return mi
def close(self):
if hasattr(self, 'zipf'):
self.zipf.close()
else:
try:
shutil.rmtree(self.tdir)
except EnvironmentError:
pass
if __name__ == '__main__':
d = DOCX(sys.argv[-1], extract=False)
print(d.metadata)

View File

@ -0,0 +1,233 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
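# Map of Microsoft Windows language identifiers (LCIDs), as found in w:lang values in DOCX
# files, to ISO 639 language codes; None marks entries with no mapping here yet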
lcid = {
1078: 'af', # Afrikaans - South Africa
1052: 'sq', # Albanian - Albania
1118: 'am', # Amharic - Ethiopia
1025: 'ar', # Arabic - Saudi Arabia
5121: 'ar', # Arabic - Algeria
15361: 'ar', # Arabic - Bahrain
3073: 'ar', # Arabic - Egypt
2049: 'ar', # Arabic - Iraq
11265: 'ar', # Arabic - Jordan
13313: 'ar', # Arabic - Kuwait
12289: 'ar', # Arabic - Lebanon
4097: 'ar', # Arabic - Libya
6145: 'ar', # Arabic - Morocco
8193: 'ar', # Arabic - Oman
16385: 'ar', # Arabic - Qatar
10241: 'ar', # Arabic - Syria
7169: 'ar', # Arabic - Tunisia
14337: 'ar', # Arabic - U.A.E.
9217: 'ar', # Arabic - Yemen
1067: 'hy', # Armenian - Armenia
1101: 'as', # Assamese
2092: 'az', # Azeri (Cyrillic)
1068: 'az', # Azeri (Latin)
1069: 'eu', # Basque
1059: 'be', # Belarusian
1093: 'bn', # Bengali (India)
2117: 'bn', # Bengali (Bangladesh)
5146: 'bs', # Bosnian (Bosnia/Herzegovina)
1026: 'bg', # Bulgarian
1109: 'my', # Burmese
1027: 'ca', # Catalan
1116: 'chr', # Cherokee - United States
2052: 'zh', # Chinese - People's Republic of China
4100: 'zh', # Chinese - Singapore
1028: 'zh', # Chinese - Taiwan
3076: 'zh', # Chinese - Hong Kong SAR
5124: 'zh', # Chinese - Macao SAR
1050: 'hr', # Croatian
4122: 'hr', # Croatian (Bosnia/Herzegovina)
1029: 'cs', # Czech
1030: 'da', # Danish
1125: 'dv', # Divehi
1043: 'nl', # Dutch - Netherlands
2067: 'nl', # Dutch - Belgium
1126: 'bin', # Edo
1033: 'en', # English - United States
2057: 'en', # English - United Kingdom
3081: 'en', # English - Australia
10249: 'en', # English - Belize
4105: 'en', # English - Canada
9225: 'en', # English - Caribbean
15369: 'en', # English - Hong Kong SAR
16393: 'en', # English - India
14345: 'en', # English - Indonesia
6153: 'en', # English - Ireland
8201: 'en', # English - Jamaica
17417: 'en', # English - Malaysia
5129: 'en', # English - New Zealand
13321: 'en', # English - Philippines
18441: 'en', # English - Singapore
7177: 'en', # English - South Africa
11273: 'en', # English - Trinidad
12297: 'en', # English - Zimbabwe
1061: 'et', # Estonian
1080: 'fo', # Faroese
1065: None, # TODO: Farsi
1124: 'fil', # Filipino
1035: 'fi', # Finnish
1036: 'fr', # French - France
2060: 'fr', # French - Belgium
11276: 'fr', # French - Cameroon
3084: 'fr', # French - Canada
9228: 'fr', # French - Democratic Rep. of Congo
12300: 'fr', # French - Cote d'Ivoire
15372: 'fr', # French - Haiti
5132: 'fr', # French - Luxembourg
13324: 'fr', # French - Mali
6156: 'fr', # French - Monaco
14348: 'fr', # French - Morocco
58380: 'fr', # French - North Africa
8204: 'fr', # French - Reunion
10252: 'fr', # French - Senegal
4108: 'fr', # French - Switzerland
7180: 'fr', # French - West Indies
1122: 'fy', # Frisian - Netherlands
1127: None, # TODO: Fulfulde - Nigeria
1071: 'mk', # FYRO Macedonian
2108: 'ga', # Gaelic (Ireland)
1084: 'gd', # Gaelic (Scotland)
1110: 'gl', # Galician
1079: 'ka', # Georgian
1031: 'de', # German - Germany
3079: 'de', # German - Austria
5127: 'de', # German - Liechtenstein
4103: 'de', # German - Luxembourg
2055: 'de', # German - Switzerland
1032: 'el', # Greek
1140: 'gn', # Guarani - Paraguay
1095: 'gu', # Gujarati
1128: 'ha', # Hausa - Nigeria
1141: 'haw', # Hawaiian - United States
1037: 'he', # Hebrew
1081: 'hi', # Hindi
1038: 'hu', # Hungarian
1129: None, # TODO: Ibibio - Nigeria
1039: 'is', # Icelandic
1136: 'ig', # Igbo - Nigeria
1057: 'id', # Indonesian
1117: 'iu', # Inuktitut
1040: 'it', # Italian - Italy
2064: 'it', # Italian - Switzerland
1041: 'ja', # Japanese
1099: 'kn', # Kannada
1137: 'kr', # Kanuri - Nigeria
2144: 'ks', # Kashmiri
1120: 'ks', # Kashmiri (Arabic)
1087: 'kk', # Kazakh
1107: 'km', # Khmer
1111: 'kok', # Konkani
1042: 'ko', # Korean
1088: 'ky', # Kyrgyz (Cyrillic)
1108: 'lo', # Lao
1142: 'la', # Latin
1062: 'lv', # Latvian
1063: 'lt', # Lithuanian
1086: 'ms', # Malay - Malaysia
2110: 'ms', # Malay - Brunei Darussalam
1100: 'ml', # Malayalam
1082: 'mt', # Maltese
1112: 'mni', # Manipuri
1153: 'mi', # Maori - New Zealand
1102: 'mr', # Marathi
1104: 'mn', # Mongolian (Cyrillic)
2128: 'mn', # Mongolian (Mongolian)
1121: 'ne', # Nepali
2145: 'ne', # Nepali - India
1044: 'no', # Norwegian (Bokmål)
2068: 'no', # Norwegian (Nynorsk)
1096: 'or', # Oriya
1138: 'om', # Oromo
1145: 'pap', # Papiamentu
1123: 'ps', # Pashto
1045: 'pl', # Polish
1046: 'pt', # Portuguese - Brazil
2070: 'pt', # Portuguese - Portugal
1094: 'pa', # Punjabi
2118: 'pa', # Punjabi (Pakistan)
1131: 'qu', # Quecha - Bolivia
2155: 'qu', # Quecha - Ecuador
3179: 'qu', # Quecha - Peru
1047: 'rm', # Rhaeto-Romanic
1048: 'ro', # Romanian
2072: 'ro', # Romanian - Moldava
1049: 'ru', # Russian
2073: 'ru', # Russian - Moldava
1083: 'se', # Sami (Lappish)
1103: 'sa', # Sanskrit
1132: 'nso', # Sepedi
3098: 'sr', # Serbian (Cyrillic)
2074: 'sr', # Serbian (Latin)
1113: 'sd', # Sindhi - India
2137: 'sd', # Sindhi - Pakistan
1115: 'si', # Sinhalese - Sri Lanka
1051: 'sk', # Slovak
1060: 'sl', # Slovenian
1143: 'so', # Somali
1070: 'wen', # Sorbian
3082: 'es', # Spanish - Spain (Modern Sort)
1034: 'es', # Spanish - Spain (Traditional Sort)
11274: 'es', # Spanish - Argentina
16394: 'es', # Spanish - Bolivia
13322: 'es', # Spanish - Chile
9226: 'es', # Spanish - Colombia
5130: 'es', # Spanish - Costa Rica
7178: 'es', # Spanish - Dominican Republic
12298: 'es', # Spanish - Ecuador
17418: 'es', # Spanish - El Salvador
4106: 'es', # Spanish - Guatemala
18442: 'es', # Spanish - Honduras
58378: 'es', # Spanish - Latin America
2058: 'es', # Spanish - Mexico
19466: 'es', # Spanish - Nicaragua
6154: 'es', # Spanish - Panama
15370: 'es', # Spanish - Paraguay
10250: 'es', # Spanish - Peru
20490: 'es', # Spanish - Puerto Rico
21514: 'es', # Spanish - United States
14346: 'es', # Spanish - Uruguay
8202: 'es', # Spanish - Venezuela
1072: None, # TODO: Sutu
1089: 'sw', # Swahili
1053: 'sv', # Swedish
2077: 'sv', # Swedish - Finland
1114: 'syr', # Syriac
1064: 'tg', # Tajik
1119: None, # TODO: Tamazight (Arabic)
2143: None, # TODO: Tamazight (Latin)
1097: 'ta', # Tamil
1092: 'tt', # Tatar
1098: 'te', # Telugu
1054: 'th', # Thai
2129: 'bo', # Tibetan - Bhutan
1105: 'bo', # Tibetan - People's Republic of China
2163: 'ti', # Tigrigna - Eritrea
1139: 'ti', # Tigrigna - Ethiopia
1073: 'ts', # Tsonga
1074: 'tn', # Tswana
1055: 'tr', # Turkish
1090: 'tk', # Turkmen
1152: 'ug', # Uighur - China
1058: 'uk', # Ukrainian
1056: 'ur', # Urdu
2080: 'ur', # Urdu - India
2115: 'uz', # Uzbek (Cyrillic)
1091: 'uz', # Uzbek (Latin)
1075: 've', # Venda
1066: 'vi', # Vietnamese
1106: 'cy', # Welsh
1076: 'xh', # Xhosa
1144: 'ii', # Yi
1085: 'yi', # Yiddish
1130: 'yo', # Yoruba
1077: 'zu' # Zulu
}

View File

@ -0,0 +1,65 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
from lxml.etree import XPath as X
DOCUMENT = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument'
DOCPROPS = 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties'
APPPROPS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties'
STYLES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles'
NUMBERING = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering'
namespaces = {
'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
'o': 'urn:schemas-microsoft-com:office:office',
've': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
# Text Content
'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
'w10': 'urn:schemas-microsoft-com:office:word',
'wne': 'http://schemas.microsoft.com/office/word/2006/wordml',
'xml': 'http://www.w3.org/XML/1998/namespace',
# Drawing
'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
'm': 'http://schemas.openxmlformats.org/officeDocument/2006/math',
'mv': 'urn:schemas-microsoft-com:mac:vml',
'pic': 'http://schemas.openxmlformats.org/drawingml/2006/picture',
'v': 'urn:schemas-microsoft-com:vml',
'wp': 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing',
# Properties (core and extended)
'cp': 'http://schemas.openxmlformats.org/package/2006/metadata/core-properties',
'dc': 'http://purl.org/dc/elements/1.1/',
'ep': 'http://schemas.openxmlformats.org/officeDocument/2006/extended-properties',
'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
# Content Types
'ct': 'http://schemas.openxmlformats.org/package/2006/content-types',
# Package Relationships
'r': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships',
'pr': 'http://schemas.openxmlformats.org/package/2006/relationships',
# Dublin Core document properties
'dcmitype': 'http://purl.org/dc/dcmitype/',
'dcterms': 'http://purl.org/dc/terms/'
}
def XPath(expr):
return X(expr, namespaces=namespaces)
def is_tag(x, q):
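# Check whether a tag (or element) matches a prefixed name like 'w:p', expanding the prefix via the namespaces map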
tag = getattr(x, 'tag', x)
ns, name = q.partition(':')[0::2]
return '{%s}%s' % (namespaces.get(ns, None), name) == tag
def barename(x):
return x.rpartition('}')[-1]
def XML(x):
return '{%s}%s' % (namespaces['xml'], x)
def get(x, attr, default=None):
ns, name = attr.partition(':')[0::2]
return x.attrib.get('{%s}%s' % (namespaces[ns], name), default)
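# e.g. get(elem, 'w:val') looks up the attribute '{<wordprocessingml main namespace>}val' on elem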

View File

@ -0,0 +1,156 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
from calibre.ebooks.docx.block_styles import ParagraphStyle
from calibre.ebooks.docx.char_styles import RunStyle
from calibre.ebooks.docx.names import XPath, get
STYLE_MAP = {
'aiueo': 'hiragana',
'aiueoFullWidth': 'hiragana',
'hebrew1': 'hebrew',
'iroha': 'katakana-iroha',
'irohaFullWidth': 'katakana-iroha',
'lowerLetter': 'lower-alpha',
'lowerRoman': 'lower-roman',
'none': 'none',
'upperLetter': 'upper-alpha',
'upperRoman': 'upper-roman',
'chineseCounting': 'cjk-ideographic',
'decimalZero': 'decimal-leading-zero',
}
class Level(object):
def __init__(self, lvl=None):
self.restart = None
self.start = 0
self.fmt = 'decimal'
self.para_link = None
self.paragraph_style = self.character_style = None
if lvl is not None:
self.read_from_xml(lvl)
def read_from_xml(self, lvl, override=False):
for lr in XPath('./w:lvlRestart[@w:val]')(lvl):
try:
self.restart = int(get(lr, 'w:val'))
except (TypeError, ValueError):
pass
for lr in XPath('./w:start[@w:val]')(lvl):
try:
self.start = int(get(lr, 'w:val'))
except (TypeError, ValueError):
pass
lt = None
for lr in XPath('./w:lvlText[@w:val]')(lvl):
lt = get(lr, 'w:val')
for lr in XPath('./w:numFmt[@w:val]')(lvl):
val = get(lr, 'w:val')
if val == 'bullet':
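# \uf0a7 is the private-use glyph Word emits for a square bullet; 'o' is conventionally rendered as a circle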
self.fmt = {'\uf0a7':'square', 'o':'circle'}.get(lt, 'disc')
else:
self.fmt = STYLE_MAP.get(val, 'decimal')
for lr in XPath('./w:pStyle[@w:val]')(lvl):
self.para_link = get(lr, 'w:val')
for pPr in XPath('./w:pPr')(lvl):
ps = ParagraphStyle(pPr)
if self.paragraph_style is None:
self.paragraph_style = ps
else:
self.paragraph_style.update(ps)
for rPr in XPath('./w:rPr')(lvl):
ps = RunStyle(rPr)
if self.character_style is None:
self.character_style = ps
else:
self.character_style.update(ps)
def copy(self):
ans = Level()
for x in ('restart', 'start', 'fmt', 'para_link', 'paragraph_style', 'character_style'):
setattr(ans, x, getattr(self, x))
return ans
class NumberingDefinition(object):
def __init__(self, parent=None):
self.levels = {}
if parent is not None:
for lvl in XPath('./w:lvl')(parent):
try:
ilvl = int(get(lvl, 'w:ilvl', 0))
except (TypeError, ValueError):
ilvl = 0
self.levels[ilvl] = Level(lvl)
def copy(self):
ans = NumberingDefinition()
for l, lvl in self.levels.iteritems():
ans.levels[l] = lvl.copy()
return ans
class Numbering(object):
def __init__(self):
self.definitions = {}
self.instances = {}
def __call__(self, root, styles):
' Read all numbering style definitions '
lazy_load = {}
for an in XPath('./w:abstractNum[@w:abstractNumId]')(root):
an_id = get(an, 'w:abstractNumId')
nsl = XPath('./w:numStyleLink[@w:val]')(an)
if nsl:
lazy_load[an_id] = get(nsl[0], 'w:val')
else:
nd = NumberingDefinition(an)
self.definitions[an_id] = nd
def create_instance(n, definition):
    nd = definition.copy()
    for lo in XPath('./w:lvlOverride')(n):
        ilvl = get(lo, 'w:ilvl')
        for lvl in XPath('./w:lvl')(lo)[:1]:
            nilvl = get(lvl, 'w:ilvl')
            ilvl = nilvl if ilvl is None else ilvl
            alvl = nd.levels.get(ilvl, None)
            if alvl is None:
                alvl = nd.levels[ilvl] = Level()
            alvl.read_from_xml(lvl, override=True)
    return nd
next_pass = {}
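# A w:num may reference an abstract definition that is lazy-loaded via numStyleLink;
# such instances cannot be built yet and are resolved in a second pass below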
for n in XPath('./w:num[@w:numId]')(root):
an_id = None
num_id = get(n, 'w:numId')
for an in XPath('./w:abstractNumId[@w:val]')(n):
an_id = get(an, 'w:val')
d = self.definitions.get(an_id, None)
if d is None:
next_pass[num_id] = (an_id, n)
continue
self.instances[num_id] = create_instance(n, d)
numbering_links = styles.numbering_style_links
for an_id, style_link in lazy_load.iteritems():
num_id = numbering_links[style_link]
self.definitions[an_id] = self.instances[num_id].copy()
for num_id, (an_id, n) in next_pass.iteritems():
d = self.definitions.get(an_id, None)
if d is not None:
self.instances[num_id] = create_instance(n, d)

View File

@ -0,0 +1,284 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import textwrap
from collections import OrderedDict, Counter
from calibre.ebooks.docx.block_styles import ParagraphStyle, inherit
from calibre.ebooks.docx.char_styles import RunStyle
from calibre.ebooks.docx.names import XPath, get
class Style(object):
'''
Class representing a <w:style> element. Can contain block, character, etc. styles.
'''
name_path = XPath('./w:name[@w:val]')
based_on_path = XPath('./w:basedOn[@w:val]')
def __init__(self, elem):
self.resolved = False
self.style_id = get(elem, 'w:styleId')
self.style_type = get(elem, 'w:type')
names = self.name_path(elem)
self.name = get(names[-1], 'w:val') if names else None
based_on = self.based_on_path(elem)
self.based_on = get(based_on[0], 'w:val') if based_on else None
if self.style_type == 'numbering':
self.based_on = None
self.is_default = get(elem, 'w:default') in {'1', 'on', 'true'}
self.paragraph_style = self.character_style = None
if self.style_type in {'paragraph', 'character'}:
if self.style_type == 'paragraph':
for pPr in XPath('./w:pPr')(elem):
ps = ParagraphStyle(pPr)
if self.paragraph_style is None:
self.paragraph_style = ps
else:
self.paragraph_style.update(ps)
for rPr in XPath('./w:rPr')(elem):
rs = RunStyle(rPr)
if self.character_style is None:
self.character_style = rs
else:
self.character_style.update(rs)
if self.style_type == 'numbering':
self.numbering_style_link = None
for x in XPath('./w:pPr/w:numPr/w:numId[@w:val]')(elem):
self.numbering_style_link = get(x, 'w:val')
def resolve_based_on(self, parent):
if parent.paragraph_style is not None:
if self.paragraph_style is None:
self.paragraph_style = ParagraphStyle()
self.paragraph_style.resolve_based_on(parent.paragraph_style)
if parent.character_style is not None:
if self.character_style is None:
self.character_style = RunStyle()
self.character_style.resolve_based_on(parent.character_style)
class Styles(object):
'''
Collection of all styles defined in the document. Used to get the final styles applicable to elements in the document markup.
'''
def __init__(self):
self.id_map = OrderedDict()
self.para_cache = {}
self.para_char_cache = {}
self.run_cache = {}
self.classes = {}
self.counter = Counter()
self.default_styles = {}
self.numbering_style_links = {}
def __iter__(self):
for s in self.id_map.itervalues():
yield s
def __getitem__(self, key):
return self.id_map[key]
def __len__(self):
return len(self.id_map)
def get(self, key, default=None):
return self.id_map.get(key, default)
def __call__(self, root):
for s in XPath('//w:style')(root):
s = Style(s)
if s.style_id:
self.id_map[s.style_id] = s
if s.is_default:
self.default_styles[s.style_type] = s
if s.style_type == 'numbering' and s.numbering_style_link:
self.numbering_style_links[s.style_id] = s.numbering_style_link
self.default_paragraph_style = self.default_character_style = None
for dd in XPath('./w:docDefaults')(root):
for pd in XPath('./w:pPrDefault')(dd):
for pPr in XPath('./w:pPr')(pd):
ps = ParagraphStyle(pPr)
if self.default_paragraph_style is None:
self.default_paragraph_style = ps
else:
self.default_paragraph_style.update(ps)
for pd in XPath('./w:rPrDefault')(dd):
for pPr in XPath('./w:rPr')(pd):
ps = RunStyle(pPr)
if self.default_character_style is None:
self.default_character_style = ps
else:
self.default_character_style.update(ps)
def resolve(s, p):
if p is not None:
if not p.resolved:
resolve(p, self.get(p.based_on))
s.resolve_based_on(p)
s.resolved = True
for s in self:
if not s.resolved:
resolve(s, self.get(s.based_on))
def para_val(self, parent_styles, direct_formatting, attr):
val = getattr(direct_formatting, attr)
if val is inherit:
for ps in reversed(parent_styles):
pval = getattr(ps, attr)
if pval is not inherit:
val = pval
break
return val
def run_val(self, parent_styles, direct_formatting, attr):
val = getattr(direct_formatting, attr)
if val is not inherit:
return val
if attr in direct_formatting.toggle_properties:
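# OOXML toggle properties (bold, italic, caps, ...) do not simply override: each style in
# the hierarchy that sets the property flips its effective value, hence the XOR below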
val = False
for rs in parent_styles:
pval = getattr(rs, attr)
if pval is True:
val ^= True
return val
for rs in reversed(parent_styles):
rval = getattr(rs, attr)
if rval is not inherit:
return rval
return val
def resolve_paragraph(self, p):
ans = self.para_cache.get(p, None)
if ans is None:
ans = self.para_cache[p] = ParagraphStyle()
ans.style_name = None
direct_formatting = None
for pPr in XPath('./w:pPr')(p):
ps = ParagraphStyle(pPr)
if direct_formatting is None:
direct_formatting = ps
else:
direct_formatting.update(ps)
if direct_formatting is None:
direct_formatting = ParagraphStyle()
parent_styles = []
if self.default_paragraph_style is not None:
parent_styles.append(self.default_paragraph_style)
default_para = self.default_styles.get('paragraph', None)
if direct_formatting.linked_style is not None:
ls = self.get(direct_formatting.linked_style)
if ls is not None:
ans.style_name = ls.name
ps = ls.paragraph_style
if ps is not None:
parent_styles.append(ps)
if ls.character_style is not None:
self.para_char_cache[p] = ls.character_style
elif default_para is not None:
if default_para.paragraph_style is not None:
parent_styles.append(default_para.paragraph_style)
if default_para.character_style is not None:
self.para_char_cache[p] = default_para.character_style
for attr in ans.all_properties:
setattr(ans, attr, self.para_val(parent_styles, direct_formatting, attr))
return ans
def resolve_run(self, r):
ans = self.run_cache.get(r, None)
if ans is None:
p = r.getparent()
ans = self.run_cache[r] = RunStyle()
direct_formatting = None
for rPr in XPath('./w:rPr')(r):
rs = RunStyle(rPr)
if direct_formatting is None:
direct_formatting = rs
else:
direct_formatting.update(rs)
if direct_formatting is None:
direct_formatting = RunStyle()
parent_styles = []
default_char = self.default_styles.get('character', None)
if self.default_character_style is not None:
parent_styles.append(self.default_character_style)
pstyle = self.para_char_cache.get(p, None)
if pstyle is not None:
parent_styles.append(pstyle)
if direct_formatting.linked_style is not None:
    ls = self.get(direct_formatting.linked_style)
    if ls is not None and ls.character_style is not None:
        parent_styles.append(ls.character_style)
elif default_char is not None and default_char.character_style is not None:
parent_styles.append(default_char.character_style)
for attr in ans.all_properties:
setattr(ans, attr, self.run_val(parent_styles, direct_formatting, attr))
return ans
def resolve(self, obj):
if obj.tag.endswith('}p'):
return self.resolve_paragraph(obj)
if obj.tag.endswith('}r'):
return self.resolve_run(obj)
def resolve_numbering(self, numbering):
pass # TODO: Implement this
def register(self, css, prefix):
h = hash(tuple(css.iteritems()))
ans, _ = self.classes.get(h, (None, None))
if ans is None:
self.counter[prefix] += 1
ans = '%s_%d' % (prefix, self.counter[prefix])
self.classes[h] = (ans, css)
return ans
def generate_classes(self):
for bs in self.para_cache.itervalues():
css = bs.css
if css:
self.register(css, 'block')
for bs in self.run_cache.itervalues():
css = bs.css
if css:
self.register(css, 'text')
def class_name(self, css):
h = hash(tuple(css.iteritems()))
return self.classes.get(h, (None, None))[0]
def generate_css(self):
prefix = textwrap.dedent(
'''\
p { margin: 0; padding: 0; text-indent: 1.5em }
''')
ans = []
for (cls, css) in sorted(self.classes.itervalues(), key=lambda x:x[0]):
b = ('\t%s: %s;' % (k, v) for k, v in css.iteritems())
b = '\n'.join(b)
ans.append('.%s {\n%s\n}\n' % (cls, b.rstrip(';')))
return prefix + '\n' + '\n'.join(ans)

View File

@ -0,0 +1,233 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import sys, os, re
from lxml import html
from lxml.html.builder import (
HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR)
from calibre.ebooks.docx.container import DOCX, fromstring
from calibre.ebooks.docx.names import XPath, is_tag, barename, XML, STYLES, NUMBERING
from calibre.ebooks.docx.styles import Styles, inherit
from calibre.ebooks.docx.numbering import Numbering
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
class Text:
def __init__(self, elem, attr, buf):
self.elem, self.attr, self.buf = elem, attr, buf
def add_elem(self, elem):
setattr(self.elem, self.attr, ''.join(self.buf))
self.elem, self.attr, self.buf = elem, 'tail', []
class Convert(object):
def __init__(self, path_or_stream, dest_dir=None, log=None):
self.docx = DOCX(path_or_stream, log=log)
self.log = self.docx.log
self.dest_dir = dest_dir or os.getcwdu()
self.mi = self.docx.metadata
self.body = BODY()
self.styles = Styles()
self.object_map = {}
self.html = HTML(
HEAD(
META(charset='utf-8'),
TITLE(self.mi.title or _('Unknown')),
LINK(rel='stylesheet', type='text/css', href='docx.css'),
),
self.body
)
self.html.text='\n\t'
self.html[0].text='\n\t\t'
self.html[0].tail='\n'
for child in self.html[0]:
child.tail = '\n\t\t'
self.html[0][-1].tail = '\n\t'
self.html[1].text = self.html[1].tail = '\n'
lang = canonicalize_lang(self.mi.language)
if lang and lang != 'und':
lang = lang_as_iso639_1(lang)
if lang:
self.html.set('lang', lang)
def __call__(self):
doc = self.docx.document
relationships_by_id, relationships_by_type = self.docx.document_relationships
self.read_styles(relationships_by_type)
for top_level in XPath('/w:document/w:body/*')(doc):
if is_tag(top_level, 'w:p'):
p = self.convert_p(top_level)
self.body.append(p)
elif is_tag(top_level, 'w:tbl'):
pass # TODO: tables
elif is_tag(top_level, 'w:sectPr'):
pass # TODO: Last section properties
else:
self.log.debug('Unknown top-level tag: %s, ignoring' % barename(top_level.tag))
if len(self.body) > 0:
self.body.text = '\n\t'
for child in self.body:
child.tail = '\n\t'
self.body[-1].tail = '\n'
self.styles.generate_classes()
for html_obj, obj in self.object_map.iteritems():
style = self.styles.resolve(obj)
if style is not None:
css = style.css
if css:
cls = self.styles.class_name(css)
if cls:
html_obj.set('class', cls)
self.write()
def read_styles(self, relationships_by_type):
def get_name(rtype, defname):
    name = relationships_by_type.get(rtype, None)
    if name is None:
        cname = self.docx.document_name.split('/')
        cname[-1] = defname
        cname = '/'.join(cname)
        if self.docx.exists(cname):
            name = cname
    return name
nname = get_name(NUMBERING, 'numbering.xml')
sname = get_name(STYLES, 'styles.xml')
numbering = Numbering()
if sname is not None:
try:
raw = self.docx.read(sname)
except KeyError:
self.log.warn('Styles file %s does not exist' % sname)
else:
self.styles(fromstring(raw))
if nname is not None:
try:
raw = self.docx.read(nname)
except KeyError:
self.log.warn('Numbering styles file %s does not exist' % nname)
else:
numbering(fromstring(raw), self.styles)
self.styles.resolve_numbering(numbering)
def write(self):
raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
f.write(raw)
css = self.styles.generate_css()
if css:
with open(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f:
f.write(css.encode('utf-8'))
def convert_p(self, p):
dest = P()
style = self.styles.resolve_paragraph(p)
for run in XPath('descendant::w:r')(p):
span = self.convert_run(run)
dest.append(span)
m = re.match(r'heading\s+(\d+)$', style.style_name or '', re.IGNORECASE)
if m is not None:
n = max(1, min(6, int(m.group(1))))
dest.tag = 'h%d' % n
if style.direction == 'rtl':
dest.set('dir', 'rtl')
border_runs = []
common_borders = []
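# Group consecutive runs whose borders are identical so each group can share a single bordered wrapper <span>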
for span in dest:
    run = self.object_map[span]
    style = self.styles.resolve_run(run)
    if not border_runs or border_runs[-1][1].same_border(style):
        border_runs.append((span, style))
    elif border_runs:
        if len(border_runs) > 1:
            common_borders.append(border_runs)
        border_runs = [(span, style)]
if len(border_runs) > 1:
    common_borders.append(border_runs)
for border_run in common_borders:
spans = []
bs = {}
for span, style in border_run:
c = style.css
spans.append(span)
for x in ('width', 'color', 'style'):
val = c.pop('border-%s' % x, None)
if val is not None:
bs['border-%s' % x] = val
if bs:
cls = self.styles.register(bs, 'text_border')
wrapper = self.wrap_elems(spans, SPAN())
wrapper.set('class', cls)
self.object_map[dest] = p
return dest
def wrap_elems(self, elems, wrapper):
    p = elems[0].getparent()
    idx = p.index(elems[0])
    p.insert(idx, wrapper)
    wrapper.tail = elems[-1].tail
    elems[-1].tail = None
    for elem in elems:
        p.remove(elem)
        wrapper.append(elem)
    return wrapper
def convert_run(self, run):
ans = SPAN()
text = Text(ans, 'text', [])
for child in run:
if is_tag(child, 'w:t'):
if not child.text:
continue
space = child.get(XML('space'), None)
if space == 'preserve':
text.add_elem(SPAN(child.text, style="white-space:pre-wrap"))
ans.append(text.elem)
else:
text.buf.append(child.text)
elif is_tag(child, 'w:cr'):
text.add_elem(BR())
ans.append(text.elem)
elif is_tag(child, 'w:br'):
typ = get(child, 'w:type')
if typ in {'column', 'page'}:
br = BR(style='page-break-after:always')
else:
clear = get(child, 'w:clear')
if clear in {'all', 'left', 'right'}:
br = BR(style='clear:%s'%('both' if clear == 'all' else clear))
else:
br = BR()
text.add_elem(br)
ans.append(text.elem)
if text.buf:
setattr(text.elem, text.attr, ''.join(text.buf))
style = self.styles.resolve_run(run)
if style.vert_align in {'superscript', 'subscript'}:
ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
if style.lang is not inherit:
ans.set('lang', style.lang)
self.object_map[ans] = run
return ans
if __name__ == '__main__':
from calibre.utils.logging import default_log
default_log.filter_level = default_log.DEBUG
Convert(sys.argv[-1], log=default_log)()

View File

@ -136,7 +136,7 @@ class FB2MLizer(object):
                metadata['author'] += '<last-name>%s</last-name>' % prepare_string_for_xml(author_last)
            metadata['author'] += '</author>'
        if not metadata['author']:
-           metadata['author'] = u'<author><first-name></first-name><last-name><last-name></author>'
+           metadata['author'] = u'<author><first-name></first-name><last-name></last-name></author>'
        metadata['keywords'] = u''
        tags = list(map(unicode, self.oeb_book.metadata.subject))

View File

@ -178,6 +178,8 @@ class Metadata(object):
        return key in object.__getattribute__(self, '_data')
    def deepcopy(self):
+       ''' Do not use this method unless you know what you are doing, if you want to create a simple clone of
+       this object, use :method:`deepcopy_metadata` instead. '''
        m = Metadata(None)
        m.__dict__ = copy.deepcopy(self.__dict__)
        object.__setattr__(m, '_data', copy.deepcopy(object.__getattribute__(self, '_data')))

View File

@ -14,16 +14,15 @@ class SafeFormat(TemplateFormatter):
    def __init__(self):
        TemplateFormatter.__init__(self)
-       from calibre.ebooks.metadata.book.base import field_metadata
-       self.field_metadata = field_metadata
    def get_value(self, orig_key, args, kwargs):
        if not orig_key:
            return ''
        key = orig_key = orig_key.lower()
-       if key != 'title_sort' and key not in TOP_LEVEL_IDENTIFIERS and \
-               key not in ALL_METADATA_FIELDS:
-           key = self.field_metadata.search_term_to_field_key(key)
+       if (key != 'title_sort' and key not in TOP_LEVEL_IDENTIFIERS and
+               key not in ALL_METADATA_FIELDS):
+           from calibre.ebooks.metadata.book.base import field_metadata
+           key = field_metadata.search_term_to_field_key(key)
        if key is None or (self.book and
                key not in self.book.all_field_keys()):
            if hasattr(self.book, orig_key):

View File

@ -7,70 +7,21 @@ __license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
-from lxml import etree
-from calibre.ebooks.metadata.book.base import Metadata
+from calibre.ebooks.docx.container import DOCX
from calibre.utils.zipfile import ZipFile
from calibre.utils.magick.draw import identify_data
-from calibre.ebooks.oeb.base import DC11_NS
-from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
-NSMAP = {'dc':DC11_NS,
-        'cp':'http://schemas.openxmlformats.org/package/2006/metadata/core-properties'}
-def XPath(expr):
-    return etree.XPath(expr, namespaces=NSMAP)
-def _read_doc_props(raw, mi):
-    from calibre.ebooks.metadata import string_to_authors
-    root = etree.fromstring(raw, parser=RECOVER_PARSER)
-    titles = XPath('//dc:title')(root)
-    if titles:
-        title = titles[0].text
-        if title and title.strip():
-            mi.title = title.strip()
-    tags = []
-    for subject in XPath('//dc:subject')(root):
-        if subject.text and subject.text.strip():
-            tags.append(subject.text.strip().replace(',', '_'))
-    for keywords in XPath('//cp:keywords')(root):
-        if keywords.text and keywords.text.strip():
-            for x in keywords.text.split():
-                tags.extend(y.strip() for y in x.split(','))
-    if tags:
-        mi.tags = tags
-    authors = XPath('//dc:creator')(root)
-    aut = []
-    for author in authors:
-        if author.text and author.text.strip():
-            aut.extend(string_to_authors(author.text))
-    if aut:
-        mi.authors = aut
-    desc = XPath('//dc:description')(root)
-    if desc:
-        raw = etree.tostring(desc[0], method='text', encoding=unicode)
-        mi.comments = raw
-def _read_app_props(raw, mi):
-    root = etree.fromstring(raw, parser=RECOVER_PARSER)
-    company = root.xpath('//*[local-name()="Company"]')
-    if company and company[0].text and company[0].text.strip():
-        mi.publisher = company[0].text.strip()
def get_metadata(stream):
+   c = DOCX(stream, extract=False)
+   mi = c.metadata
+   c.close()
+   stream.seek(0)
+   cdata = None
    with ZipFile(stream, 'r') as zf:
-       mi = Metadata(_('Unknown'))
-       cdata = None
        for zi in zf.infolist():
            ext = zi.filename.rpartition('.')[-1].lower()
-           if zi.filename.lower() == 'docprops/core.xml':
-               _read_doc_props(zf.read(zi), mi)
-           elif zi.filename.lower() == 'docprops/app.xml':
-               _read_app_props(zf.read(zi), mi)
-           elif cdata is None and ext in {'jpeg', 'jpg', 'png', 'gif'}:
+           if cdata is None and ext in {'jpeg', 'jpg', 'png', 'gif'}:
                raw = zf.read(zi)
                try:
                    width, height, fmt = identify_data(raw)

View File

@ -13,12 +13,12 @@ from calibre.utils.date import parse_date
from calibre.ebooks.mobi import MobiError
from calibre.ebooks.metadata import MetaInformation, check_isbn
from calibre.ebooks.mobi.langcodes import main_language, sub_language, mobi2iana
-from calibre.utils.cleantext import clean_ascii_chars
+from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
from calibre.utils.localization import canonicalize_lang
NULL_INDEX = 0xffffffff
class EXTHHeader(object): # {{{
    def __init__(self, raw, codec, title):
        self.doctype = raw[:4]
@ -62,7 +62,7 @@ class EXTHHeader(object): # {{{
            elif idx == 502:
                # last update time
                pass
            elif idx == 503: # Long title
                # Amazon seems to regard this as the definitive book title
                # rather than the title from the PDB header. In fact when
                # sending MOBI files through Amazon's email service if the
@ -72,7 +72,7 @@ class EXTHHeader(object): # {{{
                    title = self.decode(content)
                except:
                    pass
            elif idx == 524: # Lang code
                try:
                    lang = content.decode(codec)
                    lang = canonicalize_lang(lang)
@ -83,22 +83,22 @@ class EXTHHeader(object): # {{{
            #else:
            #    print 'unknown record', idx, repr(content)
        if title:
-           self.mi.title = replace_entities(clean_ascii_chars(title))
+           self.mi.title = replace_entities(clean_xml_chars(clean_ascii_chars(title)))
    def process_metadata(self, idx, content, codec):
        if idx == 100:
            if self.mi.is_null('authors'):
                self.mi.authors = []
-           au = self.decode(content).strip()
+           au = clean_xml_chars(self.decode(content).strip())
            self.mi.authors.append(au)
            if self.mi.is_null('author_sort') and re.match(r'\S+?\s*,\s+\S+', au.strip()):
                self.mi.author_sort = au.strip()
        elif idx == 101:
-           self.mi.publisher = self.decode(content).strip()
+           self.mi.publisher = clean_xml_chars(self.decode(content).strip())
            if self.mi.publisher in {'Unknown', _('Unknown')}:
                self.mi.publisher = None
        elif idx == 103:
-           self.mi.comments = self.decode(content).strip()
+           self.mi.comments = clean_xml_chars(self.decode(content).strip())
        elif idx == 104:
            raw = check_isbn(self.decode(content).strip().replace('-', ''))
            if raw:
@ -106,7 +106,7 @@ class EXTHHeader(object): # {{{
        elif idx == 105:
            if not self.mi.tags:
                self.mi.tags = []
-           self.mi.tags.extend([x.strip() for x in self.decode(content).split(';')])
+           self.mi.tags.extend([x.strip() for x in clean_xml_chars(self.decode(content)).split(';')])
            self.mi.tags = list(set(self.mi.tags))
        elif idx == 106:
            try:
@ -114,8 +114,8 @@ class EXTHHeader(object): # {{{
            except:
                pass
        elif idx == 108:
-           self.mi.book_producer = self.decode(content).strip()
+           self.mi.book_producer = clean_xml_chars(self.decode(content).strip())
        elif idx == 112: # dc:source set in some EBSP amazon samples
            try:
                content = content.decode(codec).strip()
                isig = 'urn:isbn:'
@ -131,7 +131,7 @@ class EXTHHeader(object): # {{{
                self.mi.application_id = self.mi.uuid = cid
            except:
                pass
        elif idx == 113: # ASIN or other id
            try:
                self.uuid = content.decode('ascii')
                self.mi.set_identifier('mobi-asin', self.uuid)
@ -242,7 +242,7 @@ class BookHeader(object):
            # if cnt is 1 or less, fdst section number can be garbage
            if self.fdstcnt <= 1:
                self.fdstidx = NULL_INDEX
        else: # Null values
            self.skelidx = self.dividx = self.othidx = self.fdstidx = \
                NULL_INDEX

View File

@ -24,6 +24,7 @@ from calibre.ebooks.oeb.base import namespace, barename, XPath, xpath, \
        urlnormalize, BINARY_MIME, \
        OEBError, OEBBook, DirContainer
from calibre.ebooks.oeb.writer import OEBWriter
+from calibre.utils.cleantext import clean_xml_chars
from calibre.utils.localization import get_lang
from calibre.ptempfile import TemporaryDirectory
from calibre.constants import __appname__, __version__
@ -106,7 +107,7 @@ class OEBReader(object):
        try:
            opf = etree.fromstring(data)
        except etree.XMLSyntaxError:
-           data = xml_replace_entities(data, encoding=None)
+           data = xml_replace_entities(clean_xml_chars(data), encoding=None)
            try:
                opf = etree.fromstring(data)
                self.logger.warn('OPF contains invalid HTML named entities')

View File

@ -187,7 +187,7 @@ def _config(): # {{{
    c.add_opt('shortcuts_search_history', default=[],
              help='Search history for the keyboard preferences')
    c.add_opt('jobs_search_history', default=[],
-             help='Search history for the keyboard preferences')
+             help='Search history for the tweaks preferences')
    c.add_opt('tweaks_search_history', default=[],
              help='Search history for tweaks')
    c.add_opt('worker_limit', default=6,

View File

@ -116,11 +116,12 @@ class MovedDialog(QDialog): # {{{
self.cd.setIcon(QIcon(I('document_open.png'))) self.cd.setIcon(QIcon(I('document_open.png')))
self.cd.clicked.connect(self.choose_dir) self.cd.clicked.connect(self.choose_dir)
l.addWidget(self.cd, 2, 1, 1, 1) l.addWidget(self.cd, 2, 1, 1, 1)
self.bb = QDialogButtonBox(self) self.bb = QDialogButtonBox(QDialogButtonBox.Abort)
b = self.bb.addButton(_('Library moved'), self.bb.AcceptRole) b = self.bb.addButton(_('Library moved'), self.bb.AcceptRole)
b.setIcon(QIcon(I('ok.png'))) b.setIcon(QIcon(I('ok.png')))
b = self.bb.addButton(_('Forget library'), self.bb.RejectRole) b = self.bb.addButton(_('Forget library'), self.bb.RejectRole)
b.setIcon(QIcon(I('edit-clear.png'))) b.setIcon(QIcon(I('edit-clear.png')))
b.clicked.connect(self.forget_library)
self.bb.accepted.connect(self.accept) self.bb.accepted.connect(self.accept)
self.bb.rejected.connect(self.reject) self.bb.rejected.connect(self.reject)
l.addWidget(self.bb, 3, 0, 1, ncols) l.addWidget(self.bb, 3, 0, 1, ncols)
@ -132,9 +133,8 @@ class MovedDialog(QDialog): # {{{
if d is not None: if d is not None:
self.loc.setText(d) self.loc.setText(d)
def reject(self): def forget_library(self):
self.stats.remove(self.location) self.stats.remove(self.location)
QDialog.reject(self)
def accept(self): def accept(self):
newloc = unicode(self.loc.text()) newloc = unicode(self.loc.text())
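This is a classic Qt dialog fix: with the destructive work inside an overridden reject(), every way of dismissing the dialog (Esc, the window close button) forgot the library. Hanging it off the specific button's clicked signal leaves reject() as a plain cancel. The pattern in isolation, assuming PyQt4:

    from PyQt4.Qt import QDialog, QDialogButtonBox, QVBoxLayout

    class MovedExample(QDialog):
        def __init__(self, parent=None):
            QDialog.__init__(self, parent)
            bb = QDialogButtonBox(QDialogButtonBox.Abort)
            b = bb.addButton('Forget library', bb.RejectRole)
            # The destructive action fires only on an explicit click on
            # this button; Esc still routes through reject() harmlessly.
            b.clicked.connect(self.forget_library)
            bb.accepted.connect(self.accept)
            bb.rejected.connect(self.reject)
            QVBoxLayout(self).addWidget(bb)

        def forget_library(self):
            pass  # drop the stored library location here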

View File

@ -240,9 +240,10 @@ class EditMetadataAction(InterfaceAction):
opf, cov = id_map[book_id] opf, cov = id_map[book_id]
cfile = mi.cover cfile = mi.cover
mi.cover, mi.cover_data = None, (None, None) mi.cover, mi.cover_data = None, (None, None)
with open(opf, 'wb') as f: if opf is not None:
f.write(metadata_to_opf(mi)) with open(opf, 'wb') as f:
if cfile: f.write(metadata_to_opf(mi))
if cfile and cov:
shutil.copyfile(cfile, cov) shutil.copyfile(cfile, cov)
os.remove(cfile) os.remove(cfile)
nid_map[book_id] = id_map[book_id] nid_map[book_id] = id_map[book_id]
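The added guards fix the crash when only covers were downloaded: in that case the download job hands back no OPF path, so opf is None and the unconditional open() blew up; likewise the cover copy now requires both a source and a destination. The shape of the fix, with hypothetical argument names:

    import os, shutil

    def apply_downloaded_metadata(opf, cov, cfile, opf_bytes):
        # opf is None when the user chose to download covers only.
        if opf is not None:
            with open(opf, 'wb') as f:
                f.write(opf_bytes)
        # Copy the new cover only when both paths actually exist.
        if cfile and cov:
            shutil.copyfile(cfile, cov)
            os.remove(cfile)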

View File

@ -38,6 +38,13 @@ class ShowQuickviewAction(InterfaceAction):
Quickview(self.gui, self.gui.library_view, index) Quickview(self.gui, self.gui.library_view, index)
self.current_instance.show() self.current_instance.show()
def change_quickview_column(self, idx):
self.show_quickview()
if self.current_instance:
if self.current_instance.is_closed:
return
self.current_instance.change_quickview_column.emit(idx)
def library_changed(self, db): def library_changed(self, db):
if self.current_instance and not self.current_instance.is_closed: if self.current_instance and not self.current_instance.is_closed:
self.current_instance.set_database(db) self.current_instance.set_database(db)

View File

@ -28,7 +28,10 @@ class ConfigWidget(QWidget, Ui_ConfigWidget):
all_formats = set(all_formats) all_formats = set(all_formats)
self.calibre_known_formats = device.FORMATS self.calibre_known_formats = device.FORMATS
self.device_name = device.get_gui_name() try:
self.device_name = device.get_gui_name()
except TypeError:
self.device_name = getattr(device, 'gui_name', None) or _('Device')
if device.USER_CAN_ADD_NEW_FORMATS: if device.USER_CAN_ADD_NEW_FORMATS:
all_formats = set(all_formats) | set(BOOK_EXTENSIONS) all_formats = set(all_formats) | set(BOOK_EXTENSIONS)
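The try/except protects the device config widget from third-party drivers whose get_gui_name has an incompatible signature (or is missing), which surfaces as a TypeError at call time; the fallback prefers the plain gui_name attribute and finally a generic label. The defensive call on its own:

    def safe_gui_name(device):
        # Some drivers define get_gui_name() with a different signature,
        # raising TypeError when called with no arguments.
        try:
            return device.get_gui_name()
        except TypeError:
            return getattr(device, 'gui_name', None) or 'Device'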

View File

@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
from PyQt4.Qt import (Qt, QDialog, QAbstractItemView, QTableWidgetItem, from PyQt4.Qt import (Qt, QDialog, QAbstractItemView, QTableWidgetItem,
QListWidgetItem, QByteArray, QCoreApplication, QListWidgetItem, QByteArray, QCoreApplication,
QApplication) QApplication, pyqtSignal)
from calibre.customize.ui import find_plugin from calibre.customize.ui import find_plugin
from calibre.gui2 import gprefs from calibre.gui2 import gprefs
@ -44,6 +44,8 @@ class TableItem(QTableWidgetItem):
class Quickview(QDialog, Ui_Quickview): class Quickview(QDialog, Ui_Quickview):
change_quickview_column = pyqtSignal(object)
def __init__(self, gui, view, row): def __init__(self, gui, view, row):
QDialog.__init__(self, gui, flags=Qt.Window) QDialog.__init__(self, gui, flags=Qt.Window)
Ui_Quickview.__init__(self) Ui_Quickview.__init__(self)
@ -105,6 +107,7 @@ class Quickview(QDialog, Ui_Quickview):
self.refresh(row) self.refresh(row)
self.view.clicked.connect(self.slave) self.view.clicked.connect(self.slave)
self.change_quickview_column.connect(self.slave)
QCoreApplication.instance().aboutToQuit.connect(self.save_state) QCoreApplication.instance().aboutToQuit.connect(self.save_state)
self.search_button.clicked.connect(self.do_search) self.search_button.clicked.connect(self.do_search)
view.model().new_bookdisplay_data.connect(self.book_was_changed) view.model().new_bookdisplay_data.connect(self.book_was_changed)
@ -164,6 +167,8 @@ class Quickview(QDialog, Ui_Quickview):
if vals: if vals:
self.no_valid_items = False self.no_valid_items = False
if self.db.field_metadata[key]['datatype'] == 'rating':
vals = unicode(vals/2)
if not isinstance(vals, list): if not isinstance(vals, list):
vals = [vals] vals = [vals]
vals.sort(key=sort_key) vals.sort(key=sort_key)
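The rating special case reflects calibre's storage convention: ratings are kept on a 0-10 scale in the database while the interface shows 0-5 stars, so Quickview halves the raw value before display. A sketch in Python 2, matching the file above:

    def rating_for_display(raw):
        # Stored ratings are doubled (0-10); the UI shows 0-5 stars.
        return unicode(raw / 2)

    assert rating_for_display(8) == u'4'  # a stored 8 is four stars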

View File

@ -197,6 +197,16 @@ class BooksView(QTableView): # {{{
elif action.startswith('align_'): elif action.startswith('align_'):
alignment = action.partition('_')[-1] alignment = action.partition('_')[-1]
self._model.change_alignment(column, alignment) self._model.change_alignment(column, alignment)
elif action == 'quickview':
from calibre.customize.ui import find_plugin
qv = find_plugin('Show Quickview')
if qv:
rows = self.selectionModel().selectedRows()
if len(rows) > 0:
current_row = rows[0].row()
current_col = self.column_map.index(column)
index = self.model().index(current_row, current_col)
qv.actual_plugin_.change_quickview_column(index)
self.save_state() self.save_state()
@ -240,7 +250,14 @@ class BooksView(QTableView): # {{{
a.setCheckable(True) a.setCheckable(True)
a.setChecked(True) a.setChecked(True)
if self._model.db.field_metadata[col]['is_category']:
act = self.column_header_context_menu.addAction(_('Quickview column %s') %
name,
partial(self.column_header_context_handler, action='quickview',
column=col))
rows = self.selectionModel().selectedRows()
if len(rows) > 1:
act.setEnabled(False)
hidden_cols = [self.column_map[i] for i in hidden_cols = [self.column_map[i] for i in
range(self.column_header.count()) if range(self.column_header.count()) if

View File

@ -172,7 +172,10 @@ class Tweaks(QAbstractListModel, SearchQueryParser): # {{{
doc.append(line[1:].strip()) doc.append(line[1:].strip())
doc = '\n'.join(doc) doc = '\n'.join(doc)
while True: while True:
line = lines[pos] try:
line = lines[pos]
except IndexError:
break
if not line.strip(): if not line.strip():
break break
spidx1 = line.find(' ') spidx1 = line.find(' ')
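The try/except stops the tweaks parser from walking off the end of the file when a tweak's documentation block is not followed by a blank line; an IndexError is now treated the same as the blank-line terminator. The loop shape, runnable as-is:

    def collect_block(lines, pos):
        out = []
        while True:
            try:
                line = lines[pos]
            except IndexError:
                break  # end of file acts like a blank terminator
            if not line.strip():
                break
            out.append(line)
            pos += 1
        return out

    assert collect_block(['a', 'b'], 0) == ['a', 'b']  # no trailing blank needed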

View File

@ -12,6 +12,7 @@ from PyQt4.Qt import (
from calibre.gui2 import error_dialog, question_dialog from calibre.gui2 import error_dialog, question_dialog
from calibre.gui2.widgets import ComboBoxWithHelp from calibre.gui2.widgets import ComboBoxWithHelp
from calibre.utils.config_base import tweaks
from calibre.utils.icu import sort_key from calibre.utils.icu import sort_key
from calibre.utils.search_query_parser import ParseException from calibre.utils.search_query_parser import ParseException
from calibre.utils.search_query_parser import saved_searches from calibre.utils.search_query_parser import saved_searches
@ -549,6 +550,9 @@ class SearchRestrictionMixin(object):
restriction = '' restriction = ''
self._apply_search_restriction(restriction, r) self._apply_search_restriction(restriction, r)
def clear_additional_restriction(self):
self._apply_search_restriction('', '')
def _apply_search_restriction(self, restriction, name): def _apply_search_restriction(self, restriction, name):
self.saved_search.clear() self.saved_search.clear()
# The order below is important. Set the restriction, force a '' search # The order below is important. Set the restriction, force a '' search
@ -561,6 +565,10 @@ class SearchRestrictionMixin(object):
self.set_number_of_books_shown() self.set_number_of_books_shown()
self.current_view().setFocus(Qt.OtherFocusReason) self.current_view().setFocus(Qt.OtherFocusReason)
self.set_window_title() self.set_window_title()
v = self.current_view()
if not v.currentIndex().isValid():
v.set_current_row()
v.refresh_book_details()
def set_number_of_books_shown(self): def set_number_of_books_shown(self):
db = self.library_view.model().db db = self.library_view.model().db
@ -569,8 +577,9 @@ class SearchRestrictionMixin(object):
rows = self.current_view().row_count() rows = self.current_view().row_count()
rbc = max(rows, db.data.get_search_restriction_book_count()) rbc = max(rows, db.data.get_search_restriction_book_count())
t = _("({0} of {1})").format(rows, rbc) t = _("({0} of {1})").format(rows, rbc)
self.search_count.setStyleSheet( if tweaks['highlight_virtual_library_book_count']:
'QLabel { border-radius: 8px; background-color: yellow; }') self.search_count.setStyleSheet(
'QLabel { border-radius: 8px; background-color: yellow; }')
else: # No restriction or not library view else: # No restriction or not library view
if not self.search.in_a_search(): if not self.search.in_a_search():
t = _("(all books)") t = _("(all books)")

View File

@ -1,14 +1,13 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import (division, absolute_import, print_function) from __future__ import (division, absolute_import, print_function)
store_version = 1 # Needed for dynamic plugin loading store_version = 2 # Needed for dynamic plugin loading
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2013, Tomasz Długosz <tomek3d@gmail.com>' __copyright__ = '2013, Tomasz Długosz <tomek3d@gmail.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import urllib import urllib
from base64 import b64encode
from contextlib import closing from contextlib import closing
from lxml import html from lxml import html
@ -25,19 +24,21 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
class KoobeStore(BasicStoreConfig, StorePlugin): class KoobeStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False): def open(self, parent=None, detail_item=None, external=False):
aff_root = 'https://www.a4b-tracking.com/pl/stat-click-text-link/15/58/' #aff_root = 'https://www.a4b-tracking.com/pl/stat-click-text-link/15/58/'
url = 'http://www.koobe.pl/' url = 'http://www.koobe.pl/'
aff_url = aff_root + str(b64encode(url)) #aff_url = aff_root + str(b64encode(url))
detail_url = None detail_url = None
if detail_item: if detail_item:
detail_url = aff_root + str(b64encode(detail_item)) detail_url = detail_item #aff_root + str(b64encode(detail_item))
if external or self.config.get('open_external', False): if external or self.config.get('open_external', False):
open_url(QUrl(url_slash_cleaner(detail_url if detail_url else aff_url))) #open_url(QUrl(url_slash_cleaner(detail_url if detail_url else aff_url)))
open_url(QUrl(url_slash_cleaner(detail_url if detail_url else url)))
else: else:
d = WebStoreDialog(self.gui, url, parent, detail_url if detail_url else aff_url) #d = WebStoreDialog(self.gui, url, parent, detail_url if detail_url else aff_url)
d = WebStoreDialog(self.gui, url, parent, detail_url if detail_url else url)
d.setWindowTitle(self.name) d.setWindowTitle(self.name)
d.set_tags(self.config.get('tags', '')) d.set_tags(self.config.get('tags', ''))
d.exec_() d.exec_()

View File

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function) from __future__ import (unicode_literals, division, absolute_import, print_function)
store_version = 2 # Needed for dynamic plugin loading store_version = 3 # Needed for dynamic plugin loading
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2011-2013, Tomasz Długosz <tomek3d@gmail.com>' __copyright__ = '2011-2013, Tomasz Długosz <tomek3d@gmail.com>'
@ -9,7 +9,6 @@ __docformat__ = 'restructuredtext en'
import re import re
import urllib import urllib
from base64 import b64encode
from contextlib import closing from contextlib import closing
from lxml import html from lxml import html
@ -26,19 +25,21 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
class WoblinkStore(BasicStoreConfig, StorePlugin): class WoblinkStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False): def open(self, parent=None, detail_item=None, external=False):
aff_root = 'https://www.a4b-tracking.com/pl/stat-click-text-link/16/58/' #aff_root = 'https://www.a4b-tracking.com/pl/stat-click-text-link/16/58/'
url = 'http://woblink.com/publication' url = 'http://woblink.com/publication'
aff_url = aff_root + str(b64encode(url)) #aff_url = aff_root + str(b64encode(url))
detail_url = None detail_url = None
if detail_item: if detail_item:
detail_url = aff_root + str(b64encode('http://woblink.com' + detail_item)) detail_url = 'http://woblink.com' + detail_item #aff_root + str(b64encode('http://woblink.com' + detail_item))
if external or self.config.get('open_external', False): if external or self.config.get('open_external', False):
open_url(QUrl(url_slash_cleaner(detail_url if detail_url else aff_url))) #open_url(QUrl(url_slash_cleaner(detail_url if detail_url else aff_url)))
open_url(QUrl(url_slash_cleaner(detail_url if detail_url else url)))
else: else:
d = WebStoreDialog(self.gui, url, parent, detail_url if detail_url else aff_url) #d = WebStoreDialog(self.gui, url, parent, detail_url if detail_url else aff_url)
d = WebStoreDialog(self.gui, url, parent, detail_url if detail_url else url)
d.setWindowTitle(self.name) d.setWindowTitle(self.name)
d.set_tags(self.config.get('tags', '')) d.set_tags(self.config.get('tags', ''))
d.exec_() d.exec_()

View File

@ -265,6 +265,27 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
action=self.esc_action) action=self.esc_action)
self.esc_action.triggered.connect(self.esc) self.esc_action.triggered.connect(self.esc)
self.shift_esc_action = QAction(self)
self.addAction(self.shift_esc_action)
self.keyboard.register_shortcut('focus book list',
_('Focus the book list'), default_keys=('Shift+Esc',),
action=self.shift_esc_action)
self.shift_esc_action.triggered.connect(self.shift_esc)
self.ctrl_esc_action = QAction(self)
self.addAction(self.ctrl_esc_action)
self.keyboard.register_shortcut('clear virtual library',
_('Clear the virtual library'), default_keys=('Ctrl+Esc',),
action=self.ctrl_esc_action)
self.ctrl_esc_action.triggered.connect(self.ctrl_esc)
self.alt_esc_action = QAction(self)
self.addAction(self.alt_esc_action)
self.keyboard.register_shortcut('clear additional restriction',
_('Clear the additional restriction'), default_keys=('Alt+Esc',),
action=self.alt_esc_action)
self.alt_esc_action.triggered.connect(self.clear_additional_restriction)
####################### Start spare job server ######################## ####################### Start spare job server ########################
QTimer.singleShot(1000, self.add_spare_server) QTimer.singleShot(1000, self.add_spare_server)
@ -377,6 +398,13 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
def esc(self, *args): def esc(self, *args):
self.clear_button.click() self.clear_button.click()
def shift_esc(self):
self.current_view().setFocus(Qt.OtherFocusReason)
def ctrl_esc(self):
self.apply_virtual_library()
self.current_view().setFocus(Qt.OtherFocusReason)
def start_smartdevice(self): def start_smartdevice(self):
message = None message = None
if self.device_manager.get_option('smartdevice', 'autostart'): if self.device_manager.get_option('smartdevice', 'autostart'):
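All three new shortcuts follow the same calibre idiom: create a bare QAction on the main window, register it with the keyboard manager so it can be remapped under Preferences->Keyboard, and connect triggered to the handler. The skeleton, with keyboard standing for calibre's shortcut manager:

    from PyQt4.Qt import QAction

    def register_main_shortcut(win, keyboard, unique_name, text, keys, handler):
        ac = QAction(win)
        win.addAction(ac)  # the action must live on the window to fire
        keyboard.register_shortcut(unique_name, text,
                default_keys=keys, action=ac)
        ac.triggered.connect(handler)
        return ac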

View File

@ -139,12 +139,20 @@ class ConfigDialog(QDialog, Ui_Dialog):
self.load_options(opts) self.load_options(opts)
self.init_load_themes() self.init_load_themes()
self.clear_search_history_button.clicked.connect(self.clear_search_history)
def clear_search_history(self):
from calibre.gui2 import config
config['viewer_search_history'] = []
def save_theme(self): def save_theme(self):
themename, ok = QInputDialog.getText(self, _('Theme name'), themename, ok = QInputDialog.getText(self, _('Theme name'),
_('Choose a name for this theme')) _('Choose a name for this theme'))
if not ok: return if not ok:
return
themename = unicode(themename).strip() themename = unicode(themename).strip()
if not themename: return if not themename:
return
c = config('') c = config('')
c.add_opt('theme_name_xxx', default=themename) c.add_opt('theme_name_xxx', default=themename)
self.save_options(c) self.save_options(c)
@ -247,7 +255,8 @@ class ConfigDialog(QDialog, Ui_Dialog):
def update_sample_colors(self): def update_sample_colors(self):
for x in ('text', 'background'): for x in ('text', 'background'):
val = getattr(self, 'current_%s_color'%x) val = getattr(self, 'current_%s_color'%x)
if not val: val = 'inherit' if x == 'text' else 'transparent' if not val:
val = 'inherit' if x == 'text' else 'transparent'
ss = 'QLabel { %s: %s }'%('background-color' if x == 'background' ss = 'QLabel { %s: %s }'%('background-color' if x == 'background'
else 'color', val) else 'color', val)
getattr(self, '%s_color_sample'%x).setStyleSheet(ss) getattr(self, '%s_color_sample'%x).setStyleSheet(ss)

View File

@ -68,7 +68,7 @@ QToolBox::tab:hover {
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>811</width> <width>811</width>
<height>352</height> <height>380</height>
</rect> </rect>
</property> </property>
<attribute name="label"> <attribute name="label">
@ -240,8 +240,8 @@ QToolBox::tab:hover {
<rect> <rect>
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>397</width> <width>811</width>
<height>232</height> <height>380</height>
</rect> </rect>
</property> </property>
<attribute name="label"> <attribute name="label">
@ -370,8 +370,8 @@ QToolBox::tab:hover {
<rect> <rect>
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>352</width> <width>811</width>
<height>176</height> <height>380</height>
</rect> </rect>
</property> </property>
<attribute name="label"> <attribute name="label">
@ -446,8 +446,8 @@ QToolBox::tab:hover {
<rect> <rect>
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>351</width> <width>811</width>
<height>76</height> <height>380</height>
</rect> </rect>
</property> </property>
<attribute name="label"> <attribute name="label">
@ -525,8 +525,8 @@ QToolBox::tab:hover {
<rect> <rect>
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>410</width> <width>811</width>
<height>120</height> <height>380</height>
</rect> </rect>
</property> </property>
<attribute name="label"> <attribute name="label">
@ -596,8 +596,8 @@ QToolBox::tab:hover {
<rect> <rect>
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>352</width> <width>811</width>
<height>151</height> <height>380</height>
</rect> </rect>
</property> </property>
<attribute name="label"> <attribute name="label">
@ -628,27 +628,34 @@ QToolBox::tab:hover {
</property> </property>
</widget> </widget>
</item> </item>
<item row="3" column="0" colspan="2"> <item row="5" column="0">
<widget class="QPushButton" name="clear_search_history_button">
<property name="text">
<string>Clear search history</string>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QCheckBox" name="opt_show_controls">
<property name="text">
<string>Show &amp;controls in the viewer window</string>
</property>
</widget>
</item>
<item row="3" column="0">
<widget class="QCheckBox" name="opt_remember_window_size"> <widget class="QCheckBox" name="opt_remember_window_size">
<property name="text"> <property name="text">
<string>Remember last used &amp;window size and layout</string> <string>Remember last used &amp;window size and layout</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="4" column="0" colspan="2"> <item row="4" column="0">
<widget class="QCheckBox" name="opt_remember_current_page"> <widget class="QCheckBox" name="opt_remember_current_page">
<property name="text"> <property name="text">
<string>Remember the &amp;current page when quitting</string> <string>Remember the &amp;current page when quitting</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="opt_show_controls">
<property name="text">
<string>Show &amp;controls in the viewer window</string>
</property>
</widget>
</item>
</layout> </layout>
</widget> </widget>
</widget> </widget>

View File

@ -940,6 +940,9 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
def do_config(self): def do_config(self):
self.view.config(self) self.view.config(self)
self.load_theme_menu() self.load_theme_menu()
from calibre.gui2 import config
if not config['viewer_search_history']:
self.search.clear_history()
def bookmark(self, *args): def bookmark(self, *args):
num = 1 num = 1
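The preferences half of the new button (in ConfigDialog, above) empties the viewer_search_history list in config; do_config here then notices the empty list once the dialog returns and clears the search box's in-widget history. The round trip, reduced to plain Python with hypothetical stand-ins:

    class SearchBox(object):  # stands in for the history-aware search widget
        def __init__(self):
            self.history = ['dragon', 'wizard']
        def clear_history(self):
            self.history = []

    config = {'viewer_search_history': ['dragon', 'wizard']}

    def clear_search_history():               # preferences dialog side
        config['viewer_search_history'] = []

    def do_config(search):                    # viewer side, after dialog
        if not config['viewer_search_history']:
            search.clear_history()

    box = SearchBox()
    clear_search_history()
    do_config(box)
    assert box.history == []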

View File

@ -113,7 +113,7 @@ class KindleDX(Kindle):
id = 'kindledx' id = 'kindledx'
class KindleFire(KindleDX): class KindleFire(KindleDX):
name = 'Kindle Fire' name = 'Kindle Fire and Fire HD'
id = 'kindle_fire' id = 'kindle_fire'
output_profile = 'kindle_fire' output_profile = 'kindle_fire'
supports_color = True supports_color = True
@ -431,7 +431,8 @@ class KindlePage(QWizardPage, KindleUI):
default = ac[2] default = ac[2]
if x.strip().endswith('@kindle.com'): if x.strip().endswith('@kindle.com'):
accs.append((x, default)) accs.append((x, default))
if default: has_default = True if default:
has_default = True
if has_default: if has_default:
accs = [x for x in accs if x[1]] accs = [x for x in accs if x[1]]
if accs: if accs:
@ -450,7 +451,8 @@ class KindlePage(QWizardPage, KindleUI):
if (len(parts) >= 2 and parts[0] and self.send_email_widget.set_email_settings(True)): if (len(parts) >= 2 and parts[0] and self.send_email_widget.set_email_settings(True)):
conf = smtp_prefs() conf = smtp_prefs()
accounts = conf.parse().accounts accounts = conf.parse().accounts
if not accounts: accounts = {} if not accounts:
accounts = {}
for y in accounts.values(): for y in accounts.values():
y[2] = False y[2] = False
accounts[x] = ['AZW, MOBI, TPZ, PRC, AZW1', True, True] accounts[x] = ['AZW, MOBI, TPZ, PRC, AZW1', True, True]
@ -484,9 +486,9 @@ class StanzaPage(QWizardPage, StanzaUI):
c = server_config() c = server_config()
c.set('port', p) c.set('port', p)
def set_port(self, *args): def set_port(self, *args):
if not self.content_server.isChecked(): return if not self.content_server.isChecked():
return
import socket import socket
s = socket.socket() s = socket.socket()
with closing(s): with closing(s):
@ -518,8 +520,7 @@ class DevicePage(QWizardPage, DeviceUI):
self.manufacturer_view.setModel(self.man_model) self.manufacturer_view.setModel(self.man_model)
previous = dynamic.get('welcome_wizard_device', False) previous = dynamic.get('welcome_wizard_device', False)
if previous: if previous:
previous = [x for x in get_devices() if \ previous = [x for x in get_devices() if x.id == previous]
x.id == previous]
if not previous: if not previous:
previous = [Device] previous = [Device]
previous = previous[0] previous = previous[0]
@ -841,7 +842,6 @@ class FinishPage(QWizardPage, FinishUI):
pass pass
class Wizard(QWizard): class Wizard(QWizard):
BUTTON_TEXTS = { BUTTON_TEXTS = {
@ -859,7 +859,6 @@ class Wizard(QWizard):
_('&Finish') _('&Finish')
_('Commit') _('Commit')
def __init__(self, parent): def __init__(self, parent):
QWizard.__init__(self, parent) QWizard.__init__(self, parent)
self.setWindowTitle(__appname__+' '+_('welcome wizard')) self.setWindowTitle(__appname__+' '+_('welcome wizard'))

View File

@ -61,7 +61,6 @@ class ContentServer(object):
8:'Aug', 9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'} 8:'Aug', 9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'}
return lm.replace('month', month[updated.month]) return lm.replace('month', month[updated.month])
def sort(self, items, field, order): def sort(self, items, field, order):
field = self.db.data.sanitize_sort_field_name(field) field = self.db.data.sanitize_sort_field_name(field)
if field not in self.db.field_metadata.sortable_field_keys(): if field not in self.db.field_metadata.sortable_field_keys():
@ -77,7 +76,7 @@ class ContentServer(object):
try: try:
id = int(id) id = int(id)
except ValueError: except ValueError:
id = id.rpartition('_')[-1].partition('.')[0] id = id.rpartition('.')[0].rpartition('_')[-1]
match = re.search(r'\d+', id) match = re.search(r'\d+', id)
if not match: if not match:
raise cherrypy.HTTPError(404, 'id:%s not an integer'%id) raise cherrypy.HTTPError(404, 'id:%s not an integer'%id)
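The reordered partition calls fix downloads of ORIGINAL_* formats (ticket 1177158): their lowercased extensions contain an underscore, so taking the text after the last underscore first yielded 'epub' instead of the book id, and the digit search then failed with a 404. Dropping the extension before splitting on '_' handles both cases; a worked example:

    def extract_id(name):
        # New order: strip the extension first, then take the trailing id,
        # so an extension like 'original_epub' cannot swallow it.
        return name.rpartition('.')[0].rpartition('_')[-1]

    assert extract_id('Some Book - Author_123.epub') == '123'
    assert extract_id('Some Book - Author_123.original_epub') == '123'
    # The old order, rpartition('_') before partition('.'), gave 'epub' here.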

22 file diffs suppressed because they are too large.

Some files were not shown because too many files have changed in this diff.