mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Read character styles
This commit is contained in:
parent
3e40c288ff
commit
a5549a0fc4
233
src/calibre/ebooks/docx/lcid.py
Normal file
233
src/calibre/ebooks/docx/lcid.py
Normal file
@ -0,0 +1,233 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
# Maps Windows language code identifiers (LCIDs, as decimal integers) to
# language codes. Regional variants of a language all map to the same bare
# language code; the region named in each comment is informational only.
lcid = {
    1078: 'af',  # Afrikaans - South Africa
    1052: 'sq',  # Albanian - Albania
    1118: 'am',  # Amharic - Ethiopia
    1025: 'ar',  # Arabic - Saudi Arabia
    5121: 'ar',  # Arabic - Algeria
    15361: 'ar',  # Arabic - Bahrain
    3073: 'ar',  # Arabic - Egypt
    2049: 'ar',  # Arabic - Iraq
    11265: 'ar',  # Arabic - Jordan
    13313: 'ar',  # Arabic - Kuwait
    12289: 'ar',  # Arabic - Lebanon
    4097: 'ar',  # Arabic - Libya
    6145: 'ar',  # Arabic - Morocco
    8193: 'ar',  # Arabic - Oman
    16385: 'ar',  # Arabic - Qatar
    10241: 'ar',  # Arabic - Syria
    7169: 'ar',  # Arabic - Tunisia
    14337: 'ar',  # Arabic - U.A.E.
    9217: 'ar',  # Arabic - Yemen
    1067: 'hy',  # Armenian - Armenia
    1101: 'as',  # Assamese
    2092: 'az',  # Azeri (Cyrillic)
    1068: 'az',  # Azeri (Latin)
    1069: 'eu',  # Basque
    1059: 'be',  # Belarusian
    1093: 'bn',  # Bengali (India)
    2117: 'bn',  # Bengali (Bangladesh)
    5146: 'bs',  # Bosnian (Bosnia/Herzegovina)
    1026: 'bg',  # Bulgarian
    1109: 'my',  # Burmese
    1027: 'ca',  # Catalan
    1116: 'chr',  # Cherokee - United States
    2052: 'zh',  # Chinese - People's Republic of China
    4100: 'zh',  # Chinese - Singapore
    1028: 'zh',  # Chinese - Taiwan
    3076: 'zh',  # Chinese - Hong Kong SAR
    5124: 'zh',  # Chinese - Macao SAR
    1050: 'hr',  # Croatian
    4122: 'hr',  # Croatian (Bosnia/Herzegovina)
    1029: 'cs',  # Czech
    1030: 'da',  # Danish
    1125: 'dv',  # Divehi
    1043: 'nl',  # Dutch - Netherlands
    2067: 'nl',  # Dutch - Belgium
    1126: 'bin',  # Edo
    1033: 'en',  # English - United States
    2057: 'en',  # English - United Kingdom
    3081: 'en',  # English - Australia
    10249: 'en',  # English - Belize
    4105: 'en',  # English - Canada
    9225: 'en',  # English - Caribbean
    15369: 'en',  # English - Hong Kong SAR
    16393: 'en',  # English - India
    14345: 'en',  # English - Indonesia
    6153: 'en',  # English - Ireland
    8201: 'en',  # English - Jamaica
    17417: 'en',  # English - Malaysia
    5129: 'en',  # English - New Zealand
    13321: 'en',  # English - Philippines
    18441: 'en',  # English - Singapore
    7177: 'en',  # English - South Africa
    11273: 'en',  # English - Trinidad
    12297: 'en',  # English - Zimbabwe
    1061: 'et',  # Estonian
    1080: 'fo',  # Faroese
    1065: 'fa',  # Farsi (Persian)
    1124: 'fil',  # Filipino
    1035: 'fi',  # Finnish
    1036: 'fr',  # French - France
    2060: 'fr',  # French - Belgium
    11276: 'fr',  # French - Cameroon
    3084: 'fr',  # French - Canada
    9228: 'fr',  # French - Democratic Rep. of Congo
    12300: 'fr',  # French - Cote d'Ivoire
    15372: 'fr',  # French - Haiti
    5132: 'fr',  # French - Luxembourg
    13324: 'fr',  # French - Mali
    6156: 'fr',  # French - Monaco
    14348: 'fr',  # French - Morocco
    58380: 'fr',  # French - North Africa
    8204: 'fr',  # French - Reunion
    10252: 'fr',  # French - Senegal
    4108: 'fr',  # French - Switzerland
    7180: 'fr',  # French - West Indies
    1122: 'fy',  # Frisian - Netherlands
    1127: 'ff',  # Fulfulde (Fulah) - Nigeria
    1071: 'mk',  # FYRO Macedonian
    2108: 'ga',  # Gaelic (Ireland)
    1084: 'gd',  # Gaelic (Scotland)
    1110: 'gl',  # Galician
    1079: 'ka',  # Georgian
    1031: 'de',  # German - Germany
    3079: 'de',  # German - Austria
    5127: 'de',  # German - Liechtenstein
    4103: 'de',  # German - Luxembourg
    2055: 'de',  # German - Switzerland
    1032: 'el',  # Greek
    1140: 'gn',  # Guarani - Paraguay
    1095: 'gu',  # Gujarati
    1128: 'ha',  # Hausa - Nigeria
    1141: 'haw',  # Hawaiian - United States
    1037: 'he',  # Hebrew
    1081: 'hi',  # Hindi
    1038: 'hu',  # Hungarian
    1129: 'ibb',  # Ibibio - Nigeria
    1039: 'is',  # Icelandic
    1136: 'ig',  # Igbo - Nigeria
    1057: 'id',  # Indonesian
    1117: 'iu',  # Inuktitut
    1040: 'it',  # Italian - Italy
    2064: 'it',  # Italian - Switzerland
    1041: 'ja',  # Japanese
    1099: 'kn',  # Kannada
    1137: 'kr',  # Kanuri - Nigeria
    2144: 'ks',  # Kashmiri
    1120: 'ks',  # Kashmiri (Arabic)
    1087: 'kk',  # Kazakh
    1107: 'km',  # Khmer
    1111: 'kok',  # Konkani
    1042: 'ko',  # Korean
    1088: 'ky',  # Kyrgyz (Cyrillic)
    1108: 'lo',  # Lao
    1142: 'la',  # Latin
    1062: 'lv',  # Latvian
    1063: 'lt',  # Lithuanian
    1086: 'ms',  # Malay - Malaysia
    2110: 'ms',  # Malay - Brunei Darussalam
    1100: 'ml',  # Malayalam
    1082: 'mt',  # Maltese
    1112: 'mni',  # Manipuri
    1153: 'mi',  # Maori - New Zealand
    1102: 'mr',  # Marathi
    1104: 'mn',  # Mongolian (Cyrillic)
    2128: 'mn',  # Mongolian (Mongolian)
    1121: 'ne',  # Nepali
    2145: 'ne',  # Nepali - India
    1044: 'no',  # Norwegian (Bokmål)
    2068: 'no',  # Norwegian (Nynorsk)
    1096: 'or',  # Oriya
    1138: 'om',  # Oromo
    1145: 'pap',  # Papiamentu
    1123: 'ps',  # Pashto
    1045: 'pl',  # Polish
    1046: 'pt',  # Portuguese - Brazil
    2070: 'pt',  # Portuguese - Portugal
    1094: 'pa',  # Punjabi
    2118: 'pa',  # Punjabi (Pakistan)
    1131: 'qu',  # Quechua - Bolivia
    2155: 'qu',  # Quechua - Ecuador
    3179: 'qu',  # Quechua - Peru
    1047: 'rm',  # Rhaeto-Romanic
    1048: 'ro',  # Romanian
    2072: 'ro',  # Romanian - Moldova
    1049: 'ru',  # Russian
    2073: 'ru',  # Russian - Moldova
    1083: 'se',  # Sami (Lappish)
    1103: 'sa',  # Sanskrit
    1132: 'nso',  # Sepedi
    3098: 'sr',  # Serbian (Cyrillic)
    2074: 'sr',  # Serbian (Latin)
    1113: 'sd',  # Sindhi - India
    2137: 'sd',  # Sindhi - Pakistan
    1115: 'si',  # Sinhalese - Sri Lanka
    1051: 'sk',  # Slovak
    1060: 'sl',  # Slovenian
    1143: 'so',  # Somali
    1070: 'wen',  # Sorbian
    3082: 'es',  # Spanish - Spain (Modern Sort)
    1034: 'es',  # Spanish - Spain (Traditional Sort)
    11274: 'es',  # Spanish - Argentina
    16394: 'es',  # Spanish - Bolivia
    13322: 'es',  # Spanish - Chile
    9226: 'es',  # Spanish - Colombia
    5130: 'es',  # Spanish - Costa Rica
    7178: 'es',  # Spanish - Dominican Republic
    12298: 'es',  # Spanish - Ecuador
    17418: 'es',  # Spanish - El Salvador
    4106: 'es',  # Spanish - Guatemala
    18442: 'es',  # Spanish - Honduras
    58378: 'es',  # Spanish - Latin America
    2058: 'es',  # Spanish - Mexico
    19466: 'es',  # Spanish - Nicaragua
    6154: 'es',  # Spanish - Panama
    15370: 'es',  # Spanish - Paraguay
    10250: 'es',  # Spanish - Peru
    20490: 'es',  # Spanish - Puerto Rico
    21514: 'es',  # Spanish - United States
    14346: 'es',  # Spanish - Uruguay
    8202: 'es',  # Spanish - Venezuela
    1072: 'st',  # Sutu (Sesotho)
    1089: 'sw',  # Swahili
    1053: 'sv',  # Swedish
    2077: 'sv',  # Swedish - Finland
    1114: 'syr',  # Syriac
    1064: 'tg',  # Tajik
    1119: 'tzm',  # Tamazight (Arabic)
    2143: 'tzm',  # Tamazight (Latin)
    1097: 'ta',  # Tamil
    1092: 'tt',  # Tatar
    1098: 'te',  # Telugu
    1054: 'th',  # Thai
    2129: 'bo',  # Tibetan - Bhutan
    1105: 'bo',  # Tibetan - People's Republic of China
    2163: 'ti',  # Tigrigna - Eritrea
    1139: 'ti',  # Tigrigna - Ethiopia
    1073: 'ts',  # Tsonga
    1074: 'tn',  # Tswana
    1055: 'tr',  # Turkish
    1090: 'tk',  # Turkmen
    1152: 'ug',  # Uighur - China
    1058: 'uk',  # Ukrainian
    1056: 'ur',  # Urdu
    2080: 'ur',  # Urdu - India
    2115: 'uz',  # Uzbek (Cyrillic)
    1091: 'uz',  # Uzbek (Latin)
    1075: 've',  # Venda
    1066: 'vi',  # Vietnamese
    1106: 'cy',  # Welsh
    1076: 'xh',  # Xhosa
    1144: 'ii',  # Yi
    1085: 'yi',  # Yiddish
    1130: 'yo',  # Yoruba
    1077: 'zu',  # Zulu
}
|
@ -15,15 +15,15 @@ class Inherit:
|
|||||||
inherit = Inherit()
|
inherit = Inherit()
|
||||||
|
|
||||||
def binary_property(parent, name):
|
def binary_property(parent, name):
|
||||||
vals = XPath('./w:%s')
|
vals = XPath('./w:%s' % name)(parent)
|
||||||
if not vals:
|
if not vals:
|
||||||
return inherit
|
return inherit
|
||||||
val = get(vals[0], 'w:val', 'on')
|
val = get(vals[0], 'w:val', 'on')
|
||||||
return True if val in {'on', '1', 'true'} else False
|
return True if val in {'on', '1', 'true'} else False
|
||||||
|
|
||||||
def simple_color(col):
|
def simple_color(col, auto='black'):
|
||||||
if not col or col == 'auto' or len(col) != 6:
|
if not col or col == 'auto' or len(col) != 6:
|
||||||
return 'black'
|
return auto
|
||||||
return '#'+col
|
return '#'+col
|
||||||
|
|
||||||
def simple_float(val, mult=1.0):
|
def simple_float(val, mult=1.0):
|
||||||
@ -66,37 +66,38 @@ LINE_STYLES = { # {{{
|
|||||||
'triple': 'double',
|
'triple': 'double',
|
||||||
} # }}}
|
} # }}}
|
||||||
|
|
||||||
def read_border(border, dest):
|
def read_border(parent, dest):
|
||||||
all_attrs = set()
|
tvals = {'padding_%s':inherit, 'border_%s_width':inherit,
|
||||||
|
'border_%s_style':inherit, 'border_%s_color':inherit}
|
||||||
|
vals = {}
|
||||||
for edge in ('left', 'top', 'right', 'bottom'):
|
for edge in ('left', 'top', 'right', 'bottom'):
|
||||||
vals = {'padding_%s':inherit, 'border_%s_width':inherit,
|
vals.update({k % edge:v for k, v in tvals.iteritems()})
|
||||||
'border_%s_style':inherit, 'border_%s_color':inherit}
|
|
||||||
all_attrs |= {key % edge for key in vals}
|
|
||||||
for elem in XPath('./w:%s' % edge):
|
|
||||||
color = get(elem, 'w:color')
|
|
||||||
if color is not None:
|
|
||||||
vals['border_%s_color'] = simple_color(color)
|
|
||||||
style = get(elem, 'w:val')
|
|
||||||
if style is not None:
|
|
||||||
vals['border_%s_style'] = LINE_STYLES.get(style, 'solid')
|
|
||||||
space = get(elem, 'w:space')
|
|
||||||
if space is not None:
|
|
||||||
try:
|
|
||||||
vals['padding_%s'] = float(space)
|
|
||||||
except (ValueError, TypeError):
|
|
||||||
pass
|
|
||||||
sz = get(elem, 'w:space')
|
|
||||||
if sz is not None:
|
|
||||||
# we dont care about art borders (they are only used for page borders)
|
|
||||||
try:
|
|
||||||
vals['border_%s_width'] = min(96, max(2, float(sz))) * 8
|
|
||||||
except (ValueError, TypeError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
for key, val in vals.iteritems():
|
for border in XPath('./w:pBdr')(parent):
|
||||||
setattr(dest, key % edge, val)
|
for edge in ('left', 'top', 'right', 'bottom'):
|
||||||
|
for elem in XPath('./w:%s' % edge):
|
||||||
|
color = get(elem, 'w:color')
|
||||||
|
if color is not None:
|
||||||
|
vals['border_%s_color' % edge] = simple_color(color)
|
||||||
|
style = get(elem, 'w:val')
|
||||||
|
if style is not None:
|
||||||
|
vals['border_%s_style' % edge] = LINE_STYLES.get(style, 'solid')
|
||||||
|
space = get(elem, 'w:space')
|
||||||
|
if space is not None:
|
||||||
|
try:
|
||||||
|
vals['padding_%s' % edge] = float(space)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
pass
|
||||||
|
sz = get(elem, 'w:sz')
|
||||||
|
if sz is not None:
|
||||||
|
# we dont care about art borders (they are only used for page borders)
|
||||||
|
try:
|
||||||
|
vals['border_%s_width' % edge] = min(96, max(2, float(sz))) / 8
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
pass
|
||||||
|
|
||||||
return all_attrs
|
for key, val in vals.iteritems():
|
||||||
|
setattr(dest, key, val)
|
||||||
|
|
||||||
def read_indent(parent, dest):
|
def read_indent(parent, dest):
|
||||||
padding_left = padding_right = text_indent = inherit
|
padding_left = padding_right = text_indent = inherit
|
||||||
@ -116,12 +117,11 @@ def read_indent(parent, dest):
|
|||||||
ti = (simple_float(hc, 0.01) if hc is not None else simple_float(h, 0.05) if h is not None else
|
ti = (simple_float(hc, 0.01) if hc is not None else simple_float(h, 0.05) if h is not None else
|
||||||
simple_float(flc, 0.01) if flc is not None else simple_float(fl, 0.05) if fl is not None else None)
|
simple_float(flc, 0.01) if flc is not None else simple_float(fl, 0.05) if fl is not None else None)
|
||||||
if ti is not None:
|
if ti is not None:
|
||||||
text_indent = '%.3f' % (ti, 'em' if hc is not None or (h is None and flc is not None) else 'pt')
|
text_indent = '%.3f%s' % (ti, 'em' if hc is not None or (h is None and flc is not None) else 'pt')
|
||||||
|
|
||||||
setattr(dest, 'padding_left', padding_left)
|
setattr(dest, 'margin_left', padding_left)
|
||||||
setattr(dest, 'padding_right', padding_right)
|
setattr(dest, 'margin_right', padding_right)
|
||||||
setattr(dest, 'text_indent', text_indent)
|
setattr(dest, 'text_indent', text_indent)
|
||||||
return {'padding_left', 'padding_right', 'text_indent'}
|
|
||||||
|
|
||||||
def read_justification(parent, dest):
|
def read_justification(parent, dest):
|
||||||
ans = inherit
|
ans = inherit
|
||||||
@ -134,7 +134,6 @@ def read_justification(parent, dest):
|
|||||||
if val in {'left', 'center', 'right',}:
|
if val in {'left', 'center', 'right',}:
|
||||||
ans = val
|
ans = val
|
||||||
setattr(dest, 'text_align', ans)
|
setattr(dest, 'text_align', ans)
|
||||||
return {'text_align'}
|
|
||||||
|
|
||||||
def read_spacing(parent, dest):
|
def read_spacing(parent, dest):
|
||||||
padding_top = padding_bottom = line_height = inherit
|
padding_top = padding_bottom = line_height = inherit
|
||||||
@ -154,10 +153,9 @@ def read_spacing(parent, dest):
|
|||||||
lh = simple_float(l, 0.05) if lr in {'exactly', 'atLeast'} else simple_float(l, 1/240.0)
|
lh = simple_float(l, 0.05) if lr in {'exactly', 'atLeast'} else simple_float(l, 1/240.0)
|
||||||
line_height = '%.3f%s' % (lh, 'pt' if lr in {'exactly', 'atLeast'} else '')
|
line_height = '%.3f%s' % (lh, 'pt' if lr in {'exactly', 'atLeast'} else '')
|
||||||
|
|
||||||
setattr(dest, 'padding_top', padding_top)
|
setattr(dest, 'margin_top', padding_top)
|
||||||
setattr(dest, 'padding_bottom', padding_bottom)
|
setattr(dest, 'margin_bottom', padding_bottom)
|
||||||
setattr(dest, 'line_height', line_height)
|
setattr(dest, 'line_height', line_height)
|
||||||
return {'padding_top', 'padding_bottom', 'line_height'}
|
|
||||||
|
|
||||||
def read_direction(parent, dest):
|
def read_direction(parent, dest):
|
||||||
ans = inherit
|
ans = inherit
|
||||||
@ -168,34 +166,187 @@ def read_direction(parent, dest):
|
|||||||
if 'rl' in val.lower():
|
if 'rl' in val.lower():
|
||||||
ans = 'rtl'
|
ans = 'rtl'
|
||||||
setattr(dest, 'direction', ans)
|
setattr(dest, 'direction', ans)
|
||||||
return {'direction'}
|
|
||||||
|
|
||||||
|
def read_shd(parent, dest):
    """Record the shading fill of ``parent`` as ``dest.background_color``.

    Each ``w:shd`` child that carries a ``w:fill`` attribute is inspected in
    document order and the last non-empty fill wins. The fill is converted
    with ``simple_color`` using ``'transparent'`` as the value for
    automatic/unusable colours. When no fill is found the attribute is set
    to the ``inherit`` sentinel.
    """
    background = inherit
    for shading in XPath('./w:shd[@w:fill]')(parent):
        fill = get(shading, 'w:fill')
        if not fill:
            continue
        background = simple_color(fill, auto='transparent')
    dest.background_color = background
|
||||||
|
|
||||||
class ParagraphStyle(object):
|
class ParagraphStyle(object):
|
||||||
|
|
||||||
border_path = XPath('./w:pBdr')
|
all_properties = (
|
||||||
|
'adjustRightInd', 'autoSpaceDE', 'autoSpaceDN', 'bidi',
|
||||||
|
'contextualSpacing', 'keepLines', 'keepNext', 'mirrorIndents',
|
||||||
|
'pageBreakBefore', 'snapToGrid', 'suppressLineNumbers',
|
||||||
|
'suppressOverlap', 'topLinePunct', 'widowControl', 'wordWrap',
|
||||||
|
|
||||||
|
# Border margins padding
|
||||||
|
'border_left_width', 'border_left_style', 'border_left_color', 'padding_left',
|
||||||
|
'border_top_width', 'border_top_style', 'border_top_color', 'padding_top',
|
||||||
|
'border_right_width', 'border_right_style', 'border_right_color', 'padding_right',
|
||||||
|
'border_bottom_width', 'border_bottom_style', 'border_bottom_color', 'padding_bottom',
|
||||||
|
'margin_left', 'margin_top', 'margin_right', 'margin_bottom',
|
||||||
|
|
||||||
|
# Misc.
|
||||||
|
'text_indent', 'text_align', 'line_height', 'direction', 'background_color',
|
||||||
|
)
|
||||||
|
|
||||||
def __init__(self, pPr):
|
def __init__(self, pPr):
|
||||||
self.all_properties = set()
|
|
||||||
for p in (
|
for p in (
|
||||||
'adjustRightInd', 'autoSpaceDE', 'autoSpaceDN',
|
'adjustRightInd', 'autoSpaceDE', 'autoSpaceDN', 'bidi',
|
||||||
'bidi', 'contextualSpacing', 'keepLines', 'keepNext',
|
'contextualSpacing', 'keepLines', 'keepNext', 'mirrorIndents',
|
||||||
'mirrorIndents', 'pageBreakBefore', 'snapToGrid',
|
'pageBreakBefore', 'snapToGrid', 'suppressLineNumbers',
|
||||||
'suppressLineNumbers', 'suppressOverlap', 'topLinePunct',
|
'suppressOverlap', 'topLinePunct', 'widowControl', 'wordWrap',
|
||||||
'widowControl', 'wordWrap',
|
|
||||||
):
|
):
|
||||||
self.all_properties.add(p)
|
setattr(self, p, binary_property(pPr, p))
|
||||||
setattr(p, binary_property(pPr, p))
|
|
||||||
|
|
||||||
for border in self.border_path(pPr):
|
for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd'):
|
||||||
self.all_properties |= read_border(border, self)
|
f = globals()['read_%s' % x]
|
||||||
|
f(pPr, self)
|
||||||
self.all_properties |= read_indent(pPr, self)
|
|
||||||
self.all_properties |= read_justification(pPr, self)
|
|
||||||
self.all_properties |= read_spacing(pPr, self)
|
|
||||||
self.all_properties |= read_direction(pPr, self)
|
|
||||||
|
|
||||||
# TODO: numPr and outlineLvl
|
# TODO: numPr and outlineLvl
|
||||||
|
|
||||||
|
def update(self, other):
|
||||||
|
for prop in self.all_properties:
|
||||||
|
nval = getattr(other, prop)
|
||||||
|
if nval is not inherit:
|
||||||
|
setattr(self, prop, nval)
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
# Character styles {{{
|
||||||
|
def read_text_border(parent, dest):
    """Read the ``w:bdr`` children of ``parent`` into border attributes.

    Sets ``border_color``, ``border_style``, ``border_width`` and
    ``padding`` on ``dest``. Without any ``w:bdr`` element all four are the
    ``inherit`` sentinel. With one, colour/style/width start from defaults
    (``simple_color('auto')``, ``'solid'``, ``1``) and are then overridden
    by whatever attributes each element supplies; later elements override
    earlier ones. ``padding`` stays ``inherit`` unless ``w:space`` parses.
    """
    bdr_elems = XPath('./w:bdr')(parent)
    props = {
        'border_color': inherit,
        'border_style': inherit,
        'border_width': inherit,
        'padding': inherit,
    }
    if bdr_elems:
        # The mere presence of a border element implies a visible border.
        props['border_color'] = simple_color('auto')
        props['border_style'] = 'solid'
        props['border_width'] = 1

    for bdr in bdr_elems:
        colour = get(bdr, 'w:color')
        if colour is not None:
            props['border_color'] = simple_color(colour)

        line = get(bdr, 'w:val')
        if line is not None:
            props['border_style'] = LINE_STYLES.get(line, 'solid')

        gap = get(bdr, 'w:space')
        if gap is not None:
            try:
                props['padding'] = float(gap)
            except (ValueError, TypeError):
                # Unparsable spacing: keep the previous value.
                pass

        size = get(bdr, 'w:sz')
        if size is not None:
            # we dont care about art borders (they are only used for page borders)
            try:
                # Clamp to [2, 96] before dividing by 8.
                props['border_width'] = min(96, max(2, float(size))) / 8
            except (ValueError, TypeError):
                # Unparsable size: keep the previous value.
                pass

    for attr in ('border_color', 'border_style', 'border_width', 'padding'):
        setattr(dest, attr, props[attr])
|
||||||
|
|
||||||
|
def read_color(parent, dest):
    """Store the text colour found under ``parent`` on ``dest.color``.

    Looks at every ``w:color`` child with a ``w:val`` attribute; the last
    non-empty value, converted by ``simple_color``, is used. If none is
    present ``dest.color`` is set to the ``inherit`` sentinel.
    """
    colour = inherit
    for node in XPath('./w:color[@w:val]')(parent):
        raw = get(node, 'w:val')
        if raw:
            colour = simple_color(raw)
    dest.color = colour
|
||||||
|
|
||||||
|
def read_highlight(parent, dest):
    """Store the highlight colour found under ``parent`` on ``dest.highlight``.

    Every ``w:highlight`` child with a ``w:val`` attribute is examined and
    the last non-empty value wins. The value ``'none'`` is translated to
    ``'transparent'``; any other value is stored unchanged. If no usable
    value exists the attribute is set to the ``inherit`` sentinel.
    """
    ans = inherit
    for elem in XPath('./w:highlight[@w:val]')(parent):
        val = get(elem, 'w:val')
        if not val:
            continue
        # val is known to be non-empty here, so only 'none' needs mapping.
        ans = 'transparent' if val == 'none' else val
    setattr(dest, 'highlight', ans)
|
||||||
|
|
||||||
|
def read_lang(parent, dest):
    """Store the language declared under ``parent`` on ``dest.lang``.

    The ``w:val`` attribute of each ``w:lang`` child is handled as follows:

    * If it does not parse as a hexadecimal integer it is taken to be a
      language tag and stored as-is.
    * If it parses as hexadecimal it is looked up in the LCID table and the
      mapped language code is stored.
    * If the lookup yields nothing but the value is purely alphabetic
      (e.g. ``'de'``, ``'fa'``, ``'be'``), it is a language tag that merely
      happens to consist of hex digits, so it is stored as-is instead of
      being dropped.

    The last usable value wins. With no usable value the attribute is set
    to the ``inherit`` sentinel.
    """
    ans = inherit
    for elem in XPath('./w:lang[@w:val]')(parent):
        val = get(elem, 'w:val')
        if not val:
            continue
        try:
            code = int(val, 16)
        except (ValueError, TypeError):
            # Not a number at all: treat it as a language tag.
            ans = val
            continue
        # Imported lazily, as in the original, so the table is only loaded
        # when a numeric language code is actually encountered.
        from calibre.ebooks.docx.lcid import lcid
        mapped = lcid.get(code, None)
        if mapped:
            ans = mapped
        elif val.isalpha():
            # No LCID key has an all-letter hex spelling, so an alphabetic
            # value that missed the table is a language tag, not an LCID.
            ans = val
    setattr(dest, 'lang', ans)
|
||||||
|
|
||||||
|
def read_letter_spacing(parent, dest):
    """Store the character spacing found under ``parent`` on
    ``dest.letter_spacing``.

    The ``w:val`` of each ``w:spacing`` child is converted with
    ``simple_float`` using a multiplier of 0.05; the last successfully
    converted value wins. An explicit spacing of zero is kept, so a style
    can reset spacing inherited from elsewhere. With no usable value the
    attribute is set to the ``inherit`` sentinel.
    """
    ans = inherit
    for elem in XPath('./w:spacing[@w:val]')(parent):
        val = simple_float(get(elem, 'w:val'), 0.05)
        # NOTE(review): assumes simple_float returns None when the value
        # cannot be parsed -- confirm against its definition. Testing for
        # None rather than truthiness keeps a legitimate 0 spacing.
        if val is not None:
            ans = val
    setattr(dest, 'letter_spacing', ans)
|
||||||
|
|
||||||
|
def read_sz(parent, dest):
    """Store the font size found under ``parent`` on ``dest.font_size``.

    The ``w:val`` of each ``w:sz`` child is converted with ``simple_float``
    using a multiplier of 0.5. The last truthy result is used; otherwise
    the attribute is set to the ``inherit`` sentinel.
    """
    size = inherit
    for node in XPath('./w:sz[@w:val]')(parent):
        converted = simple_float(get(node, 'w:val'), 0.5)
        if not converted:
            continue
        size = converted
    dest.font_size = size
|
||||||
|
|
||||||
|
def read_underline(parent, dest):
    """Store the underline setting found under ``parent`` on
    ``dest.text_decoration``.

    For each ``w:u`` child with a non-empty ``w:val``: the value ``'none'``
    explicitly switches underlining off and is stored as ``'none'``; every
    other value is collapsed to ``'underline'``. The last usable element
    wins. With no usable value the attribute is set to the ``inherit``
    sentinel.
    """
    ans = inherit
    for elem in XPath('./w:u[@w:val]')(parent):
        val = get(elem, 'w:val')
        if val:
            # 'none' must not be reported as an underline; it is how a
            # style turns off an underline set elsewhere.
            ans = 'none' if val == 'none' else 'underline'
    setattr(dest, 'text_decoration', ans)
|
||||||
|
|
||||||
|
def read_vert_align(parent, dest):
    """Store the vertical alignment found under ``parent`` on
    ``dest.vert_align``.

    Only the values ``'baseline'``, ``'subscript'`` and ``'superscript'``
    from ``w:vertAlign/@w:val`` are accepted; the last accepted one wins.
    Anything else leaves the result at the ``inherit`` sentinel.
    """
    accepted = ('baseline', 'subscript', 'superscript')
    alignment = inherit
    for node in XPath('./w:vertAlign[@w:val]')(parent):
        candidate = get(node, 'w:val')
        if candidate in accepted:
            alignment = candidate
    dest.vert_align = alignment
|
||||||
|
|
||||||
|
|
||||||
|
class RunStyle(object):
    """Character (run) formatting read from a run-properties element.

    Each name in ``all_properties`` becomes an instance attribute whose
    value is either the value read from the element or the ``inherit``
    sentinel when the element does not specify it.
    """

    # On/off properties; each is read with binary_property() using the
    # element name as the attribute name.
    _toggle_properties = (
        'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint', 'rtl', 'shadow',
        'smallCaps', 'strike', 'vanish',
    )

    # Every attribute update() copies. The names must match exactly what
    # the read_* helpers set -- read_shd sets ``background_color`` (with an
    # underscore), so that is the spelling required here.
    all_properties = _toggle_properties + (
        'border_color', 'border_style', 'border_width', 'padding', 'color', 'highlight', 'background_color',
        'letter_spacing', 'font_size', 'text_decoration', 'vert_align',
    )

    def __init__(self, rPr):
        """Populate all properties from the run-properties element ``rPr``."""
        for p in self._toggle_properties:
            setattr(self, p, binary_property(rPr, p))

        # NOTE(review): read_lang is defined in this module but is not
        # called here, so no ``lang`` attribute is set -- confirm whether
        # that is intentional.
        for x in ('text_border', 'color', 'highlight', 'shd', 'letter_spacing', 'sz', 'underline', 'vert_align'):
            # Readers are resolved by name from the module namespace.
            f = globals()['read_%s' % x]
            f(rPr, self)

    def update(self, other):
        """Overlay ``other`` onto this style.

        Every property of ``other`` that is not the ``inherit`` sentinel
        replaces the corresponding property on ``self``.
        """
        for prop in self.all_properties:
            nval = getattr(other, prop)
            if nval is not inherit:
                setattr(self, prop, nval)
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
class Style(object):
|
class Style(object):
|
||||||
@ -218,6 +369,24 @@ class Style(object):
|
|||||||
if self.style_type not in {'paragraph', 'character'}:
|
if self.style_type not in {'paragraph', 'character'}:
|
||||||
self.link = None
|
self.link = None
|
||||||
|
|
||||||
|
self.paragraph_style = self.character_style = None
|
||||||
|
|
||||||
|
if self.style_type in {'paragraph', 'character'}:
|
||||||
|
if self.style_type == 'paragraph':
|
||||||
|
for pPr in XPath('./w:pPr')(elem):
|
||||||
|
ps = ParagraphStyle(pPr)
|
||||||
|
if self.paragraph_style is None:
|
||||||
|
self.paragraph_style = ps
|
||||||
|
else:
|
||||||
|
self.paragraph_style.update(ps)
|
||||||
|
|
||||||
|
for rPr in XPath('./w:rPr')(elem):
|
||||||
|
rs = RunStyle(rPr)
|
||||||
|
if self.character_style is None:
|
||||||
|
self.character_style = rs
|
||||||
|
else:
|
||||||
|
self.character_style.update(rs)
|
||||||
|
|
||||||
|
|
||||||
class Styles(object):
|
class Styles(object):
|
||||||
|
|
||||||
@ -259,5 +428,3 @@ class Styles(object):
|
|||||||
|
|
||||||
# TODO: Document defaults (docDefaults)
|
# TODO: Document defaults (docDefaults)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user