@@ -320,6 +320,20 @@ def pdfRepr(obj):
320320 raise TypeError ("Don't know a PDF representation for {} objects"
321321 .format (type (obj )))
322322
323+ def _font_supports_char (fonttype , char ):
324+ """
325+ Returns True if the font is able to provided the char in a PDF
326+
327+ For a Type 3 font, this method returns True only for single-byte
328+ chars. For Type 42 fonts this method return True if the char is from
329+ the Basic Multilingual Plane.
330+ """
331+ if fonttype == 3 :
332+ return ord (char ) <= 255
333+ if fonttype == 42 :
334+ return ord (char ) <= 65535
335+ return True
336+
323337
324338class Reference :
325339 """
@@ -1194,6 +1208,8 @@ def embedTTFType42(font, characters, descriptor):
11941208 wObject = self .reserveObject ('Type 0 widths' )
11951209 toUnicodeMapObject = self .reserveObject ('ToUnicode map' )
11961210
1211+ bbox = [cvt (x , nearest = False ) for x in font .bbox ]
1212+
11971213 cidFontDict = {
11981214 'Type' : Name ('Font' ),
11991215 'Subtype' : Name ('CIDFontType2' ),
@@ -1268,13 +1284,47 @@ def embedTTFType42(font, characters, descriptor):
12681284
12691285 unicode_bfrange = []
12701286 for start , end in unicode_groups :
1287+ # Ensure the CID map contains only chars from BMP
1288+ if start > 65535 :
1289+ continue
1290+ end = min (65535 , end )
1291+
12711292 unicode_bfrange .append (
12721293 b"<%04x> <%04x> [%s]" %
12731294 (start , end ,
12741295 b" " .join (b"<%04x>" % x for x in range (start , end + 1 ))))
12751296 unicode_cmap = (self ._identityToUnicodeCMap %
12761297 (len (unicode_groups ), b"\n " .join (unicode_bfrange )))
12771298
1299+ # Add XObjects for unsupported chars
1300+ glyph_ids = []
1301+ for ccode in characters :
1302+ if not _font_supports_char (fonttype , chr (ccode )):
1303+ gind = font .get_char_index (ccode )
1304+ glyph_ids .append (gind )
1305+
1306+ rawcharprocs = _get_pdf_charprocs (filename , glyph_ids )
1307+ for charname in sorted (rawcharprocs ):
1308+ stream = rawcharprocs [charname ]
1309+ charprocDict = {'Length' : len (stream )}
1310+ charprocDict ['Type' ] = Name ('XObject' )
1311+ charprocDict ['Subtype' ] = Name ('Form' )
1312+ charprocDict ['BBox' ] = bbox
1313+ # Each glyph includes bounding box information,
1314+ # but xpdf and ghostscript can't handle it in a
1315+ # Form XObject (they segfault!!!), so we remove it
1316+ # from the stream here. It's not needed anyway,
1317+ # since the Form XObject includes it in its BBox
1318+ # value.
1319+ stream = stream [stream .find (b"d1" ) + 2 :]
1320+ charprocObject = self .reserveObject ('charProc' )
1321+ self .beginStream (charprocObject .id , None , charprocDict )
1322+ self .currentstream .write (stream )
1323+ self .endStream ()
1324+
1325+ name = self ._get_xobject_symbol_name (filename , charname )
1326+ self .multi_byte_charprocs [name ] = charprocObject
1327+
12781328 # CIDToGIDMap stream
12791329 cid_to_gid_map = "" .join (cid_to_gid_map ).encode ("utf-16be" )
12801330 self .beginStream (cidToGidMapObject .id ,
@@ -2106,16 +2156,17 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
21062156 self .check_gc (gc , gc ._rgb )
21072157 prev_font = None , None
21082158 oldx , oldy = 0 , 0
2109- type3_multibytes = []
2159+ unsupported_chars = []
21102160
21112161 self .file .output (Op .begin_text )
21122162 for font , fontsize , num , ox , oy in glyphs :
2113- self .file ._character_tracker .track (font , chr (num ))
2163+ char = chr (num )
2164+ self .file ._character_tracker .track (font , char )
21142165 fontname = font .fname
2115- if fonttype == 3 and num > 255 :
2116- # For Type3 fonts, multibyte characters must be emitted
2117- # separately (below).
2118- type3_multibytes .append ((font , fontsize , ox , oy , num ))
2166+ if not _font_supports_char ( fonttype , char ) :
2167+ # Unsupported chars (i.e. multibyte in Type 3 or beyond BMP in
2168+ # Type 42) must be emitted separately (below).
2169+ unsupported_chars .append ((font , fontsize , ox , oy , num ))
21192170 else :
21202171 self ._setup_textpos (ox , oy , 0 , oldx , oldy )
21212172 oldx , oldy = ox , oy
@@ -2127,7 +2178,7 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
21272178 Op .show )
21282179 self .file .output (Op .end_text )
21292180
2130- for font , fontsize , ox , oy , num in type3_multibytes :
2181+ for font , fontsize , ox , oy , num in unsupported_chars :
21312182 self ._draw_xobject_glyph (
21322183 font , fontsize , font .get_char_index (num ), ox , oy )
21332184
@@ -2236,18 +2287,6 @@ def encode_string(self, s, fonttype):
22362287 return s .encode ('cp1252' , 'replace' )
22372288 return s .encode ('utf-16be' , 'replace' )
22382289
2239- @staticmethod
2240- def _font_supports_char (fonttype , char ):
2241- """
2242- Returns True if the font is able to provided the char in a PDF
2243-
2244- For a Type 3 font, this method returns True only for single-byte
2245- chars. For Type 42 fonts this method always returns True.
2246- """
2247- if fonttype == 3 :
2248- return ord (char ) <= 255
2249- return True
2250-
22512290 def draw_text (self , gc , x , y , s , prop , angle , ismath = False , mtext = None ):
22522291 # docstring inherited
22532292
@@ -2311,7 +2350,7 @@ def draw_text(self, gc, x, y, s, prop, angle, ismath=False, mtext=None):
23112350 prev_was_multibyte = True
23122351 for item in _text_helpers .layout (
23132352 s , font , kern_mode = KERNING_UNFITTED ):
2314- if self . _font_supports_char (fonttype , item .char ):
2353+ if _font_supports_char (fonttype , item .char ):
23152354 if prev_was_multibyte :
23162355 singlebyte_chunks .append ((item .x , []))
23172356 if item .prev_kern :
0 commit comments