@@ -321,6 +321,21 @@ def pdfRepr(obj):
321321 .format (type (obj )))
322322
323323
324+ def _font_supports_char (fonttype , char ):
325+ """
326+ Return True if the font is able to provide the char in a PDF.
327+
328+ For a Type 3 font, this function returns True only for single-byte
329+ chars. For Type 42 fonts this function returns True if the char is from
330+ the Basic Multilingual Plane. Other font types raise NotImplementedError.
331+ """
332+ if fonttype == 3 :
333+ return ord (char ) <= 255
334+ if fonttype == 42 :
335+ return ord (char ) <= 65535
336+ raise NotImplementedError ()
337+
338+
324339class Reference :
325340 """
326341 PDF reference object.
@@ -1194,6 +1209,8 @@ def embedTTFType42(font, characters, descriptor):
11941209 wObject = self .reserveObject ('Type 0 widths' )
11951210 toUnicodeMapObject = self .reserveObject ('ToUnicode map' )
11961211
1212+ bbox = [cvt (x , nearest = False ) for x in font .bbox ]
1213+
11971214 cidFontDict = {
11981215 'Type' : Name ('Font' ),
11991216 'Subtype' : Name ('CIDFontType2' ),
@@ -1268,13 +1285,47 @@ def embedTTFType42(font, characters, descriptor):
12681285
12691286 unicode_bfrange = []
12701287 for start , end in unicode_groups :
1288+ # Ensure the CID map contains only chars from BMP
1289+ if start > 65535 :
1290+ continue
1291+ end = min (65535 , end )
1292+
12711293 unicode_bfrange .append (
12721294 b"<%04x> <%04x> [%s]" %
12731295 (start , end ,
12741296 b" " .join (b"<%04x>" % x for x in range (start , end + 1 ))))
12751297 unicode_cmap = (self ._identityToUnicodeCMap %
12761298 (len (unicode_groups ), b"\n " .join (unicode_bfrange )))
12771299
1300+ # Add XObjects for unsupported chars
1301+ glyph_ids = []
1302+ for ccode in characters :
1303+ if not _font_supports_char (fonttype , chr (ccode )):
1304+ gind = font .get_char_index (ccode )
1305+ glyph_ids .append (gind )
1306+
1307+ rawcharprocs = _get_pdf_charprocs (filename , glyph_ids )
1308+ for charname in sorted (rawcharprocs ):
1309+ stream = rawcharprocs [charname ]
1310+ charprocDict = {'Length' : len (stream )}
1311+ charprocDict ['Type' ] = Name ('XObject' )
1312+ charprocDict ['Subtype' ] = Name ('Form' )
1313+ charprocDict ['BBox' ] = bbox
1314+ # Each glyph includes bounding box information,
1315+ # but xpdf and ghostscript can't handle it in a
1316+ # Form XObject (they segfault!!!), so we remove it
1317+ # from the stream here. It's not needed anyway,
1318+ # since the Form XObject includes it in its BBox
1319+ # value.
1320+ stream = stream [stream .find (b"d1" ) + 2 :]
1321+ charprocObject = self .reserveObject ('charProc' )
1322+ self .beginStream (charprocObject .id , None , charprocDict )
1323+ self .currentstream .write (stream )
1324+ self .endStream ()
1325+
1326+ name = self ._get_xobject_symbol_name (filename , charname )
1327+ self .multi_byte_charprocs [name ] = charprocObject
1328+
12781329 # CIDToGIDMap stream
12791330 cid_to_gid_map = "" .join (cid_to_gid_map ).encode ("utf-16be" )
12801331 self .beginStream (cidToGidMapObject .id ,
@@ -2106,16 +2157,17 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
21062157 self .check_gc (gc , gc ._rgb )
21072158 prev_font = None , None
21082159 oldx , oldy = 0 , 0
2109- type3_multibytes = []
2160+ unsupported_chars = []
21102161
21112162 self .file .output (Op .begin_text )
21122163 for font , fontsize , num , ox , oy in glyphs :
2113- self .file ._character_tracker .track (font , chr (num ))
2164+ char = chr (num )
2165+ self .file ._character_tracker .track (font , char )
21142166 fontname = font .fname
2115- if fonttype == 3 and num > 255 :
2116- # For Type3 fonts, multibyte characters must be emitted
2117- # separately (below).
2118- type3_multibytes .append ((font , fontsize , ox , oy , num ))
2167+ if not _font_supports_char ( fonttype , char ) :
2168+ # Unsupported chars (i.e. multibyte in Type 3 or beyond BMP in
2169+ # Type 42) must be emitted separately (below).
2170+ unsupported_chars .append ((font , fontsize , ox , oy , num ))
21192171 else :
21202172 self ._setup_textpos (ox , oy , 0 , oldx , oldy )
21212173 oldx , oldy = ox , oy
@@ -2127,7 +2179,7 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
21272179 Op .show )
21282180 self .file .output (Op .end_text )
21292181
2130- for font , fontsize , ox , oy , num in type3_multibytes :
2182+ for font , fontsize , ox , oy , num in unsupported_chars :
21312183 self ._draw_xobject_glyph (
21322184 font , fontsize , font .get_char_index (num ), ox , oy )
21332185
@@ -2236,20 +2288,6 @@ def encode_string(self, s, fonttype):
22362288 return s .encode ('cp1252' , 'replace' )
22372289 return s .encode ('utf-16be' , 'replace' )
22382290
2239- @staticmethod
2240- def _font_supports_char (fonttype , char ):
2241- """
2242- Returns True if the font is able to provided the char in a PDF
2243-
2244- For a Type 3 font, this method returns True only for single-byte
2245- chars. For Type 42 fonts this method always returns True.
2246- """
2247- if fonttype == 3 :
2248- return ord (char ) <= 255
2249- if fonttype == 42 :
2250- return True
2251- raise NotImplementedError ()
2252-
22532291 def draw_text (self , gc , x , y , s , prop , angle , ismath = False , mtext = None ):
22542292 # docstring inherited
22552293
@@ -2313,7 +2351,7 @@ def draw_text(self, gc, x, y, s, prop, angle, ismath=False, mtext=None):
23132351 prev_was_multibyte = True
23142352 for item in _text_helpers .layout (
23152353 s , font , kern_mode = KERNING_UNFITTED ):
2316- if self . _font_supports_char (fonttype , item .char ):
2354+ if _font_supports_char (fonttype , item .char ):
23172355 if prev_was_multibyte :
23182356 singlebyte_chunks .append ((item .x , []))
23192357 if item .prev_kern :
0 commit comments