matplotlib · tacaswell · Sep 17, 2025 · Sep 3, 2025 · tacaswell · Sep 16, 2025
diff --git a/lib/matplotlib/backends/_backend_pdf_ps.py b/lib/matplotlib/backends/_backend_pdf_ps.py
@@ -18,7 +18,7 @@
 
 
 if typing.TYPE_CHECKING:
-    from .ft2font import FT2Font, GlyphIndexType
+    from .ft2font import CharacterCodeType, FT2Font, GlyphIndexType
     from fontTools.ttLib import TTFont
 
 
@@ -107,23 +107,103 @@ class CharacterTracker:
     """
     Helper for font subsetting by the PDF and PS backends.
 
-    Maintains a mapping of font paths to the set of glyphs that are being used from that
-    font.
-    """
+    Maintains a mapping of font paths to the set of characters and glyphs that are being
+    used from that font.
+
+    Attributes
+    ----------
+    subset_size : int
+        The size at which characters are grouped into subsets.
+    used : dict[tuple[str, int], dict[CharacterCodeType, GlyphIndexType]]
+        A dictionary of font files to character maps.
+
+        The key is a font filename and subset within that font.
 
-    def __init__(self) -> None:
-        self.used: dict[str, set[GlyphIndexType]] = {}
+        The value is a dictionary mapping a character code to a glyph index. This is the
+        same direction as `.FT2Font.get_charmap` (character codes to glyph indices).
 
-    def track(self, font: FT2Font, s: str) -> None:
-        """Record that string *s* is being typeset using font *font*."""
+        If *subset_size* is 0 (the default), then there will only be one subset per
+        font filename.
+    """
+
+    def __init__(self, subset_size: int = 0):
+        """
+        Parameters
+        ----------
+        subset_size : int, optional
+            The maximum size that is supported for an embedded font. If nonzero, then
+            characters will be grouped into subsets of this size.
+        """
+        self.used: dict[tuple[str, int], dict[CharacterCodeType, GlyphIndexType]] = {}
+        self.subset_size = subset_size
+
+    def track(self, font: FT2Font, s: str) -> list[tuple[int, CharacterCodeType]]:
+        """
+        Record that string *s* is being typeset using font *font*.
+
+        Parameters
+        ----------
+        font : FT2Font
+            A font that is being used for the provided string.
+        s : str
+            The string that should be marked as tracked by the provided font.
+
+        Returns
+        -------
+        list[tuple[int, CharacterCodeType]]
+            A list of subset and character code pairs corresponding to the input string.
+            If a *subset_size* is specified on this instance, then the character code
+            will correspond with the given subset (and not necessarily the string as a
+            whole). If *subset_size* is not specified, then the subset will always be 0
+            and the character codes will be returned from the string unchanged.
+        """
+        font_glyphs = []
         char_to_font = font._get_fontmap(s)
         for _c, _f in char_to_font.items():
-            glyph_index = _f.get_char_index(ord(_c))
-            self.used.setdefault(_f.fname, set()).add(glyph_index)
-
-    def track_glyph(self, font: FT2Font, glyph_index: GlyphIndexType) -> None:
-        """Record that glyph index *glyph_index* is being typeset using font *font*."""
-        self.used.setdefault(font.fname, set()).add(glyph_index)
+            charcode = ord(_c)
+            glyph_index = _f.get_char_index(charcode)
+            if self.subset_size != 0:
+                subset = charcode // self.subset_size
+                subset_charcode = charcode % self.subset_size
+            else:
+                subset = 0
+                subset_charcode = charcode
+            self.used.setdefault((_f.fname, subset), {})[subset_charcode] = glyph_index
+            font_glyphs.append((subset, subset_charcode))
+        return font_glyphs
+
+    def track_glyph(
+            self, font: FT2Font, charcode: CharacterCodeType,
+            glyph: GlyphIndexType) -> tuple[int, CharacterCodeType]:
+        """
+        Record character code *charcode* at glyph index *glyph* as using font *font*.
+
+        Parameters
+        ----------
+        font : FT2Font
+            The font that is being used to typeset the provided character.
+        charcode : CharacterCodeType
+            The character code to record.
+        glyph : GlyphIndexType
+            The corresponding glyph index to record.
+
+        Returns
+        -------
+        subset : int
+            The subset in which the returned character code resides. If *subset_size*
+            was not specified on this instance, then this is always 0.
+        subset_charcode : CharacterCodeType
+            The character code within the above subset. If *subset_size* was not
+            specified on this instance, then this is just *charcode* unmodified.
+        """
+        if self.subset_size != 0:
+            subset = charcode // self.subset_size
+            subset_charcode = charcode % self.subset_size
+        else:
+            subset = 0
+            subset_charcode = charcode
+        self.used.setdefault((font.fname, subset), {})[subset_charcode] = glyph
+        return (subset, subset_charcode)
 
 
 class RendererPDFPSBase(RendererBase):

diff --git a/lib/matplotlib/backends/backend_pdf.py b/lib/matplotlib/backends/backend_pdf.py
@@ -19,7 +19,6 @@
 import sys
 import time
 import types
-import typing
 import warnings
 import zlib
 
@@ -36,8 +35,7 @@
 from matplotlib.figure import Figure
 from matplotlib.font_manager import get_font, fontManager as _fontManager
 from matplotlib._afm import AFM
-from matplotlib.ft2font import (
-    FT2Font, FaceFlags, GlyphIndexType, Kerning, LoadFlags, StyleFlags)
+from matplotlib.ft2font import FT2Font, FaceFlags, Kerning, LoadFlags, StyleFlags
 from matplotlib.transforms import Affine2D, BboxBase
 from matplotlib.path import Path
 from matplotlib.dates import UTC
@@ -962,9 +960,9 @@ def writeFonts(self):
             else:
                 # a normal TrueType font
                 _log.debug('Writing TrueType font.')
-                glyphs = self._character_tracker.used.get(filename)
-                if glyphs:
-                    fonts[Fx] = self.embedTTF(filename, glyphs)
+                charmap = self._character_tracker.used.get((filename, 0))
+                if charmap:
+                    fonts[Fx] = self.embedTTF(filename, charmap)
         self.writeObject(self.fontObject, fonts)
 
     def _write_afm_font(self, filename):
@@ -1006,8 +1004,9 @@ def _embedTeXFont(self, dvifont):
 
         # Reduce the font to only the glyphs used in the document, get the encoding
         # for that subset, and compute various properties based on the encoding.
-        chars = frozenset(self._character_tracker.used[dvifont.fname])
-        t1font = t1font.subset(chars, self._get_subset_prefix(chars))
+        charmap = self._character_tracker.used[(dvifont.fname, 0)]
+        chars = frozenset(charmap.keys())
+        t1font = t1font.subset(chars, self._get_subset_prefix(charmap.values()))
         fontdict['BaseFont'] = Name(t1font.prop['FontName'])
         # createType1Descriptor writes the font data as a side effect
         fontdict['FontDescriptor'] = self.createType1Descriptor(t1font)
@@ -1138,7 +1137,7 @@ def _get_xobject_glyph_name(self, filename, glyph_name):
 end
 end"""
 
-    def embedTTF(self, filename, glyphs):
+    def embedTTF(self, filename, charmap):
         """Embed the TTF font from the named file into the document."""
         font = get_font(filename)
         fonttype = mpl.rcParams['pdf.fonttype']
@@ -1154,7 +1153,7 @@ def cvt(length, upe=font.units_per_EM, nearest=True):
             else:
                 return math.ceil(value)
 
-        def embedTTFType3(font, glyphs, descriptor):
+        def embedTTFType3(font, charmap, descriptor):
             """The Type 3-specific part of embedding a Truetype font"""
             widthsObject = self.reserveObject('font widths')
             fontdescObject = self.reserveObject('font descriptor')
@@ -1201,10 +1200,8 @@ def get_char_width(charcode):
             # that we need from this font.
             differences = []
             multi_byte_chars = set()
-            charmap = {gind: ccode for ccode, gind in font.get_charmap().items()}
-            for gind in glyphs:
+            for ccode, gind in charmap.items():
                 glyph_name = font.get_glyph_name(gind)
-                ccode = charmap.get(gind)
                 if ccode is not None and ccode <= 255:
                     differences.append((ccode, glyph_name))
                 else:
@@ -1219,7 +1216,7 @@ def get_char_width(charcode):
                 last_c = c
 
             # Make the charprocs array.
-            rawcharprocs = _get_pdf_charprocs(filename, glyphs)
+            rawcharprocs = _get_pdf_charprocs(filename, charmap.values())
             charprocs = {}
             for charname in sorted(rawcharprocs):
                 stream = rawcharprocs[charname]
@@ -1256,7 +1253,7 @@ def get_char_width(charcode):
 
             return fontdictObject
 
-        def embedTTFType42(font, glyphs, descriptor):
+        def embedTTFType42(font, charmap, descriptor):
             """The Type 42-specific part of embedding a Truetype font"""
             fontdescObject = self.reserveObject('font descriptor')
             cidFontDictObject = self.reserveObject('CID font dictionary')
@@ -1266,8 +1263,9 @@ def embedTTFType42(font, glyphs, descriptor):
             wObject = self.reserveObject('Type 0 widths')
             toUnicodeMapObject = self.reserveObject('ToUnicode map')
 
-            _log.debug("SUBSET %s characters: %s", filename, glyphs)
-            with _backend_pdf_ps.get_glyphs_subset(filename, glyphs) as subset:
+            _log.debug("SUBSET %s characters: %s", filename, charmap)
+            with _backend_pdf_ps.get_glyphs_subset(filename,
+                                                   charmap.values()) as subset:
                 fontdata = _backend_pdf_ps.font_as_file(subset)
             _log.debug(
                 "SUBSET %s %d -> %d", filename,
@@ -1315,11 +1313,9 @@ def embedTTFType42(font, glyphs, descriptor):
             cid_to_gid_map = ['\0'] * 65536
             widths = []
             max_ccode = 0
-            charmap = {gind: ccode for ccode, gind in font.get_charmap().items()}
-            for gind in glyphs:
+            for ccode, gind in charmap.items():
                 glyph = font.load_glyph(gind,
                                         flags=LoadFlags.NO_SCALE | LoadFlags.NO_HINTING)
-                ccode = charmap[gind]
                 widths.append((ccode, cvt(glyph.horiAdvance)))
                 if ccode < 65536:
                     cid_to_gid_map[ccode] = chr(gind)
@@ -1358,8 +1354,8 @@ def embedTTFType42(font, glyphs, descriptor):
 
             # Add XObjects for unsupported chars
             glyph_indices = [
-                glyph_index for glyph_index in glyphs
-                if not _font_supports_glyph(fonttype, charmap[glyph_index])
+                glyph_index for ccode, glyph_index in charmap.items()
+                if not _font_supports_glyph(fonttype, ccode)
             ]
 
             bbox = [cvt(x, nearest=False) for x in full_font.bbox]
@@ -1445,9 +1441,9 @@ def embedTTFType42(font, glyphs, descriptor):
             }
 
         if fonttype == 3:
-            return embedTTFType3(font, glyphs, descriptor)
+            return embedTTFType3(font, charmap, descriptor)
         elif fonttype == 42:
-            return embedTTFType42(font, glyphs, descriptor)
+            return embedTTFType42(font, charmap, descriptor)
 
     def alphaState(self, alpha):
         """Return name of an ExtGState that sets alpha to the given value."""
@@ -2212,7 +2208,7 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
 
         self.file.output(Op.begin_text)
         for font, fontsize, ccode, glyph_index, ox, oy in glyphs:
-            self.file._character_tracker.track_glyph(font, glyph_index)
+            self.file._character_tracker.track_glyph(font, ccode, glyph_index)
             fontname = font.fname
             if not _font_supports_glyph(fonttype, ccode):
                 # Unsupported chars (i.e. multibyte in Type 3 or beyond BMP in
@@ -2268,11 +2264,7 @@ def draw_tex(self, gc, x, y, s, prop, angle, *, mtext=None):
                 seq += [['font', pdfname, text.font.size]]
                 oldfont = text.font
             seq += [['text', text.x, text.y, [bytes([text.glyph])], text.x+text.width]]
-            # TODO: This should use glyph indices, not character codes, but will be
-            # fixed soon.
-            self.file._character_tracker.track_glyph(text.font,
-                                                     typing.cast('GlyphIndexType',
-                                                                 text.glyph))
+            self.file._character_tracker.track_glyph(text.font, text.glyph, text.index)
 
         # Find consecutive text strings with constant y coordinate and
         # combine into a sequence of strings and kerns, or just one

diff --git a/lib/matplotlib/backends/backend_ps.py b/lib/matplotlib/backends/backend_ps.py
@@ -826,7 +826,7 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
             f"{angle:g} rotate\n")
         lastfont = None
         for font, fontsize, ccode, glyph_index, ox, oy in glyphs:
-            self._character_tracker.track_glyph(font, glyph_index)
+            self._character_tracker.track_glyph(font, ccode, glyph_index)
             if (font.postscript_name, fontsize) != lastfont:
                 lastfont = font.postscript_name, fontsize
                 self._pswriter.write(
@@ -1069,18 +1069,19 @@ def print_figure_impl(fh):
             print("mpldict begin", file=fh)
             print("\n".join(_psDefs), file=fh)
             if not mpl.rcParams['ps.useafm']:
-                for font_path, glyphs in ps_renderer._character_tracker.used.items():
-                    if not glyphs:
+                for (font, subset_index), charmap in \
+                        ps_renderer._character_tracker.used.items():
+                    if not charmap:
                         continue
                     fonttype = mpl.rcParams['ps.fonttype']
                     # Can't use more than 255 chars from a single Type 3 font.
-                    if len(glyphs) > 255:
+                    if len(charmap) > 255:
                         fonttype = 42
                     fh.flush()
                     if fonttype == 3:
-                        fh.write(_font_to_ps_type3(font_path, glyphs))
+                        fh.write(_font_to_ps_type3(font, charmap.values()))
                     else:  # Type 42 only.
-                        _font_to_ps_type42(font_path, glyphs, fh)
+                        _font_to_ps_type42(font, charmap.values(), fh)
             print("end", file=fh)
             print("%%EndProlog", file=fh)