🌐 AI搜索 & 代理 主页
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 120 additions & 36 deletions lib/matplotlib/backends/_backend_pdf_ps.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@
from fontTools.ttLib import TTFont


_FONT_MAX_GLYPH = {
3: 256,
42: 65536,
}


@functools.lru_cache(50)
def _cached_get_afm_from_fname(fname):
with open(fname, "rb") as fh:
Expand Down Expand Up @@ -103,6 +109,57 @@ def font_as_file(font):
return fh


class GlyphMap:
    """
    A two-way glyph mapping.

    The forward glyph map is from (character string, glyph index)-pairs to
    (subset index, subset character code)-pairs.

    The inverse glyph map is from (subset index, subset character code)-pairs to
    (character string, glyph index)-pairs.
    """

    def __init__(self) -> None:
        # Both tables are keyed and valued with the character *string* (not a
        # single character code), matching what `get`, `iget` and `add` accept and
        # return.
        self._forward: dict[tuple[str, GlyphIndexType],
                            tuple[int, CharacterCodeType]] = {}
        self._inverse: dict[tuple[int, CharacterCodeType],
                            tuple[str, GlyphIndexType]] = {}

    def get(self, charcodes: str,
            glyph_index: GlyphIndexType) -> tuple[int, CharacterCodeType] | None:
        """
        Get the forward mapping from a (character string, glyph index)-pair.

        Parameters
        ----------
        charcodes : str
            The character string that the glyph represents.
        glyph_index : GlyphIndexType
            The glyph index paired with that character string.

        Returns
        -------
        tuple[int, CharacterCodeType] or None
            The (subset index, subset character code)-pair, or *None* if the pair
            is not currently mapped.
        """
        return self._forward.get((charcodes, glyph_index))

    def iget(self, subset: int,
             subset_charcode: CharacterCodeType) -> tuple[str, GlyphIndexType]:
        """
        Get the inverse mapping from a (subset, subset charcode)-pair.

        Parameters
        ----------
        subset : int
            The subset index.
        subset_charcode : CharacterCodeType
            The character code within that subset.

        Returns
        -------
        tuple[str, GlyphIndexType]
            The (character string, glyph index)-pair recorded for that slot.

        Raises
        ------
        KeyError
            If the (subset, subset charcode)-pair has not been added.
        """
        return self._inverse[(subset, subset_charcode)]

    def add(self, charcode: str, glyph_index: GlyphIndexType, subset: int,
            subset_charcode: CharacterCodeType) -> None:
        """
        Add a mapping to this instance.

        Any entry already stored under the same forward or inverse key is
        overwritten.

        Parameters
        ----------
        charcode : str
            The character string to record.
        glyph_index : GlyphIndexType
            The corresponding glyph index to record.
        subset : int
            The subset in which the subset character code resides.
        subset_charcode : CharacterCodeType
            The subset character code within the above subset.
        """
        self._forward[(charcode, glyph_index)] = (subset, subset_charcode)
        self._inverse[(subset, subset_charcode)] = (charcode, glyph_index)


class CharacterTracker:
"""
Helper for font subsetting by the PDF and PS backends.
Expand All @@ -114,16 +171,20 @@ class CharacterTracker:
----------
subset_size : int
The size at which characters are grouped into subsets.
used : dict[tuple[str, int], dict[CharacterCodeType, GlyphIndexType]]
used : dict
A dictionary of font files to character maps.

The key is a font filename and subset within that font.
The key is a font filename.

The value is a dictionary mapping a character code to a glyph index. Note this
mapping is the inverse of FreeType, which maps glyph indices to character codes.
The value is a list of dictionaries, each mapping at most *subset_size*
character codes to glyph indices. Note this mapping is the inverse of FreeType,
which maps glyph indices to character codes.

If *subset_size* is not set, then there will only be one subset per font
filename.
glyph_maps : dict
A dictionary of font files to glyph maps. You probably will want to use the
`.subset_to_unicode` method instead of this attribute.
"""

def __init__(self, subset_size: int = 0):
Expand All @@ -134,7 +195,8 @@ def __init__(self, subset_size: int = 0):
The maximum size that is supported for an embedded font. If provided, then
characters will be grouped into these sized subsets.
"""
self.used: dict[tuple[str, int], dict[CharacterCodeType, GlyphIndexType]] = {}
self.used: dict[str, list[dict[CharacterCodeType, GlyphIndexType]]] = {}
self.glyph_maps: dict[str, GlyphMap] = {}
self.subset_size = subset_size

def track(self, font: FT2Font, s: str) -> list[tuple[int, CharacterCodeType]]:
Expand All @@ -157,33 +219,24 @@ def track(self, font: FT2Font, s: str) -> list[tuple[int, CharacterCodeType]]:
whole). If *subset_size* is not specified, then the subset will always be 0
and the character codes will be returned from the string unchanged.
"""
font_glyphs = []
char_to_font = font._get_fontmap(s)
for _c, _f in char_to_font.items():
charcode = ord(_c)
glyph_index = _f.get_char_index(charcode)
if self.subset_size != 0:
subset = charcode // self.subset_size
subset_charcode = charcode % self.subset_size
else:
subset = 0
subset_charcode = charcode
self.used.setdefault((_f.fname, subset), {})[subset_charcode] = glyph_index
font_glyphs.append((subset, subset_charcode))
return font_glyphs

def track_glyph(
self, font: FT2Font, charcode: CharacterCodeType,
glyph: GlyphIndexType) -> tuple[int, CharacterCodeType]:
return [
self.track_glyph(f, ord(c), f.get_char_index(ord(c)))
for c, f in font._get_fontmap(s).items()
]

def track_glyph(self, font: FT2Font, chars: str | CharacterCodeType,
glyph: GlyphIndexType) -> tuple[int, CharacterCodeType]:
"""
Record character code *charcode* at glyph index *glyph* as using font *font*.

Parameters
----------
font : FT2Font
A font that is being used for the provided string.
charcode : CharacterCodeType
The character code to record.
chars : str or CharacterCodeType
The character(s) to record. This may be a single character code, or multiple
characters in a string, if the glyph maps to several characters. It will be
normalized to a string internally.
glyph : GlyphIndexType
The corresponding glyph index to record.

Expand All @@ -196,33 +249,64 @@ def track_glyph(
The character code within the above subset. If *subset_size* was not
specified on this instance, then this is just *charcode* unmodified.
"""
if self.subset_size != 0:
subset = charcode // self.subset_size
subset_charcode = charcode % self.subset_size
if isinstance(chars, str):
charcode = ord(chars[0])
else:
charcode = chars
chars = chr(chars)

glyph_map = self.glyph_maps.setdefault(font.fname, GlyphMap())
if result := glyph_map.get(chars, glyph):
return result

subset_maps = self.used.setdefault(font.fname, [{}])
use_next_charmap = (
# Multi-character glyphs always go in the non-0 subset.
len(chars) > 1 or
# Default to preserving the character code as it was.
self.subset_size != 0
and (
# But start filling a new subset if outside the first block; this
# preserves ASCII (for Type 3) or the Basic Multilingual Plane (for
# Type 42).
charcode >= self.subset_size
# Or, use a new subset if the character code is already mapped for the
# first block. This means it's using an alternate glyph.
or charcode in subset_maps[0]
)
)
if use_next_charmap:
if len(subset_maps) == 1 or len(subset_maps[-1]) == self.subset_size:
subset_maps.append({})
subset = len(subset_maps) - 1
subset_charcode = len(subset_maps[-1])
else:
subset = 0
subset_charcode = charcode
self.used.setdefault((font.fname, subset), {})[subset_charcode] = glyph
subset_maps[subset][subset_charcode] = glyph
glyph_map.add(chars, glyph, subset, subset_charcode)
return (subset, subset_charcode)

def subset_to_unicode(self, index: int,
charcode: CharacterCodeType) -> CharacterCodeType:
def subset_to_unicode(self, fontname: str, subset: int,
subset_charcode: CharacterCodeType) -> str:
"""
Map a subset index and character code to a Unicode character code.

Parameters
----------
index : int
fontname : str
The name of the font, from the *used* dictionary key.
subset : int
The subset index within a font.
charcode : CharacterCodeType
subset_charcode : CharacterCodeType
The character code within a subset to map back.

Returns
-------
CharacterCodeType
The Unicode character code corresponding to the subsetted one.
str
The Unicode character(s) corresponding to the subsetted character code.
"""
return index * self.subset_size + charcode
return self.glyph_maps[fontname].iget(subset, subset_charcode)[0]


class RendererPDFPSBase(RendererBase):
Expand Down
35 changes: 17 additions & 18 deletions lib/matplotlib/backends/backend_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,12 +368,6 @@ def pdfRepr(obj):
"objects")


_FONT_MAX_GLYPH = {
3: 256,
42: 65536,
}


class Reference:
"""
PDF reference object.
Expand Down Expand Up @@ -691,7 +685,7 @@ def __init__(self, filename, metadata=None):
self._fontNames = {} # maps filenames to internal font names
self._dviFontInfo = {} # maps pdf names to dvifonts
self._character_tracker = _backend_pdf_ps.CharacterTracker(
_FONT_MAX_GLYPH.get(mpl.rcParams['pdf.fonttype'], 0))
_backend_pdf_ps._FONT_MAX_GLYPH.get(mpl.rcParams['ps.fonttype'], 0))

self.alphaStates = {} # maps alpha values to graphics state objects
self._alpha_state_seq = (Name(f'A{i}') for i in itertools.count(1))
Expand Down Expand Up @@ -948,9 +942,8 @@ def writeFonts(self):
else:
# a normal TrueType font
_log.debug('Writing TrueType font.')
charmap = self._character_tracker.used.get((filename, subset))
if charmap:
fonts[Fx] = self.embedTTF(filename, subset, charmap)
charmap = self._character_tracker.used[filename][subset]
fonts[Fx] = self.embedTTF(filename, subset, charmap)
self.writeObject(self.fontObject, fonts)

def _write_afm_font(self, filename):
Expand Down Expand Up @@ -992,8 +985,12 @@ def _embedTeXFont(self, dvifont):

# Reduce the font to only the glyphs used in the document, get the encoding
# for that subset, and compute various properties based on the encoding.
charmap = self._character_tracker.used[(dvifont.fname, 0)]
chars = frozenset(charmap.keys())
charmap = self._character_tracker.used[dvifont.fname][0]
chars = {
# DVI type 1 fonts always map single glyph to single character.
ord(self._character_tracker.subset_to_unicode(dvifont.fname, 0, ccode))
for ccode in charmap
}
t1font = t1font.subset(chars, self._get_subset_prefix(charmap.values()))
fontdict['BaseFont'] = Name(t1font.prop['FontName'])
# createType1Descriptor writes the font data as a side effect
Expand Down Expand Up @@ -1144,14 +1141,16 @@ def generate_unicode_cmap(subset_index, charmap):
unicode_groups[-1][1] = ccode
last_ccode = ccode

def _to_unicode(ccode):
chars = self._character_tracker.subset_to_unicode(
filename, subset_index, ccode)
hexstr = chars.encode('utf-16be').hex()
return f'<{hexstr}>'

width = 2 if fonttype == 3 else 4
unicode_bfrange = []
for start, end in unicode_groups:
real_start = self._character_tracker.subset_to_unicode(subset_index,
start)
real_end = self._character_tracker.subset_to_unicode(subset_index, end)
real_values = ' '.join('<%s>' % chr(x).encode('utf-16be').hex()
for x in range(real_start, real_end+1))
real_values = ' '.join(_to_unicode(x) for x in range(start, end+1))
unicode_bfrange.append(
f'<{start:0{width}x}> <{end:0{width}x}> [{real_values}]')
unicode_cmap = (self._identityToUnicodeCMap %
Expand Down Expand Up @@ -2325,7 +2324,7 @@ def output_singlebyte_chunk(kerns_or_chars):
for item in _text_helpers.layout(s, font, kern_mode=Kerning.UNFITTED,
language=language):
subset, charcode = self.file._character_tracker.track_glyph(
item.ft_object, ord(item.char), item.glyph_index)
item.ft_object, item.char, item.glyph_index)
if (item.ft_object, subset) != prev_font:
if singlebyte_chunk:
output_singlebyte_chunk(singlebyte_chunk)
Expand Down
Loading
Loading