🌐 AI搜索 & 代理 主页
Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
- Meinrad Recheis ([@henon](https://github.com/henon))
- Mohamed Koubaa ([@koubaa](https://github.com/koubaa))
- Patrick Stewart ([@patstew](https://github.com/patstew))
- Peter Kese ([@pkese](https://github.com/pkese))
- Raphael Nestler ([@rnestler](https://github.com/rnestler))
- Rickard Holmberg ([@rickardraysearch](https://github.com/rickardraysearch))
- Sam Winstanley ([@swinstanley](https://github.com/swinstanley))
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ One must now either use enum members (e.g. `MyEnum.Option`), or use enum constru
- Exception stacktraces on `PythonException.StackTrace` are now properly formatted
- Providing an invalid type parameter to a generic type or method produces a helpful Python error
- Empty parameter names (as can be generated from F#) do not cause crashes
- Unicode strings with surrogates are no longer truncated when converting from Python

### Removed

Expand Down
20 changes: 20 additions & 0 deletions src/embed_tests/TestPyString.cs
Original file line number Diff line number Diff line change
Expand Up @@ -94,5 +94,25 @@ public void TestUnicode()
PyObject actual = new PyString(expected);
Assert.AreEqual(expected, actual.ToString());
}

[Test]
public void TestUnicodeSurrogateToString()
{
    // "foo" followed by U+1F43C, which UTF-16 encodes as the surrogate
    // pair \ud83d\udc3c. The string is produced on the Python side by
    // evaluating a literal, then converted back to a managed string.
    const string expected = "foo\ud83d\udc3c";
    var evaluated = PythonEngine.Eval("'foo\ud83d\udc3c'");

    // The Python object must report 4 as its length.
    var pythonLength = evaluated.Length();
    Assert.AreEqual(4, pythonLength);

    // Converting to a managed string must keep the whole surrogate pair.
    var roundTripped = evaluated.ToString();
    Assert.AreEqual(expected, roundTripped);
}

[Test]
[Ignore("Bug: Unicode conversion issue #1466")]
public void TestUnicodeSurrogate()
{
    // "foo🐼": the panda is written as the UTF-16 surrogate pair
    // \ud83d\udc3c. Here the string goes from .NET into Python via
    // the PyString constructor.
    const string expected = "foo\ud83d\udc3c";
    PyObject wrapped = new PyString(expected);

    // Python treats "foo🐼" as 4 characters, while .NET counts 5 chars.
    var pythonLength = wrapped.Length();
    Assert.AreEqual(4, pythonLength);

    // The round trip back to a managed string must be lossless.
    var roundTripped = wrapped.ToString();
    Assert.AreEqual(expected, roundTripped);
}
}
}
7 changes: 4 additions & 3 deletions src/runtime/runtime.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1646,11 +1646,12 @@ internal static string GetManagedString(IntPtr op)
if (type == PyUnicodeType)
{
using var p = PyUnicode_AsUTF16String(new BorrowedReference(op));
int length = (int)PyUnicode_GetSize(op);
char* codePoints = (char*)PyBytes_AsString(p.DangerousGetAddress());
var bytesPtr = p.DangerousGetAddress();
int bytesLength = (int)Runtime.PyBytes_Size(bytesPtr);
char* codePoints = (char*)PyBytes_AsString(bytesPtr);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NIT: better add overload for PyBytes_AsString that takes BorrowedReference. This would remove unnecessary call to DangerousGetAddress above.

return new string(codePoints,
startIndex: 1, // skip BOM
length: length);
length: bytesLength/2-1); // utf16 - BOM
}

return null;
Expand Down