diff --git a/Cargo.lock b/Cargo.lock index 9903fd1933..7a750e5edc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -191,7 +191,7 @@ version = "0.71.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "cexpr", "clang-sys", "itertools 0.13.0", @@ -213,9 +213,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.9.4" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" [[package]] name = "blake2" @@ -237,9 +237,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.12.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" dependencies = [ "memchr", "regex-automata", @@ -296,9 +296,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.41" +version = "1.2.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac9fe6cdbb24b6ade63616c0a0688e45bb56732262c158df3c0c4bea4ca47cb7" +checksum = "739eb0f94557554b3ca9a86d2d37bebd49c5e6d0c1d2bda35ba5bdac830befc2" dependencies = [ "find-msvc-tools", "shlex", @@ -378,18 +378,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.49" +version = "4.5.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4512b90fa68d3a9932cea5184017c5d200f5921df706d45e853537dea51508f" +checksum = "0c2cfd7bf8a6017ddaa4e32ffe7403d547790db06bd171c1c53926faab501623" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.49" +version = "4.5.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0025e98baa12e766c67ba13ff4695a887a1eba19569aad00a472546795bd6730" +checksum = "0a4c05b9e80c5ccd3a7ef080ad7b6ba7d6fc00a985b8b157197075677c82c7a0" dependencies = [ "anstyle", "clap_lex", @@ -796,9 +796,9 @@ dependencies = [ [[package]] name = "dns-lookup" -version = "3.0.0" +version = "3.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "853d5bcf0b73bd5e6d945b976288621825c7166e9f06c5a035ae1aaf42d1b64f" +checksum = "6e39034cee21a2f5bbb66ba0e3689819c4bb5d00382a282006e802a7ffa6c41d" dependencies = [ "cfg-if", "libc", @@ -948,9 +948,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.1.4" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc5a4e564e38c699f2880d3fda590bedc2e69f3f84cd48b457bd892ce61d0aa9" +checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" dependencies = [ "crc32fast", "libz-rs-sys", @@ -990,9 +990,9 @@ dependencies = [ [[package]] name = "get-size-derive2" -version = "0.7.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3814abc7da8ab18d2fd820f5b540b5e39b6af0a32de1bdd7c47576693074843" +checksum = "46b134aa084df7c3a513a1035c52f623e4b3065dfaf3d905a4f28a2e79b5bb3f" dependencies = [ "attribute-derive", "quote", @@ -1001,9 +1001,9 @@ dependencies = [ [[package]] name = "get-size2" -version = "0.7.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5dfe2cec5b5ce8fb94dcdb16a1708baa4d0609cc3ce305ca5d3f6f2ffb59baed" +checksum = "c0d51c9f2e956a517619ad9e7eaebc7a573f9c49b38152e12eade750f89156f9" dependencies = [ "compact_str", "get-size-derive2", @@ -1124,11 +1124,11 @@ checksum = "dfa686283ad6dd069f105e5ab091b04c62850d3e4cf5d67debad1933f55023df" [[package]] name = "home" -version = "0.5.11" +version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" +checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -1167,9 +1167,12 @@ dependencies = [ [[package]] name = "indoc" -version = "2.0.6" +version = "2.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", +] [[package]] name = "insta" @@ -1202,9 +1205,9 @@ dependencies = [ [[package]] name = "is_terminal_polyfill" -version = "1.70.1" +version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" [[package]] name = "itertools" @@ -1373,7 +1376,7 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "416f7e718bdb06000964960ffa43b4335ad4012ae8b99060261aa4a8088d5ccb" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "libc", ] @@ -1614,7 +1617,7 @@ version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "cfg-if", "cfg_aliases", "libc", @@ -1627,7 +1630,7 @@ version = "0.30.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "cfg-if", "cfg_aliases", "libc", @@ -1683,9 +1686,9 @@ dependencies = [ [[package]] name = "num_enum" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a973b4e44ce6cad84ce69d797acf9a044532e4184c4f267913d1b546a0727b7a" +checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" dependencies = [ "num_enum_derive", "rustversion", @@ -1693,9 +1696,9 @@ dependencies = [ [[package]] name = "num_enum_derive" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77e878c846a8abae00dd069496dbe8751b16ac1c3d6bd2a7283a938e8228f90d" +checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" dependencies = [ "proc-macro2", "quote", @@ -1710,9 +1713,9 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "once_cell_polyfill" -version = "1.70.1" +version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" [[package]] name = "oorandom" @@ -1726,7 +1729,7 @@ version = "0.10.74" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"24ad14dd45412269e1a30f52ad8f0664f0f4f4a89ee8fe28c3b3527021ebb654" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "cfg-if", "foreign-types", "libc", @@ -1754,9 +1757,9 @@ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "openssl-src" -version = "300.5.3+3.5.4" +version = "300.5.4+3.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc6bad8cd0233b63971e232cc9c5e83039375b8586d2312f31fda85db8f888c2" +checksum = "a507b3792995dae9b0df8a1c1e3771e8418b7c2d9f0baeba32e6fe8b06c7cb72" dependencies = [ "cc", ] @@ -1963,9 +1966,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.101" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" dependencies = [ "unicode-ident", ] @@ -2187,7 +2190,7 @@ version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", ] [[package]] @@ -2283,7 +2286,7 @@ version = "0.0.0" source = "git+https://github.com/astral-sh/ruff.git?tag=0.14.1#2bffef59665ce7d2630dfd72ee99846663660db8" dependencies = [ "aho-corasick", - "bitflags 2.9.4", + "bitflags 2.10.0", "compact_str", "get-size2", "is-macro", @@ -2301,7 +2304,7 @@ name = "ruff_python_parser" version = "0.0.0" source = "git+https://github.com/astral-sh/ruff.git?tag=0.14.1#2bffef59665ce7d2630dfd72ee99846663660db8" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "bstr", "compact_str", "get-size2", @@ -2356,7 +2359,7 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "errno", "libc", "linux-raw-sys", @@ -2391,7 +2394,7 @@ name = "rustpython-codegen" version = "0.4.0" dependencies = [ "ahash", - "bitflags 2.9.4", + "bitflags 2.10.0", "indexmap", "insta", "itertools 0.14.0", @@ -2415,7 +2418,7 @@ name = "rustpython-common" version = "0.4.0" dependencies = [ "ascii", - "bitflags 2.9.4", + "bitflags 2.10.0", "cfg-if", "getrandom 0.3.4", "itertools 0.14.0", @@ -2454,7 +2457,7 @@ dependencies = [ name = "rustpython-compiler-core" version = "0.4.0" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "itertools 0.14.0", "lz4_flex", "malachite-bigint", @@ -2536,7 +2539,7 @@ dependencies = [ name = "rustpython-sre_engine" version = "0.4.0" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "criterion", "num_enum", "optional", @@ -2590,6 +2593,10 @@ dependencies = [ "phf", "pymath", "rand_core 0.9.3", + "ruff_python_ast", + "ruff_python_parser", + "ruff_source_file", + "ruff_text_size", "rustix", "rustpython-common", "rustpython-derive", @@ -2626,7 +2633,7 @@ version = "0.4.0" dependencies = [ "ahash", "ascii", - "bitflags 2.9.4", + "bitflags 2.10.0", "bstr", "caseless", "cfg-if", @@ -2737,7 +2744,7 @@ version = "17.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e902948a25149d50edc1a8e0141aad50f54e22ba83ff988cf8f7c9ef07f50564" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "cfg-if", "clipboard-win", "fd-lock", @@ -2979,9 +2986,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.107" 
+version = "2.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a26dbd934e5451d21ef060c018dae56fc073894c5a7896f882928a76e6d081b" +checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" dependencies = [ "proc-macro2", "quote", @@ -3005,7 +3012,7 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "core-foundation", "system-configuration-sys", ] @@ -3337,9 +3344,9 @@ checksum = "061dbb8cc7f108532b6087a0065eff575e892a4bcb503dc57323a197457cc202" [[package]] name = "unicode-ident" -version = "1.0.19" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" +checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06" [[package]] name = "unicode-normalization" diff --git a/Cargo.toml b/Cargo.toml index 3cdc471dc3..7a958922a4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -158,10 +158,11 @@ rustpython-sre_engine = { path = "vm/sre_engine", version = "0.4.0" } rustpython-wtf8 = { path = "wtf8", version = "0.4.0" } rustpython-doc = { git = "https://github.com/RustPython/__doc__", tag = "0.3.0", version = "0.3.0" } -ruff_python_parser = { git = "https://github.com/astral-sh/ruff.git", tag = "0.14.1" } ruff_python_ast = { git = "https://github.com/astral-sh/ruff.git", tag = "0.14.1" } -ruff_text_size = { git = "https://github.com/astral-sh/ruff.git", tag = "0.14.1" } +ruff_python_parser = { git = "https://github.com/astral-sh/ruff.git", tag = "0.14.1" } +ruff_python_trivia = { git = "https://github.com/astral-sh/ruff.git", tag = "0.14.1" } ruff_source_file = { git = "https://github.com/astral-sh/ruff.git", tag = "0.14.1" } +ruff_text_size = { git = "https://github.com/astral-sh/ruff.git", tag = "0.14.1" } ahash = "0.8.12" ascii = "1.1" diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 61929e537f..4d15f8f1d5 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -2457,9 +2457,9 @@ def tarfilecmd_failure(self, *args): return script_helper.assert_python_failure('-m', 'tarfile', *args) def make_simple_tarfile(self, tar_name): - files = [support.findfile('tokenize_tests.txt'), + files = [support.findfile('tokenize_tests.txt', subdir='tokenizedata'), support.findfile('tokenize_tests-no-coding-cookie-' - 'and-utf8-bom-sig-only.txt')] + 'and-utf8-bom-sig-only.txt', subdir='tokenizedata')] self.addCleanup(os_helper.unlink, tar_name) with tarfile.open(tar_name, 'w') as tf: for tardata in files: @@ -2542,9 +2542,9 @@ def test_list_command_invalid_file(self): self.assertEqual(rc, 1) def test_create_command(self): - files = [support.findfile('tokenize_tests.txt'), + files = [support.findfile('tokenize_tests.txt', subdir='tokenizedata'), support.findfile('tokenize_tests-no-coding-cookie-' - 'and-utf8-bom-sig-only.txt')] + 'and-utf8-bom-sig-only.txt', subdir='tokenizedata')] for opt in '-c', '--create': try: out = self.tarfilecmd(opt, tmpname, *files) @@ -2555,9 +2555,9 @@ def test_create_command(self): os_helper.unlink(tmpname) def test_create_command_verbose(self): - files = [support.findfile('tokenize_tests.txt'), + files = [support.findfile('tokenize_tests.txt', subdir='tokenizedata'), support.findfile('tokenize_tests-no-coding-cookie-' - 'and-utf8-bom-sig-only.txt')] + 'and-utf8-bom-sig-only.txt', subdir='tokenizedata')] for opt 
in '-v', '--verbose': try: out = self.tarfilecmd(opt, '-c', tmpname, *files, @@ -2569,7 +2569,7 @@ def test_create_command_verbose(self): os_helper.unlink(tmpname) def test_create_command_dotless_filename(self): - files = [support.findfile('tokenize_tests.txt')] + files = [support.findfile('tokenize_tests.txt', subdir='tokenizedata')] try: out = self.tarfilecmd('-c', dotlessname, *files) self.assertEqual(out, b'') @@ -2580,7 +2580,7 @@ def test_create_command_dotless_filename(self): def test_create_command_dot_started_filename(self): tar_name = os.path.join(TEMPDIR, ".testtar") - files = [support.findfile('tokenize_tests.txt')] + files = [support.findfile('tokenize_tests.txt', subdir='tokenizedata')] try: out = self.tarfilecmd('-c', tar_name, *files) self.assertEqual(out, b'') @@ -2590,9 +2590,9 @@ def test_create_command_dot_started_filename(self): os_helper.unlink(tar_name) def test_create_command_compressed(self): - files = [support.findfile('tokenize_tests.txt'), + files = [support.findfile('tokenize_tests.txt', subdir='tokenizedata'), support.findfile('tokenize_tests-no-coding-cookie-' - 'and-utf8-bom-sig-only.txt')] + 'and-utf8-bom-sig-only.txt', subdir='tokenizedata')] for filetype in (GzipTest, Bz2Test, LzmaTest): if not filetype.open: continue diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 44ef4e2416..de81a1bfa7 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1,17 +1,18 @@ -from test import support -from test.support import os_helper -from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP, - STRING, ENDMARKER, ENCODING, tok_name, detect_encoding, - open as tokenize_open, Untokenizer, generate_tokens, - NEWLINE) -from io import BytesIO, StringIO +import os +import re +import token +import tokenize import unittest +from io import BytesIO, StringIO from textwrap import dedent from unittest import TestCase, mock -from test.test_grammar import (VALID_UNDERSCORE_LITERALS, - INVALID_UNDERSCORE_LITERALS) -import os -import token +from test import support +from test.support import os_helper +from test.support.script_helper import run_test_script, make_script, run_python_until_end +from test.support.numbers import ( + VALID_UNDERSCORE_LITERALS, + INVALID_UNDERSCORE_LITERALS, +) # Converts a source string into a list of textual representation @@ -24,12 +25,12 @@ def stringify_tokens_from_source(token_generator, source_string): missing_trailing_nl = source_string[-1] not in '\r\n' for type, token, start, end, line in token_generator: - if type == ENDMARKER: + if type == tokenize.ENDMARKER: break # Ignore the new line on the last line if the input lacks one - if missing_trailing_nl and type == NEWLINE and end[0] == num_lines: + if missing_trailing_nl and type == tokenize.NEWLINE and end[0] == num_lines: continue - type = tok_name[type] + type = tokenize.tok_name[type] result.append(f" {type:10} {token!r:13} {start} {end}") return result @@ -45,19 +46,39 @@ def check_tokenize(self, s, expected): # Format the tokens in s in a table format. # The ENDMARKER and final NEWLINE are omitted. 
f = BytesIO(s.encode('utf-8')) - result = stringify_tokens_from_source(tokenize(f.readline), s) + result = stringify_tokens_from_source(tokenize.tokenize(f.readline), s) self.assertEqual(result, [" ENCODING 'utf-8' (0, 0) (0, 0)"] + expected.rstrip().splitlines()) + def test_invalid_readline(self): + def gen(): + yield "sdfosdg" + yield "sdfosdg" + with self.assertRaises(TypeError): + list(tokenize.tokenize(gen().__next__)) + + def gen(): + yield b"sdfosdg" + yield b"sdfosdg" + with self.assertRaises(TypeError): + list(tokenize.generate_tokens(gen().__next__)) + + def gen(): + yield "sdfosdg" + 1/0 + with self.assertRaises(ZeroDivisionError): + list(tokenize.generate_tokens(gen().__next__)) + def test_implicit_newline(self): # Make sure that the tokenizer puts in an implicit NEWLINE # when the input lacks a trailing new line. f = BytesIO("x".encode('utf-8')) - tokens = list(tokenize(f.readline)) - self.assertEqual(tokens[-2].type, NEWLINE) - self.assertEqual(tokens[-1].type, ENDMARKER) + tokens = list(tokenize.tokenize(f.readline)) + self.assertEqual(tokens[-2].type, tokenize.NEWLINE) + self.assertEqual(tokens[-1].type, tokenize.ENDMARKER) + @unittest.expectedFailure # TODO: RUSTPYTHON def test_basic(self): self.check_tokenize("1 + 1", """\ NUMBER '1' (1, 0) (1, 1) @@ -83,6 +104,32 @@ def test_basic(self): NEWLINE '\\n' (4, 26) (4, 27) DEDENT '' (5, 0) (5, 0) """) + + self.check_tokenize("if True:\r\n # NL\r\n foo='bar'\r\n\r\n", """\ + NAME 'if' (1, 0) (1, 2) + NAME 'True' (1, 3) (1, 7) + OP ':' (1, 7) (1, 8) + NEWLINE '\\r\\n' (1, 8) (1, 10) + COMMENT '# NL' (2, 4) (2, 8) + NL '\\r\\n' (2, 8) (2, 10) + INDENT ' ' (3, 0) (3, 4) + NAME 'foo' (3, 4) (3, 7) + OP '=' (3, 7) (3, 8) + STRING "\'bar\'" (3, 8) (3, 13) + NEWLINE '\\r\\n' (3, 13) (3, 15) + NL '\\r\\n' (4, 0) (4, 2) + DEDENT '' (5, 0) (5, 0) + """) + + self.check_tokenize("x = 1 + \\\r\n1\r\n", """\ + NAME 'x' (1, 0) (1, 1) + OP '=' (1, 2) (1, 3) + NUMBER '1' (1, 4) (1, 5) + OP '+' (1, 6) (1, 7) + NUMBER '1' (2, 0) (2, 1) + NEWLINE '\\r\\n' (2, 1) (2, 3) + """) + indent_error_file = b"""\ def k(x): x += 2 @@ -91,9 +138,18 @@ def k(x): readline = BytesIO(indent_error_file).readline with self.assertRaisesRegex(IndentationError, "unindent does not match any " - "outer indentation level"): - for tok in tokenize(readline): + "outer indentation level") as e: + for tok in tokenize.tokenize(readline): pass + self.assertEqual(e.exception.lineno, 3) + self.assertEqual(e.exception.filename, '') + self.assertEqual(e.exception.end_lineno, None) + self.assertEqual(e.exception.end_offset, None) + self.assertEqual( + e.exception.msg, + 'unindent does not match any outer indentation level') + self.assertEqual(e.exception.offset, 9) + self.assertEqual(e.exception.text, ' x += 5') def test_int(self): # Ordinary integers and binary operators @@ -177,7 +233,7 @@ def test_long(self): """) def test_float(self): - # Floating point numbers + # Floating-point numbers self.check_tokenize("x = 3.14159", """\ NAME 'x' (1, 0) (1, 1) OP '=' (1, 2) (1, 3) @@ -219,8 +275,8 @@ def test_float(self): def test_underscore_literals(self): def number_token(s): f = BytesIO(s.encode('utf-8')) - for toktype, token, start, end, line in tokenize(f.readline): - if toktype == NUMBER: + for toktype, token, start, end, line in tokenize.tokenize(f.readline): + if toktype == tokenize.NUMBER: return token return 'invalid token' for lit in VALID_UNDERSCORE_LITERALS: @@ -228,9 +284,19 @@ def number_token(s): # this won't work with compound complex inputs continue 
self.assertEqual(number_token(lit), lit) + # Valid cases with extra underscores in the tokenize module + # See gh-105549 for context + extra_valid_cases = {"0_7", "09_99"} for lit in INVALID_UNDERSCORE_LITERALS: + if lit in extra_valid_cases: + continue + try: + number_token(lit) + except tokenize.TokenError: + continue self.assertNotEqual(number_token(lit), lit) + @unittest.expectedFailure # TODO: RUSTPYTHON def test_string(self): # String literals self.check_tokenize("x = ''; y = \"\"", """\ @@ -380,21 +446,175 @@ def test_string(self): STRING 'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4) """) self.check_tokenize('f"abc"', """\ - STRING 'f"abc"' (1, 0) (1, 6) + FSTRING_START 'f"' (1, 0) (1, 2) + FSTRING_MIDDLE 'abc' (1, 2) (1, 5) + FSTRING_END '"' (1, 5) (1, 6) """) self.check_tokenize('fR"a{b}c"', """\ - STRING 'fR"a{b}c"' (1, 0) (1, 9) + FSTRING_START 'fR"' (1, 0) (1, 3) + FSTRING_MIDDLE 'a' (1, 3) (1, 4) + OP '{' (1, 4) (1, 5) + NAME 'b' (1, 5) (1, 6) + OP '}' (1, 6) (1, 7) + FSTRING_MIDDLE 'c' (1, 7) (1, 8) + FSTRING_END '"' (1, 8) (1, 9) + """) + self.check_tokenize('fR"a{{{b!r}}}c"', """\ + FSTRING_START 'fR"' (1, 0) (1, 3) + FSTRING_MIDDLE 'a{' (1, 3) (1, 5) + OP '{' (1, 6) (1, 7) + NAME 'b' (1, 7) (1, 8) + OP '!' (1, 8) (1, 9) + NAME 'r' (1, 9) (1, 10) + OP '}' (1, 10) (1, 11) + FSTRING_MIDDLE '}' (1, 11) (1, 12) + FSTRING_MIDDLE 'c' (1, 13) (1, 14) + FSTRING_END '"' (1, 14) (1, 15) + """) + self.check_tokenize('f"{{{1+1}}}"', """\ + FSTRING_START 'f"' (1, 0) (1, 2) + FSTRING_MIDDLE '{' (1, 2) (1, 3) + OP '{' (1, 4) (1, 5) + NUMBER '1' (1, 5) (1, 6) + OP '+' (1, 6) (1, 7) + NUMBER '1' (1, 7) (1, 8) + OP '}' (1, 8) (1, 9) + FSTRING_MIDDLE '}' (1, 9) (1, 10) + FSTRING_END '"' (1, 11) (1, 12) + """) + self.check_tokenize('f"""{f\'\'\'{f\'{f"{1+1}"}\'}\'\'\'}"""', """\ + FSTRING_START 'f\"""' (1, 0) (1, 4) + OP '{' (1, 4) (1, 5) + FSTRING_START "f'''" (1, 5) (1, 9) + OP '{' (1, 9) (1, 10) + FSTRING_START "f'" (1, 10) (1, 12) + OP '{' (1, 12) (1, 13) + FSTRING_START 'f"' (1, 13) (1, 15) + OP '{' (1, 15) (1, 16) + NUMBER '1' (1, 16) (1, 17) + OP '+' (1, 17) (1, 18) + NUMBER '1' (1, 18) (1, 19) + OP '}' (1, 19) (1, 20) + FSTRING_END '"' (1, 20) (1, 21) + OP '}' (1, 21) (1, 22) + FSTRING_END "'" (1, 22) (1, 23) + OP '}' (1, 23) (1, 24) + FSTRING_END "'''" (1, 24) (1, 27) + OP '}' (1, 27) (1, 28) + FSTRING_END '\"""' (1, 28) (1, 31) + """) + self.check_tokenize('f""" x\nstr(data, encoding={invalid!r})\n"""', """\ + FSTRING_START 'f\"""' (1, 0) (1, 4) + FSTRING_MIDDLE ' x\\nstr(data, encoding=' (1, 4) (2, 19) + OP '{' (2, 19) (2, 20) + NAME 'invalid' (2, 20) (2, 27) + OP '!' 
(2, 27) (2, 28) + NAME 'r' (2, 28) (2, 29) + OP '}' (2, 29) (2, 30) + FSTRING_MIDDLE ')\\n' (2, 30) (3, 0) + FSTRING_END '\"""' (3, 0) (3, 3) + """) + self.check_tokenize('f"""123456789\nsomething{None}bad"""', """\ + FSTRING_START 'f\"""' (1, 0) (1, 4) + FSTRING_MIDDLE '123456789\\nsomething' (1, 4) (2, 9) + OP '{' (2, 9) (2, 10) + NAME 'None' (2, 10) (2, 14) + OP '}' (2, 14) (2, 15) + FSTRING_MIDDLE 'bad' (2, 15) (2, 18) + FSTRING_END '\"""' (2, 18) (2, 21) """) self.check_tokenize('f"""abc"""', """\ - STRING 'f\"\"\"abc\"\"\"' (1, 0) (1, 10) + FSTRING_START 'f\"""' (1, 0) (1, 4) + FSTRING_MIDDLE 'abc' (1, 4) (1, 7) + FSTRING_END '\"""' (1, 7) (1, 10) """) self.check_tokenize(r'f"abc\ def"', """\ - STRING 'f"abc\\\\\\ndef"' (1, 0) (2, 4) + FSTRING_START 'f"' (1, 0) (1, 2) + FSTRING_MIDDLE 'abc\\\\\\ndef' (1, 2) (2, 3) + FSTRING_END '"' (2, 3) (2, 4) """) self.check_tokenize(r'Rf"abc\ def"', """\ - STRING 'Rf"abc\\\\\\ndef"' (1, 0) (2, 4) + FSTRING_START 'Rf"' (1, 0) (1, 3) + FSTRING_MIDDLE 'abc\\\\\\ndef' (1, 3) (2, 3) + FSTRING_END '"' (2, 3) (2, 4) + """) + self.check_tokenize("f'some words {a+b:.3f} more words {c+d=} final words'", """\ + FSTRING_START "f'" (1, 0) (1, 2) + FSTRING_MIDDLE 'some words ' (1, 2) (1, 13) + OP '{' (1, 13) (1, 14) + NAME 'a' (1, 14) (1, 15) + OP '+' (1, 15) (1, 16) + NAME 'b' (1, 16) (1, 17) + OP ':' (1, 17) (1, 18) + FSTRING_MIDDLE '.3f' (1, 18) (1, 21) + OP '}' (1, 21) (1, 22) + FSTRING_MIDDLE ' more words ' (1, 22) (1, 34) + OP '{' (1, 34) (1, 35) + NAME 'c' (1, 35) (1, 36) + OP '+' (1, 36) (1, 37) + NAME 'd' (1, 37) (1, 38) + OP '=' (1, 38) (1, 39) + OP '}' (1, 39) (1, 40) + FSTRING_MIDDLE ' final words' (1, 40) (1, 52) + FSTRING_END "'" (1, 52) (1, 53) + """) + self.check_tokenize("""\ +f'''{ +3 +=}'''""", """\ + FSTRING_START "f'''" (1, 0) (1, 4) + OP '{' (1, 4) (1, 5) + NL '\\n' (1, 5) (1, 6) + NUMBER '3' (2, 0) (2, 1) + NL '\\n' (2, 1) (2, 2) + OP '=' (3, 0) (3, 1) + OP '}' (3, 1) (3, 2) + FSTRING_END "'''" (3, 2) (3, 5) + """) + self.check_tokenize("""\ +f'''__{ + x:a +}__'''""", """\ + FSTRING_START "f'''" (1, 0) (1, 4) + FSTRING_MIDDLE '__' (1, 4) (1, 6) + OP '{' (1, 6) (1, 7) + NL '\\n' (1, 7) (1, 8) + NAME 'x' (2, 4) (2, 5) + OP ':' (2, 5) (2, 6) + FSTRING_MIDDLE 'a\\n' (2, 6) (3, 0) + OP '}' (3, 0) (3, 1) + FSTRING_MIDDLE '__' (3, 1) (3, 3) + FSTRING_END "'''" (3, 3) (3, 6) + """) + self.check_tokenize("""\ +f'''__{ + x:a + b + c + d +}__'''""", """\ + FSTRING_START "f'''" (1, 0) (1, 4) + FSTRING_MIDDLE '__' (1, 4) (1, 6) + OP '{' (1, 6) (1, 7) + NL '\\n' (1, 7) (1, 8) + NAME 'x' (2, 4) (2, 5) + OP ':' (2, 5) (2, 6) + FSTRING_MIDDLE 'a\\n b\\n c\\n d\\n' (2, 6) (6, 0) + OP '}' (6, 0) (6, 1) + FSTRING_MIDDLE '__' (6, 1) (6, 3) + FSTRING_END "'''" (6, 3) (6, 6) + """) + + self.check_tokenize("""\ + '''Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli + aktualni pracownicy, obecni pracownicy''' +""", """\ + INDENT ' ' (1, 0) (1, 4) + STRING "'''Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli\\n aktualni pracownicy, obecni pracownicy'''" (1, 4) (2, 45) + NEWLINE '\\n' (2, 45) (2, 46) + DEDENT '' (3, 0) (3, 0) """) def test_function(self): @@ -457,6 +677,7 @@ def test_function(self): NAME 'pass' (1, 34) (1, 38) """) + @unittest.expectedFailure # TODO: RUSTPYTHON def test_comparison(self): # Comparison self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " @@ -587,6 +808,7 @@ def test_unary(self): NUMBER '1' (1, 22) (1, 23) """) + @unittest.expectedFailure # TODO: RUSTPYTHON def test_selector(self): # 
Selector self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\ @@ -609,6 +831,7 @@ def test_selector(self): OP ')' (2, 29) (2, 30) """) + @unittest.expectedFailure # TODO: RUSTPYTHON def test_method(self): # Methods self.check_tokenize("@staticmethod\ndef foo(x,y): pass", """\ @@ -626,6 +849,7 @@ def test_method(self): NAME 'pass' (2, 14) (2, 18) """) + @unittest.expectedFailure # TODO: RUSTPYTHON def test_tabs(self): # Evil tabs self.check_tokenize("def f():\n" @@ -647,6 +871,7 @@ def test_tabs(self): DEDENT '' (4, 0) (4, 0) """) + @unittest.expectedFailure # TODO: RUSTPYTHON def test_non_ascii_identifiers(self): # Non-ascii identifiers self.check_tokenize("Örter = 'places'\ngrün = 'green'", """\ @@ -659,6 +884,7 @@ def test_non_ascii_identifiers(self): STRING "'green'" (2, 7) (2, 14) """) + @unittest.expectedFailure # TODO: RUSTPYTHON def test_unicode(self): # Legacy unicode literals: self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\ @@ -671,6 +897,7 @@ def test_unicode(self): STRING "U'green'" (2, 7) (2, 15) """) + @unittest.expectedFailure # TODO: RUSTPYTHON def test_async(self): # Async/await extension: self.check_tokenize("async = 1", """\ @@ -945,29 +1172,81 @@ async def bar(): pass DEDENT '' (7, 0) (7, 0) """) + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_newline_after_parenthesized_block_with_comment(self): + self.check_tokenize('''\ +[ + # A comment here + 1 +] +''', """\ + OP '[' (1, 0) (1, 1) + NL '\\n' (1, 1) (1, 2) + COMMENT '# A comment here' (2, 4) (2, 20) + NL '\\n' (2, 20) (2, 21) + NUMBER '1' (3, 4) (3, 5) + NL '\\n' (3, 5) (3, 6) + OP ']' (4, 0) (4, 1) + NEWLINE '\\n' (4, 1) (4, 2) + """) + + def test_closing_parenthesis_from_different_line(self): + self.check_tokenize("); x", """\ + OP ')' (1, 0) (1, 1) + OP ';' (1, 1) (1, 2) + NAME 'x' (1, 3) (1, 4) + """) + + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_multiline_non_ascii_fstring(self): + self.check_tokenize("""\ +a = f''' + Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli'''""", """\ + NAME 'a' (1, 0) (1, 1) + OP '=' (1, 2) (1, 3) + FSTRING_START "f\'\'\'" (1, 4) (1, 8) + FSTRING_MIDDLE '\\n Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli' (1, 8) (2, 68) + FSTRING_END "\'\'\'" (2, 68) (2, 71) + """) + + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_multiline_non_ascii_fstring_with_expr(self): + self.check_tokenize("""\ +f''' + 🔗 This is a test {test_arg1}🔗 +🔗'''""", """\ + FSTRING_START "f\'\'\'" (1, 0) (1, 4) + FSTRING_MIDDLE '\\n 🔗 This is a test ' (1, 4) (2, 21) + OP '{' (2, 21) (2, 22) + NAME 'test_arg1' (2, 22) (2, 31) + OP '}' (2, 31) (2, 32) + FSTRING_MIDDLE '🔗\\n🔗' (2, 32) (3, 1) + FSTRING_END "\'\'\'" (3, 1) (3, 4) + """) + class GenerateTokensTest(TokenizeTest): def check_tokenize(self, s, expected): # Format the tokens in s in a table format. # The ENDMARKER and final NEWLINE are omitted. f = StringIO(s) - result = stringify_tokens_from_source(generate_tokens(f.readline), s) + result = stringify_tokens_from_source(tokenize.generate_tokens(f.readline), s) self.assertEqual(result, expected.rstrip().splitlines()) def decistmt(s): result = [] - g = tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string + g = tokenize.tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string for toknum, tokval, _, _, _ in g: - if toknum == NUMBER and '.' in tokval: # replace NUMBER tokens + if toknum == tokenize.NUMBER and '.' 
in tokval: # replace NUMBER tokens result.extend([ - (NAME, 'Decimal'), - (OP, '('), - (STRING, repr(tokval)), - (OP, ')') + (tokenize.NAME, 'Decimal'), + (tokenize.OP, '('), + (tokenize.STRING, repr(tokval)), + (tokenize.OP, ')') ]) else: result.append((toknum, tokval)) - return untokenize(result).decode('utf-8') + return tokenize.untokenize(result).decode('utf-8').strip() class TestMisc(TestCase): @@ -991,6 +1270,13 @@ def test_decistmt(self): self.assertEqual(eval(decistmt(s)), Decimal('-3.217160342717258261933904529E-7')) + def test___all__(self): + expected = token.__all__ + [ + "TokenInfo", "TokenError", "generate_tokens", + "detect_encoding", "untokenize", "open", "tokenize", + ] + self.assertCountEqual(tokenize.__all__, expected) + class TestTokenizerAdheresToPep0263(TestCase): """ @@ -998,9 +1284,11 @@ class TestTokenizerAdheresToPep0263(TestCase): """ def _testFile(self, filename): - path = os.path.join(os.path.dirname(__file__), filename) - TestRoundtrip.check_roundtrip(self, open(path, 'rb')) + path = os.path.join(os.path.dirname(__file__), 'tokenizedata', filename) + with open(path, 'rb') as f: + TestRoundtrip.check_roundtrip(self, f) + @unittest.expectedFailure # TODO: RUSTPYTHON def test_utf8_coding_cookie_and_no_utf8_bom(self): f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt' self._testFile(f) @@ -1016,16 +1304,16 @@ def test_latin1_coding_cookie_and_utf8_bom(self): f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt' self.assertRaises(SyntaxError, self._testFile, f) + @unittest.expectedFailure # TODO: RUSTPYTHON def test_no_coding_cookie_and_utf8_bom(self): f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt' self._testFile(f) + @unittest.expectedFailure # TODO: RUSTPYTHON def test_utf8_coding_cookie_and_utf8_bom(self): f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt' self._testFile(f) - # TODO: RUSTPYTHON - @unittest.expectedFailure # "bad_coding.py" and "bad_coding2.py" make the WASM CI fail def test_bad_coding_cookie(self): self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py') self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py') @@ -1041,33 +1329,18 @@ def readline(): nonlocal first if not first: first = True - return line + yield line else: - return b'' + yield b'' # skip the initial encoding token and the end tokens - tokens = list(_tokenize(readline, encoding='utf-8'))[1:-2] - expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')] + tokens = list(tokenize._generate_tokens_from_c_tokenizer(readline().__next__, + encoding='utf-8', + extra_tokens=True))[:-2] + expected_tokens = [tokenize.TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')] self.assertEqual(tokens, expected_tokens, "bytes not decoded with encoding") - def test__tokenize_does_not_decode_with_encoding_none(self): - literal = '"ЉЊЈЁЂ"' - first = False - def readline(): - nonlocal first - if not first: - first = True - return literal - else: - return b'' - - # skip the end tokens - tokens = list(_tokenize(readline, encoding=None))[:-2] - expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')] - self.assertEqual(tokens, expected_tokens, - "string not tokenized when encoding is None") - class TestDetectEncoding(TestCase): @@ -1088,7 +1361,7 @@ def test_no_bom_no_encoding_cookie(self): b'print(something)\n', b'do_something(else)\n' ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'utf-8') 
self.assertEqual(consumed_lines, list(lines[:2])) @@ -1098,7 +1371,7 @@ def test_bom_no_cookie(self): b'print(something)\n', b'do_something(else)\n' ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'utf-8-sig') self.assertEqual(consumed_lines, [b'# something\n', b'print(something)\n']) @@ -1109,7 +1382,7 @@ def test_cookie_first_line_no_bom(self): b'print(something)\n', b'do_something(else)\n' ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'iso-8859-1') self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n']) @@ -1119,7 +1392,7 @@ def test_matched_bom_and_cookie_first_line(self): b'print(something)\n', b'do_something(else)\n' ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'utf-8-sig') self.assertEqual(consumed_lines, [b'# coding=utf-8\n']) @@ -1130,7 +1403,7 @@ def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self): b'do_something(else)\n' ) readline = self.get_readline(lines) - self.assertRaises(SyntaxError, detect_encoding, readline) + self.assertRaises(SyntaxError, tokenize.detect_encoding, readline) def test_cookie_second_line_no_bom(self): lines = ( @@ -1139,7 +1412,7 @@ def test_cookie_second_line_no_bom(self): b'print(something)\n', b'do_something(else)\n' ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'ascii') expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n'] self.assertEqual(consumed_lines, expected) @@ -1151,7 +1424,7 @@ def test_matched_bom_and_cookie_second_line(self): b'print(something)\n', b'do_something(else)\n' ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'utf-8-sig') self.assertEqual(consumed_lines, [b'#! 
something\n', b'f# coding=utf-8\n']) @@ -1164,7 +1437,7 @@ def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self): b'do_something(else)\n' ) readline = self.get_readline(lines) - self.assertRaises(SyntaxError, detect_encoding, readline) + self.assertRaises(SyntaxError, tokenize.detect_encoding, readline) def test_cookie_second_line_noncommented_first_line(self): lines = ( @@ -1172,7 +1445,7 @@ def test_cookie_second_line_noncommented_first_line(self): b'# vim: set fileencoding=iso8859-15 :\n', b"print('\xe2\x82\xac')\n" ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'utf-8') expected = [b"print('\xc2\xa3')\n"] self.assertEqual(consumed_lines, expected) @@ -1183,7 +1456,7 @@ def test_cookie_second_line_commented_first_line(self): b'# vim: set fileencoding=iso8859-15 :\n', b"print('\xe2\x82\xac')\n" ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'iso8859-15') expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n'] self.assertEqual(consumed_lines, expected) @@ -1194,13 +1467,13 @@ def test_cookie_second_line_empty_first_line(self): b'# vim: set fileencoding=iso8859-15 :\n', b"print('\xe2\x82\xac')\n" ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'iso8859-15') expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n'] self.assertEqual(consumed_lines, expected) def test_latin1_normalization(self): - # See get_normal_name() in tokenizer.c. + # See get_normal_name() in Parser/tokenizer/helpers.c. encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix", "iso-8859-1-unix", "iso-latin-1-mac") for encoding in encodings: @@ -1211,21 +1484,21 @@ def test_latin1_normalization(self): b"print(things)\n", b"do_something += 4\n") rl = self.get_readline(lines) - found, consumed_lines = detect_encoding(rl) + found, consumed_lines = tokenize.detect_encoding(rl) self.assertEqual(found, "iso-8859-1") def test_syntaxerror_latin1(self): - # Issue 14629: need to raise SyntaxError if the first + # Issue 14629: need to raise TokenError if the first # line(s) have non-UTF-8 characters lines = ( b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S ) readline = self.get_readline(lines) - self.assertRaises(SyntaxError, detect_encoding, readline) + self.assertRaises(SyntaxError, tokenize.detect_encoding, readline) def test_utf8_normalization(self): - # See get_normal_name() in tokenizer.c. + # See get_normal_name() in Parser/tokenizer/helpers.c. 
encodings = ("utf-8", "utf-8-mac", "utf-8-unix") for encoding in encodings: for rep in ("-", "_"): @@ -1234,36 +1507,36 @@ def test_utf8_normalization(self): b"# coding: " + enc.encode("ascii") + b"\n", b"1 + 3\n") rl = self.get_readline(lines) - found, consumed_lines = detect_encoding(rl) + found, consumed_lines = tokenize.detect_encoding(rl) self.assertEqual(found, "utf-8") def test_short_files(self): readline = self.get_readline((b'print(something)\n',)) - encoding, consumed_lines = detect_encoding(readline) + encoding, consumed_lines = tokenize.detect_encoding(readline) self.assertEqual(encoding, 'utf-8') self.assertEqual(consumed_lines, [b'print(something)\n']) - encoding, consumed_lines = detect_encoding(self.get_readline(())) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(())) self.assertEqual(encoding, 'utf-8') self.assertEqual(consumed_lines, []) readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',)) - encoding, consumed_lines = detect_encoding(readline) + encoding, consumed_lines = tokenize.detect_encoding(readline) self.assertEqual(encoding, 'utf-8-sig') self.assertEqual(consumed_lines, [b'print(something)\n']) readline = self.get_readline((b'\xef\xbb\xbf',)) - encoding, consumed_lines = detect_encoding(readline) + encoding, consumed_lines = tokenize.detect_encoding(readline) self.assertEqual(encoding, 'utf-8-sig') self.assertEqual(consumed_lines, []) readline = self.get_readline((b'# coding: bad\n',)) - self.assertRaises(SyntaxError, detect_encoding, readline) + self.assertRaises(SyntaxError, tokenize.detect_encoding, readline) def test_false_encoding(self): # Issue 18873: "Encoding" detected in non-comment lines readline = self.get_readline((b'print("#coding=fake")',)) - encoding, consumed_lines = detect_encoding(readline) + encoding, consumed_lines = tokenize.detect_encoding(readline) self.assertEqual(encoding, 'utf-8') self.assertEqual(consumed_lines, [b'print("#coding=fake")']) @@ -1276,14 +1549,14 @@ def test_open(self): with open(filename, 'w', encoding=encoding) as fp: print("# coding: %s" % encoding, file=fp) print("print('euro:\u20ac')", file=fp) - with tokenize_open(filename) as fp: + with tokenize.open(filename) as fp: self.assertEqual(fp.encoding, encoding) self.assertEqual(fp.mode, 'r') # test BOM (no coding cookie) with open(filename, 'w', encoding='utf-8-sig') as fp: print("print('euro:\u20ac')", file=fp) - with tokenize_open(filename) as fp: + with tokenize.open(filename) as fp: self.assertEqual(fp.encoding, 'utf-8-sig') self.assertEqual(fp.mode, 'r') @@ -1310,16 +1583,16 @@ def readline(self): ins = Bunk(lines, path) # Make sure lacking a name isn't an issue. 
del ins.name - detect_encoding(ins.readline) + tokenize.detect_encoding(ins.readline) with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)): ins = Bunk(lines, path) - detect_encoding(ins.readline) + tokenize.detect_encoding(ins.readline) def test_open_error(self): # Issue #23840: open() must close the binary file on error m = BytesIO(b'#coding:xxx') with mock.patch('tokenize._builtin_open', return_value=m): - self.assertRaises(SyntaxError, tokenize_open, 'foobar') + self.assertRaises(SyntaxError, tokenize.open, 'foobar') self.assertTrue(m.closed) @@ -1327,17 +1600,20 @@ class TestTokenize(TestCase): def test_tokenize(self): import tokenize as tokenize_module - encoding = object() + encoding = "utf-8" encoding_used = None def mock_detect_encoding(readline): return encoding, [b'first', b'second'] - def mock__tokenize(readline, encoding): + def mock__tokenize(readline, encoding, **kwargs): nonlocal encoding_used encoding_used = encoding out = [] while True: - next_line = readline() + try: + next_line = readline() + except StopIteration: + return out if next_line: out.append(next_line) continue @@ -1352,16 +1628,16 @@ def mock_readline(): return str(counter).encode() orig_detect_encoding = tokenize_module.detect_encoding - orig__tokenize = tokenize_module._tokenize + orig_c_token = tokenize_module._generate_tokens_from_c_tokenizer tokenize_module.detect_encoding = mock_detect_encoding - tokenize_module._tokenize = mock__tokenize + tokenize_module._generate_tokens_from_c_tokenizer = mock__tokenize try: - results = tokenize(mock_readline) - self.assertEqual(list(results), + results = tokenize.tokenize(mock_readline) + self.assertEqual(list(results)[1:], [b'first', b'second', b'1', b'2', b'3', b'4']) finally: tokenize_module.detect_encoding = orig_detect_encoding - tokenize_module._tokenize = orig__tokenize + tokenize_module._generate_tokens_from_c_tokenizer = orig_c_token self.assertEqual(encoding_used, encoding) @@ -1373,23 +1649,23 @@ def test_oneline_defs(self): buf = '\n'.join(buf) # Test that 500 consequent, one-line defs is OK - toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline)) + toks = list(tokenize.tokenize(BytesIO(buf.encode('utf-8')).readline)) self.assertEqual(toks[-3].string, 'OK') # [-1] is always ENDMARKER # [-2] is always NEWLINE def assertExactTypeEqual(self, opstr, *optypes): - tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline)) + tokens = list(tokenize.tokenize(BytesIO(opstr.encode('utf-8')).readline)) num_optypes = len(optypes) self.assertEqual(len(tokens), 3 + num_optypes) - self.assertEqual(tok_name[tokens[0].exact_type], - tok_name[ENCODING]) + self.assertEqual(tokenize.tok_name[tokens[0].exact_type], + tokenize.tok_name[tokenize.ENCODING]) for i in range(num_optypes): - self.assertEqual(tok_name[tokens[i + 1].exact_type], - tok_name[optypes[i]]) - self.assertEqual(tok_name[tokens[1 + num_optypes].exact_type], - tok_name[token.NEWLINE]) - self.assertEqual(tok_name[tokens[2 + num_optypes].exact_type], - tok_name[token.ENDMARKER]) + self.assertEqual(tokenize.tok_name[tokens[i + 1].exact_type], + tokenize.tok_name[optypes[i]]) + self.assertEqual(tokenize.tok_name[tokens[1 + num_optypes].exact_type], + tokenize.tok_name[token.NEWLINE]) + self.assertEqual(tokenize.tok_name[tokens[2 + num_optypes].exact_type], + tokenize.tok_name[token.ENDMARKER]) def test_exact_type(self): self.assertExactTypeEqual('()', token.LPAR, token.RPAR) @@ -1439,11 +1715,11 @@ def test_exact_type(self): self.assertExactTypeEqual('@=', token.ATEQUAL) 
self.assertExactTypeEqual('a**2+b**2==c**2', - NAME, token.DOUBLESTAR, NUMBER, + tokenize.NAME, token.DOUBLESTAR, tokenize.NUMBER, token.PLUS, - NAME, token.DOUBLESTAR, NUMBER, + tokenize.NAME, token.DOUBLESTAR, tokenize.NUMBER, token.EQEQUAL, - NAME, token.DOUBLESTAR, NUMBER) + tokenize.NAME, token.DOUBLESTAR, tokenize.NUMBER) self.assertExactTypeEqual('{1, 2, 3}', token.LBRACE, token.NUMBER, token.COMMA, @@ -1460,22 +1736,58 @@ def test_pathological_trailing_whitespace(self): # See http://bugs.python.org/issue16152 self.assertExactTypeEqual('@ ', token.AT) + @unittest.expectedFailure # TODO: RUSTPYTHON def test_comment_at_the_end_of_the_source_without_newline(self): # See http://bugs.python.org/issue44667 source = 'b = 1\n\n#test' - expected_tokens = [token.NAME, token.EQUAL, token.NUMBER, token.NEWLINE, token.NL, token.COMMENT] + expected_tokens = [ + tokenize.TokenInfo(type=token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''), + tokenize.TokenInfo(type=token.NAME, string='b', start=(1, 0), end=(1, 1), line='b = 1\n'), + tokenize.TokenInfo(type=token.OP, string='=', start=(1, 2), end=(1, 3), line='b = 1\n'), + tokenize.TokenInfo(type=token.NUMBER, string='1', start=(1, 4), end=(1, 5), line='b = 1\n'), + tokenize.TokenInfo(type=token.NEWLINE, string='\n', start=(1, 5), end=(1, 6), line='b = 1\n'), + tokenize.TokenInfo(type=token.NL, string='\n', start=(2, 0), end=(2, 1), line='\n'), + tokenize.TokenInfo(type=token.COMMENT, string='#test', start=(3, 0), end=(3, 5), line='#test'), + tokenize.TokenInfo(type=token.NL, string='', start=(3, 5), end=(3, 6), line='#test'), + tokenize.TokenInfo(type=token.ENDMARKER, string='', start=(4, 0), end=(4, 0), line='') + ] + + tokens = list(tokenize.tokenize(BytesIO(source.encode('utf-8')).readline)) + self.assertEqual(tokens, expected_tokens) + + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_newline_and_space_at_the_end_of_the_source_without_newline(self): + # See https://github.com/python/cpython/issues/105435 + source = 'a\n ' + expected_tokens = [ + tokenize.TokenInfo(token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''), + tokenize.TokenInfo(token.NAME, string='a', start=(1, 0), end=(1, 1), line='a\n'), + tokenize.TokenInfo(token.NEWLINE, string='\n', start=(1, 1), end=(1, 2), line='a\n'), + tokenize.TokenInfo(token.NL, string='', start=(2, 1), end=(2, 2), line=' '), + tokenize.TokenInfo(token.ENDMARKER, string='', start=(3, 0), end=(3, 0), line='') + ] + + tokens = list(tokenize.tokenize(BytesIO(source.encode('utf-8')).readline)) + self.assertEqual(tokens, expected_tokens) + + def test_invalid_character_in_fstring_middle(self): + # See gh-103824 + script = b'''F""" + \xe5"""''' + + with os_helper.temp_dir() as temp_dir: + filename = os.path.join(temp_dir, "script.py") + with open(filename, 'wb') as file: + file.write(script) + rs, _ = run_python_until_end(filename) + self.assertIn(b"SyntaxError", rs.err) - tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline)) - self.assertEqual(tok_name[tokens[0].exact_type], tok_name[ENCODING]) - for i in range(6): - self.assertEqual(tok_name[tokens[i + 1].exact_type], tok_name[expected_tokens[i]]) - self.assertEqual(tok_name[tokens[-1].exact_type], tok_name[token.ENDMARKER]) class UntokenizeTest(TestCase): def test_bad_input_order(self): # raise if previous row - u = Untokenizer() + u = tokenize.Untokenizer() u.prev_row = 2 u.prev_col = 2 with self.assertRaises(ValueError) as cm: @@ -1485,9 +1797,10 @@ def test_bad_input_order(self): # raise if previous column in 
row self.assertRaises(ValueError, u.add_whitespace, (2,1)) + @unittest.expectedFailure # TODO: RUSTPYTHON def test_backslash_continuation(self): # The problem is that \ leaves no token - u = Untokenizer() + u = tokenize.Untokenizer() u.prev_row = 1 u.prev_col = 1 u.tokens = [] @@ -1499,21 +1812,38 @@ def test_backslash_continuation(self): TestRoundtrip.check_roundtrip(self, 'a\n b\n c\n \\\n c\n') def test_iter_compat(self): - u = Untokenizer() - token = (NAME, 'Hello') - tokens = [(ENCODING, 'utf-8'), token] + u = tokenize.Untokenizer() + token = (tokenize.NAME, 'Hello') + tokens = [(tokenize.ENCODING, 'utf-8'), token] u.compat(token, iter([])) self.assertEqual(u.tokens, ["Hello "]) - u = Untokenizer() + u = tokenize.Untokenizer() self.assertEqual(u.untokenize(iter([token])), 'Hello ') - u = Untokenizer() + u = tokenize.Untokenizer() self.assertEqual(u.untokenize(iter(tokens)), 'Hello ') self.assertEqual(u.encoding, 'utf-8') - self.assertEqual(untokenize(iter(tokens)), b'Hello ') + self.assertEqual(tokenize.untokenize(iter(tokens)), b'Hello ') + + +def contains_ambiguous_backslash(source): + """Return `True` if the source contains a backslash on a + line by itself. For example: + + a = (1 + \\ + ) + + Code like this cannot be untokenized exactly. This is because + the tokenizer does not produce any tokens for the line containing + the backslash and so there is no way to know its indent. + """ + pattern = re.compile(br'\n\s*\\\r?\n') + return pattern.search(source) is not None class TestRoundtrip(TestCase): + @unittest.expectedFailure # TODO: RUSTPYTHON def check_roundtrip(self, f): """ Test roundtrip for `untokenize`. `f` is an open file or a string. @@ -1522,6 +1852,9 @@ def check_roundtrip(self, f): tokenize.untokenize(), and the latter tokenized again to 2-tuples. The test fails if the 3 pair tokenizations do not match. + If the source code can be untokenized unambiguously, the + untokenized code must match the original code exactly. + When untokenize bugs are fixed, untokenize with 5-tuples should reproduce code that does not contain a backslash continuation following spaces. A proper test should test this. @@ -1531,21 +1864,39 @@ def check_roundtrip(self, f): code = f.encode('utf-8') else: code = f.read() - f.close() readline = iter(code.splitlines(keepends=True)).__next__ - tokens5 = list(tokenize(readline)) + tokens5 = list(tokenize.tokenize(readline)) tokens2 = [tok[:2] for tok in tokens5] # Reproduce tokens2 from pairs - bytes_from2 = untokenize(tokens2) + bytes_from2 = tokenize.untokenize(tokens2) readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__ - tokens2_from2 = [tok[:2] for tok in tokenize(readline2)] + tokens2_from2 = [tok[:2] for tok in tokenize.tokenize(readline2)] self.assertEqual(tokens2_from2, tokens2) # Reproduce tokens2 from 5-tuples - bytes_from5 = untokenize(tokens5) + bytes_from5 = tokenize.untokenize(tokens5) readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__ - tokens2_from5 = [tok[:2] for tok in tokenize(readline5)] + tokens2_from5 = [tok[:2] for tok in tokenize.tokenize(readline5)] self.assertEqual(tokens2_from5, tokens2) + if not contains_ambiguous_backslash(code): + # The BOM does not produce a token so there is no way to preserve it. 
+ code_without_bom = code.removeprefix(b'\xef\xbb\xbf') + readline = iter(code_without_bom.splitlines(keepends=True)).__next__ + untokenized_code = tokenize.untokenize(tokenize.tokenize(readline)) + self.assertEqual(code_without_bom, untokenized_code) + + def check_line_extraction(self, f): + if isinstance(f, str): + code = f.encode('utf-8') + else: + code = f.read() + readline = iter(code.splitlines(keepends=True)).__next__ + for tok in tokenize.tokenize(readline): + if tok.type in {tokenize.ENCODING, tokenize.ENDMARKER}: + continue + self.assertEqual(tok.string, tok.line[tok.start[1]: tok.end[1]]) + + @unittest.expectedFailure # TODO: RUSTPYTHON def test_roundtrip(self): # There are some standard formatting practices that are easy to get right. @@ -1561,7 +1912,7 @@ def test_roundtrip(self): self.check_roundtrip("if x == 1 : \n" " print(x)\n") - fn = support.findfile("tokenize_tests.txt") + fn = support.findfile("tokenize_tests.txt", subdir="tokenizedata") with open(fn, 'rb') as f: self.check_roundtrip(f) self.check_roundtrip("if x == 1:\n" @@ -1585,6 +1936,64 @@ def test_roundtrip(self): " print('Can not import' # comment2\n)" "else: print('Loaded')\n") + self.check_roundtrip("f'\\N{EXCLAMATION MARK}'") + self.check_roundtrip(r"f'\\N{SNAKE}'") + self.check_roundtrip(r"f'\\N{{SNAKE}}'") + self.check_roundtrip(r"f'\N{SNAKE}'") + self.check_roundtrip(r"f'\\\N{SNAKE}'") + self.check_roundtrip(r"f'\\\\\N{SNAKE}'") + self.check_roundtrip(r"f'\\\\\\\N{SNAKE}'") + + self.check_roundtrip(r"f'\\N{1}'") + self.check_roundtrip(r"f'\\\\N{2}'") + self.check_roundtrip(r"f'\\\\\\N{3}'") + self.check_roundtrip(r"f'\\\\\\\\N{4}'") + + self.check_roundtrip(r"f'\\N{{'") + self.check_roundtrip(r"f'\\\\N{{'") + self.check_roundtrip(r"f'\\\\\\N{{'") + self.check_roundtrip(r"f'\\\\\\\\N{{'") + + self.check_roundtrip(r"f'\n{{foo}}'") + self.check_roundtrip(r"f'\\n{{foo}}'") + self.check_roundtrip(r"f'\\\n{{foo}}'") + self.check_roundtrip(r"f'\\\\n{{foo}}'") + + self.check_roundtrip(r"f'\t{{foo}}'") + self.check_roundtrip(r"f'\\t{{foo}}'") + self.check_roundtrip(r"f'\\\t{{foo}}'") + self.check_roundtrip(r"f'\\\\t{{foo}}'") + + self.check_roundtrip(r"rf'\t{{foo}}'") + self.check_roundtrip(r"rf'\\t{{foo}}'") + self.check_roundtrip(r"rf'\\\t{{foo}}'") + self.check_roundtrip(r"rf'\\\\t{{foo}}'") + + self.check_roundtrip(r"rf'\{{foo}}'") + self.check_roundtrip(r"f'\\{{foo}}'") + self.check_roundtrip(r"rf'\\\{{foo}}'") + self.check_roundtrip(r"f'\\\\{{foo}}'") + cases = [ + """ +if 1: + "foo" +"bar" +""", + """ +if 1: + ("foo" + "bar") +""", + """ +if 1: + "foo" + "bar" +""" ] + for case in cases: + self.check_roundtrip(case) + + + @unittest.expectedFailure # TODO: RUSTPYTHON def test_continuation(self): # Balancing continuation self.check_roundtrip("a = (3,4, \n" @@ -1598,6 +2007,7 @@ def test_continuation(self): "+ len(z) - z[\n" "'b']\n") + @unittest.expectedFailure # TODO: RUSTPYTHON def test_backslash_continuation(self): # Backslash means line continuation, except for comments self.check_roundtrip("x=1+\\\n" @@ -1611,26 +2021,15 @@ def test_string_concatenation(self): # Two string literals on the same line self.check_roundtrip("'' ''") - # TODO: RUSTPYTHON - @unittest.expectedFailure + @unittest.skip("TODO: RUSTPYTHON; slow and fails") def test_random_files(self): # Test roundtrip on random python modules. # pass the '-ucpu' option to process the full directory. 
import glob, random - fn = support.findfile("tokenize_tests.txt") - tempdir = os.path.dirname(fn) or os.curdir + tempdir = os.path.dirname(__file__) or os.curdir testfiles = glob.glob(os.path.join(glob.escape(tempdir), "test*.py")) - # Tokenize is broken on test_pep3131.py because regular expressions are - # broken on the obscure unicode identifiers in it. *sigh* - # With roundtrip extended to test the 5-tuple mode of untokenize, - # 7 more testfiles fail. Remove them also until the failure is diagnosed. - - testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py")) - for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'): - testfiles.remove(os.path.join(tempdir, "test_%s.py") % f) - if not support.is_resource_enabled("cpu"): testfiles = random.sample(testfiles, 10) @@ -1640,13 +2039,15 @@ def test_random_files(self): with open(testfile, 'rb') as f: with self.subTest(file=testfile): self.check_roundtrip(f) + self.check_line_extraction(f) def roundtrip(self, code): if isinstance(code, str): code = code.encode('utf-8') - return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8') + return tokenize.untokenize(tokenize.tokenize(BytesIO(code).readline)).decode('utf-8') + @unittest.expectedFailure # TODO: RUSTPYTHON def test_indentation_semantics_retained(self): """ Ensure that although whitespace might be mutated in a roundtrip, @@ -1658,5 +2059,1134 @@ def test_indentation_semantics_retained(self): self.check_roundtrip(code) +class InvalidPythonTests(TestCase): + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_number_followed_by_name(self): + # See issue #gh-105549 + source = "2sin(x)" + expected_tokens = [ + tokenize.TokenInfo(type=token.NUMBER, string='2', start=(1, 0), end=(1, 1), line='2sin(x)'), + tokenize.TokenInfo(type=token.NAME, string='sin', start=(1, 1), end=(1, 4), line='2sin(x)'), + tokenize.TokenInfo(type=token.OP, string='(', start=(1, 4), end=(1, 5), line='2sin(x)'), + tokenize.TokenInfo(type=token.NAME, string='x', start=(1, 5), end=(1, 6), line='2sin(x)'), + tokenize.TokenInfo(type=token.OP, string=')', start=(1, 6), end=(1, 7), line='2sin(x)'), + tokenize.TokenInfo(type=token.NEWLINE, string='', start=(1, 7), end=(1, 8), line='2sin(x)'), + tokenize.TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='') + ] + + tokens = list(tokenize.generate_tokens(StringIO(source).readline)) + self.assertEqual(tokens, expected_tokens) + + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_number_starting_with_zero(self): + source = "01234" + expected_tokens = [ + tokenize.TokenInfo(type=token.NUMBER, string='01234', start=(1, 0), end=(1, 5), line='01234'), + tokenize.TokenInfo(type=token.NEWLINE, string='', start=(1, 5), end=(1, 6), line='01234'), + tokenize.TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='') + ] + + tokens = list(tokenize.generate_tokens(StringIO(source).readline)) + self.assertEqual(tokens, expected_tokens) + +class CTokenizeTest(TestCase): + def check_tokenize(self, s, expected): + # Format the tokens in s in a table format. + # The ENDMARKER and final NEWLINE are omitted. 
+ f = StringIO(s) + with self.subTest(source=s): + result = stringify_tokens_from_source( + tokenize._generate_tokens_from_c_tokenizer(f.readline), s + ) + self.assertEqual(result, expected.rstrip().splitlines()) + + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_encoding(self): + def readline(encoding): + yield "1+1".encode(encoding) + + expected = [ + tokenize.TokenInfo(type=tokenize.NUMBER, string='1', start=(1, 0), end=(1, 1), line='1+1'), + tokenize.TokenInfo(type=tokenize.OP, string='+', start=(1, 1), end=(1, 2), line='1+1'), + tokenize.TokenInfo(type=tokenize.NUMBER, string='1', start=(1, 2), end=(1, 3), line='1+1'), + tokenize.TokenInfo(type=tokenize.NEWLINE, string='', start=(1, 3), end=(1, 4), line='1+1'), + tokenize.TokenInfo(type=tokenize.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='') + ] + for encoding in ["utf-8", "latin-1", "utf-16"]: + with self.subTest(encoding=encoding): + tokens = list(tokenize._generate_tokens_from_c_tokenizer( + readline(encoding).__next__, + extra_tokens=True, + encoding=encoding, + )) + self.assertEqual(tokens, expected) + + def test_int(self): + + self.check_tokenize('0xff <= 255', """\ + NUMBER '0xff' (1, 0) (1, 4) + LESSEQUAL '<=' (1, 5) (1, 7) + NUMBER '255' (1, 8) (1, 11) + """) + + self.check_tokenize('0b10 <= 255', """\ + NUMBER '0b10' (1, 0) (1, 4) + LESSEQUAL '<=' (1, 5) (1, 7) + NUMBER '255' (1, 8) (1, 11) + """) + + self.check_tokenize('0o123 <= 0O123', """\ + NUMBER '0o123' (1, 0) (1, 5) + LESSEQUAL '<=' (1, 6) (1, 8) + NUMBER '0O123' (1, 9) (1, 14) + """) + + self.check_tokenize('1234567 > ~0x15', """\ + NUMBER '1234567' (1, 0) (1, 7) + GREATER '>' (1, 8) (1, 9) + TILDE '~' (1, 10) (1, 11) + NUMBER '0x15' (1, 11) (1, 15) + """) + + self.check_tokenize('2134568 != 1231515', """\ + NUMBER '2134568' (1, 0) (1, 7) + NOTEQUAL '!=' (1, 8) (1, 10) + NUMBER '1231515' (1, 11) (1, 18) + """) + + self.check_tokenize('(-124561-1) & 200000000', """\ + LPAR '(' (1, 0) (1, 1) + MINUS '-' (1, 1) (1, 2) + NUMBER '124561' (1, 2) (1, 8) + MINUS '-' (1, 8) (1, 9) + NUMBER '1' (1, 9) (1, 10) + RPAR ')' (1, 10) (1, 11) + AMPER '&' (1, 12) (1, 13) + NUMBER '200000000' (1, 14) (1, 23) + """) + + self.check_tokenize('0xdeadbeef != -1', """\ + NUMBER '0xdeadbeef' (1, 0) (1, 10) + NOTEQUAL '!=' (1, 11) (1, 13) + MINUS '-' (1, 14) (1, 15) + NUMBER '1' (1, 15) (1, 16) + """) + + self.check_tokenize('0xdeadc0de & 12345', """\ + NUMBER '0xdeadc0de' (1, 0) (1, 10) + AMPER '&' (1, 11) (1, 12) + NUMBER '12345' (1, 13) (1, 18) + """) + + self.check_tokenize('0xFF & 0x15 | 1234', """\ + NUMBER '0xFF' (1, 0) (1, 4) + AMPER '&' (1, 5) (1, 6) + NUMBER '0x15' (1, 7) (1, 11) + VBAR '|' (1, 12) (1, 13) + NUMBER '1234' (1, 14) (1, 18) + """) + + def test_float(self): + + self.check_tokenize('x = 3.14159', """\ + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + NUMBER '3.14159' (1, 4) (1, 11) + """) + + self.check_tokenize('x = 314159.', """\ + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + NUMBER '314159.' 
(1, 4) (1, 11) + """) + + self.check_tokenize('x = .314159', """\ + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + NUMBER '.314159' (1, 4) (1, 11) + """) + + self.check_tokenize('x = 3e14159', """\ + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + NUMBER '3e14159' (1, 4) (1, 11) + """) + + self.check_tokenize('x = 3E123', """\ + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + NUMBER '3E123' (1, 4) (1, 9) + """) + + self.check_tokenize('x+y = 3e-1230', """\ + NAME 'x' (1, 0) (1, 1) + PLUS '+' (1, 1) (1, 2) + NAME 'y' (1, 2) (1, 3) + EQUAL '=' (1, 4) (1, 5) + NUMBER '3e-1230' (1, 6) (1, 13) + """) + + self.check_tokenize('x = 3.14e159', """\ + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + NUMBER '3.14e159' (1, 4) (1, 12) + """) + + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_string(self): + + self.check_tokenize('x = \'\'; y = ""', """\ + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + STRING "''" (1, 4) (1, 6) + SEMI ';' (1, 6) (1, 7) + NAME 'y' (1, 8) (1, 9) + EQUAL '=' (1, 10) (1, 11) + STRING '""' (1, 12) (1, 14) + """) + + self.check_tokenize('x = \'"\'; y = "\'"', """\ + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + STRING '\\'"\\'' (1, 4) (1, 7) + SEMI ';' (1, 7) (1, 8) + NAME 'y' (1, 9) (1, 10) + EQUAL '=' (1, 11) (1, 12) + STRING '"\\'"' (1, 13) (1, 16) + """) + + self.check_tokenize('x = "doesn\'t "shrink", does it"', """\ + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + STRING '"doesn\\'t "' (1, 4) (1, 14) + NAME 'shrink' (1, 14) (1, 20) + STRING '", does it"' (1, 20) (1, 31) + """) + + self.check_tokenize("x = 'abc' + 'ABC'", """\ + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + STRING "'abc'" (1, 4) (1, 9) + PLUS '+' (1, 10) (1, 11) + STRING "'ABC'" (1, 12) (1, 17) + """) + + self.check_tokenize('y = "ABC" + "ABC"', """\ + NAME 'y' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + STRING '"ABC"' (1, 4) (1, 9) + PLUS '+' (1, 10) (1, 11) + STRING '"ABC"' (1, 12) (1, 17) + """) + + self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\ + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + STRING "r'abc'" (1, 4) (1, 10) + PLUS '+' (1, 11) (1, 12) + STRING "r'ABC'" (1, 13) (1, 19) + PLUS '+' (1, 20) (1, 21) + STRING "R'ABC'" (1, 22) (1, 28) + PLUS '+' (1, 29) (1, 30) + STRING "R'ABC'" (1, 31) (1, 37) + """) + + self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\ + NAME 'y' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + STRING 'r"abc"' (1, 4) (1, 10) + PLUS '+' (1, 11) (1, 12) + STRING 'r"ABC"' (1, 13) (1, 19) + PLUS '+' (1, 20) (1, 21) + STRING 'R"ABC"' (1, 22) (1, 28) + PLUS '+' (1, 29) (1, 30) + STRING 'R"ABC"' (1, 31) (1, 37) + """) + + self.check_tokenize("u'abc' + U'abc'", """\ + STRING "u'abc'" (1, 0) (1, 6) + PLUS '+' (1, 7) (1, 8) + STRING "U'abc'" (1, 9) (1, 15) + """) + + self.check_tokenize('u"abc" + U"abc"', """\ + STRING 'u"abc"' (1, 0) (1, 6) + PLUS '+' (1, 7) (1, 8) + STRING 'U"abc"' (1, 9) (1, 15) + """) + + self.check_tokenize("b'abc' + B'abc'", """\ + STRING "b'abc'" (1, 0) (1, 6) + PLUS '+' (1, 7) (1, 8) + STRING "B'abc'" (1, 9) (1, 15) + """) + + self.check_tokenize('b"abc" + B"abc"', """\ + STRING 'b"abc"' (1, 0) (1, 6) + PLUS '+' (1, 7) (1, 8) + STRING 'B"abc"' (1, 9) (1, 15) + """) + + self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\ + STRING "br'abc'" (1, 0) (1, 7) + PLUS '+' (1, 8) (1, 9) + STRING "bR'abc'" (1, 10) (1, 17) + PLUS '+' (1, 18) (1, 19) + STRING "Br'abc'" (1, 20) (1, 27) + PLUS '+' (1, 28) (1, 29) + STRING "BR'abc'" (1, 30) (1, 37) + """) + + self.check_tokenize('br"abc" + 
bR"abc" + Br"abc" + BR"abc"', """\ + STRING 'br"abc"' (1, 0) (1, 7) + PLUS '+' (1, 8) (1, 9) + STRING 'bR"abc"' (1, 10) (1, 17) + PLUS '+' (1, 18) (1, 19) + STRING 'Br"abc"' (1, 20) (1, 27) + PLUS '+' (1, 28) (1, 29) + STRING 'BR"abc"' (1, 30) (1, 37) + """) + + self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\ + STRING "rb'abc'" (1, 0) (1, 7) + PLUS '+' (1, 8) (1, 9) + STRING "rB'abc'" (1, 10) (1, 17) + PLUS '+' (1, 18) (1, 19) + STRING "Rb'abc'" (1, 20) (1, 27) + PLUS '+' (1, 28) (1, 29) + STRING "RB'abc'" (1, 30) (1, 37) + """) + + self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\ + STRING 'rb"abc"' (1, 0) (1, 7) + PLUS '+' (1, 8) (1, 9) + STRING 'rB"abc"' (1, 10) (1, 17) + PLUS '+' (1, 18) (1, 19) + STRING 'Rb"abc"' (1, 20) (1, 27) + PLUS '+' (1, 28) (1, 29) + STRING 'RB"abc"' (1, 30) (1, 37) + """) + + self.check_tokenize('"a\\\nde\\\nfg"', """\ + STRING '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3) + """) + + self.check_tokenize('u"a\\\nde"', """\ + STRING 'u"a\\\\\\nde"\' (1, 0) (2, 3) + """) + + self.check_tokenize('rb"a\\\nd"', """\ + STRING 'rb"a\\\\\\nd"\' (1, 0) (2, 2) + """) + + self.check_tokenize(r'"""a\ +b"""', """\ + STRING '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4) + """) + self.check_tokenize(r'u"""a\ +b"""', """\ + STRING 'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4) + """) + self.check_tokenize(r'rb"""a\ +b\ +c"""', """\ + STRING 'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4) + """) + + self.check_tokenize(r'"hola\\\r\ndfgf"', """\ + STRING \'"hola\\\\\\\\\\\\r\\\\ndfgf"\' (1, 0) (1, 16) + """) + + self.check_tokenize('f"abc"', """\ + FSTRING_START 'f"' (1, 0) (1, 2) + FSTRING_MIDDLE 'abc' (1, 2) (1, 5) + FSTRING_END '"' (1, 5) (1, 6) + """) + + self.check_tokenize('fR"a{b}c"', """\ + FSTRING_START 'fR"' (1, 0) (1, 3) + FSTRING_MIDDLE 'a' (1, 3) (1, 4) + LBRACE '{' (1, 4) (1, 5) + NAME 'b' (1, 5) (1, 6) + RBRACE '}' (1, 6) (1, 7) + FSTRING_MIDDLE 'c' (1, 7) (1, 8) + FSTRING_END '"' (1, 8) (1, 9) + """) + + self.check_tokenize('f"""abc"""', """\ + FSTRING_START 'f\"""' (1, 0) (1, 4) + FSTRING_MIDDLE 'abc' (1, 4) (1, 7) + FSTRING_END '\"""' (1, 7) (1, 10) + """) + + self.check_tokenize(r'f"abc\ +def"', """\ + FSTRING_START \'f"\' (1, 0) (1, 2) + FSTRING_MIDDLE 'abc\\\\\\ndef' (1, 2) (2, 3) + FSTRING_END '"' (2, 3) (2, 4) + """) + + self.check_tokenize('''\ +f"{ +a}"''', """\ + FSTRING_START 'f"' (1, 0) (1, 2) + LBRACE '{' (1, 2) (1, 3) + NAME 'a' (2, 0) (2, 1) + RBRACE '}' (2, 1) (2, 2) + FSTRING_END '"' (2, 2) (2, 3) + """) + + self.check_tokenize(r'Rf"abc\ +def"', """\ + FSTRING_START 'Rf"' (1, 0) (1, 3) + FSTRING_MIDDLE 'abc\\\\\\ndef' (1, 3) (2, 3) + FSTRING_END '"' (2, 3) (2, 4) + """) + + self.check_tokenize(r'f"hola\\\r\ndfgf"', """\ + FSTRING_START \'f"\' (1, 0) (1, 2) + FSTRING_MIDDLE 'hola\\\\\\\\\\\\r\\\\ndfgf' (1, 2) (1, 16) + FSTRING_END \'"\' (1, 16) (1, 17) + """) + + self.check_tokenize("""\ +f'''__{ + x:a +}__'''""", """\ + FSTRING_START "f'''" (1, 0) (1, 4) + FSTRING_MIDDLE '__' (1, 4) (1, 6) + LBRACE '{' (1, 6) (1, 7) + NAME 'x' (2, 4) (2, 5) + COLON ':' (2, 5) (2, 6) + FSTRING_MIDDLE 'a\\n' (2, 6) (3, 0) + RBRACE '}' (3, 0) (3, 1) + FSTRING_MIDDLE '__' (3, 1) (3, 3) + FSTRING_END "'''" (3, 3) (3, 6) + """) + + self.check_tokenize("""\ +f'''__{ + x:a + b + c + d +}__'''""", """\ + FSTRING_START "f'''" (1, 0) (1, 4) + FSTRING_MIDDLE '__' (1, 4) (1, 6) + LBRACE '{' (1, 6) (1, 7) + NAME 'x' (2, 4) (2, 5) + COLON ':' (2, 5) (2, 6) + FSTRING_MIDDLE 'a\\n b\\n c\\n d\\n' (2, 6) (6, 0) + RBRACE '}' (6, 0) (6, 1) + FSTRING_MIDDLE '__' (6, 1) (6, 3) + 
FSTRING_END "'''" (6, 3) (6, 6) + """) + + def test_function(self): + + self.check_tokenize('def d22(a, b, c=2, d=2, *k): pass', """\ + NAME 'def' (1, 0) (1, 3) + NAME 'd22' (1, 4) (1, 7) + LPAR '(' (1, 7) (1, 8) + NAME 'a' (1, 8) (1, 9) + COMMA ',' (1, 9) (1, 10) + NAME 'b' (1, 11) (1, 12) + COMMA ',' (1, 12) (1, 13) + NAME 'c' (1, 14) (1, 15) + EQUAL '=' (1, 15) (1, 16) + NUMBER '2' (1, 16) (1, 17) + COMMA ',' (1, 17) (1, 18) + NAME 'd' (1, 19) (1, 20) + EQUAL '=' (1, 20) (1, 21) + NUMBER '2' (1, 21) (1, 22) + COMMA ',' (1, 22) (1, 23) + STAR '*' (1, 24) (1, 25) + NAME 'k' (1, 25) (1, 26) + RPAR ')' (1, 26) (1, 27) + COLON ':' (1, 27) (1, 28) + NAME 'pass' (1, 29) (1, 33) + """) + + self.check_tokenize('def d01v_(a=1, *k, **w): pass', """\ + NAME 'def' (1, 0) (1, 3) + NAME 'd01v_' (1, 4) (1, 9) + LPAR '(' (1, 9) (1, 10) + NAME 'a' (1, 10) (1, 11) + EQUAL '=' (1, 11) (1, 12) + NUMBER '1' (1, 12) (1, 13) + COMMA ',' (1, 13) (1, 14) + STAR '*' (1, 15) (1, 16) + NAME 'k' (1, 16) (1, 17) + COMMA ',' (1, 17) (1, 18) + DOUBLESTAR '**' (1, 19) (1, 21) + NAME 'w' (1, 21) (1, 22) + RPAR ')' (1, 22) (1, 23) + COLON ':' (1, 23) (1, 24) + NAME 'pass' (1, 25) (1, 29) + """) + + self.check_tokenize('def d23(a: str, b: int=3) -> int: pass', """\ + NAME 'def' (1, 0) (1, 3) + NAME 'd23' (1, 4) (1, 7) + LPAR '(' (1, 7) (1, 8) + NAME 'a' (1, 8) (1, 9) + COLON ':' (1, 9) (1, 10) + NAME 'str' (1, 11) (1, 14) + COMMA ',' (1, 14) (1, 15) + NAME 'b' (1, 16) (1, 17) + COLON ':' (1, 17) (1, 18) + NAME 'int' (1, 19) (1, 22) + EQUAL '=' (1, 22) (1, 23) + NUMBER '3' (1, 23) (1, 24) + RPAR ')' (1, 24) (1, 25) + RARROW '->' (1, 26) (1, 28) + NAME 'int' (1, 29) (1, 32) + COLON ':' (1, 32) (1, 33) + NAME 'pass' (1, 34) (1, 38) + """) + + def test_comparison(self): + + self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " + "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\ + NAME 'if' (1, 0) (1, 2) + NUMBER '1' (1, 3) (1, 4) + LESS '<' (1, 5) (1, 6) + NUMBER '1' (1, 7) (1, 8) + GREATER '>' (1, 9) (1, 10) + NUMBER '1' (1, 11) (1, 12) + EQEQUAL '==' (1, 13) (1, 15) + NUMBER '1' (1, 16) (1, 17) + GREATEREQUAL '>=' (1, 18) (1, 20) + NUMBER '5' (1, 21) (1, 22) + LESSEQUAL '<=' (1, 23) (1, 25) + NUMBER '0x15' (1, 26) (1, 30) + LESSEQUAL '<=' (1, 31) (1, 33) + NUMBER '0x12' (1, 34) (1, 38) + NOTEQUAL '!=' (1, 39) (1, 41) + NUMBER '1' (1, 42) (1, 43) + NAME 'and' (1, 44) (1, 47) + NUMBER '5' (1, 48) (1, 49) + NAME 'in' (1, 50) (1, 52) + NUMBER '1' (1, 53) (1, 54) + NAME 'not' (1, 55) (1, 58) + NAME 'in' (1, 59) (1, 61) + NUMBER '1' (1, 62) (1, 63) + NAME 'is' (1, 64) (1, 66) + NUMBER '1' (1, 67) (1, 68) + NAME 'or' (1, 69) (1, 71) + NUMBER '5' (1, 72) (1, 73) + NAME 'is' (1, 74) (1, 76) + NAME 'not' (1, 77) (1, 80) + NUMBER '1' (1, 81) (1, 82) + COLON ':' (1, 82) (1, 83) + NAME 'pass' (1, 84) (1, 88) + """) + + def test_additive(self): + + self.check_tokenize('x = 1 - y + 15 - 1 + 0x124 + z + a[5]', """\ + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + NUMBER '1' (1, 4) (1, 5) + MINUS '-' (1, 6) (1, 7) + NAME 'y' (1, 8) (1, 9) + PLUS '+' (1, 10) (1, 11) + NUMBER '15' (1, 12) (1, 14) + MINUS '-' (1, 15) (1, 16) + NUMBER '1' (1, 17) (1, 18) + PLUS '+' (1, 19) (1, 20) + NUMBER '0x124' (1, 21) (1, 26) + PLUS '+' (1, 27) (1, 28) + NAME 'z' (1, 29) (1, 30) + PLUS '+' (1, 31) (1, 32) + NAME 'a' (1, 33) (1, 34) + LSQB '[' (1, 34) (1, 35) + NUMBER '5' (1, 35) (1, 36) + RSQB ']' (1, 36) (1, 37) + """) + + def test_multiplicative(self): + + self.check_tokenize('x = 1//1*1/5*12%0x12@42', """\ + NAME 'x' (1, 0) (1, 1) + 
EQUAL '=' (1, 2) (1, 3) + NUMBER '1' (1, 4) (1, 5) + DOUBLESLASH '//' (1, 5) (1, 7) + NUMBER '1' (1, 7) (1, 8) + STAR '*' (1, 8) (1, 9) + NUMBER '1' (1, 9) (1, 10) + SLASH '/' (1, 10) (1, 11) + NUMBER '5' (1, 11) (1, 12) + STAR '*' (1, 12) (1, 13) + NUMBER '12' (1, 13) (1, 15) + PERCENT '%' (1, 15) (1, 16) + NUMBER '0x12' (1, 16) (1, 20) + AT '@' (1, 20) (1, 21) + NUMBER '42' (1, 21) (1, 23) + """) + + def test_unary(self): + + self.check_tokenize('~1 ^ 1 & 1 |1 ^ -1', """\ + TILDE '~' (1, 0) (1, 1) + NUMBER '1' (1, 1) (1, 2) + CIRCUMFLEX '^' (1, 3) (1, 4) + NUMBER '1' (1, 5) (1, 6) + AMPER '&' (1, 7) (1, 8) + NUMBER '1' (1, 9) (1, 10) + VBAR '|' (1, 11) (1, 12) + NUMBER '1' (1, 12) (1, 13) + CIRCUMFLEX '^' (1, 14) (1, 15) + MINUS '-' (1, 16) (1, 17) + NUMBER '1' (1, 17) (1, 18) + """) + + self.check_tokenize('-1*1/1+1*1//1 - ---1**1', """\ + MINUS '-' (1, 0) (1, 1) + NUMBER '1' (1, 1) (1, 2) + STAR '*' (1, 2) (1, 3) + NUMBER '1' (1, 3) (1, 4) + SLASH '/' (1, 4) (1, 5) + NUMBER '1' (1, 5) (1, 6) + PLUS '+' (1, 6) (1, 7) + NUMBER '1' (1, 7) (1, 8) + STAR '*' (1, 8) (1, 9) + NUMBER '1' (1, 9) (1, 10) + DOUBLESLASH '//' (1, 10) (1, 12) + NUMBER '1' (1, 12) (1, 13) + MINUS '-' (1, 14) (1, 15) + MINUS '-' (1, 16) (1, 17) + MINUS '-' (1, 17) (1, 18) + MINUS '-' (1, 18) (1, 19) + NUMBER '1' (1, 19) (1, 20) + DOUBLESTAR '**' (1, 20) (1, 22) + NUMBER '1' (1, 22) (1, 23) + """) + + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_selector(self): + + self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\ + NAME 'import' (1, 0) (1, 6) + NAME 'sys' (1, 7) (1, 10) + COMMA ',' (1, 10) (1, 11) + NAME 'time' (1, 12) (1, 16) + NEWLINE '' (1, 16) (1, 16) + NAME 'x' (2, 0) (2, 1) + EQUAL '=' (2, 2) (2, 3) + NAME 'sys' (2, 4) (2, 7) + DOT '.' (2, 7) (2, 8) + NAME 'modules' (2, 8) (2, 15) + LSQB '[' (2, 15) (2, 16) + STRING "'time'" (2, 16) (2, 22) + RSQB ']' (2, 22) (2, 23) + DOT '.' 
(2, 23) (2, 24) + NAME 'time' (2, 24) (2, 28) + LPAR '(' (2, 28) (2, 29) + RPAR ')' (2, 29) (2, 30) + """) + + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_method(self): + + self.check_tokenize('@staticmethod\ndef foo(x,y): pass', """\ + AT '@' (1, 0) (1, 1) + NAME 'staticmethod' (1, 1) (1, 13) + NEWLINE '' (1, 13) (1, 13) + NAME 'def' (2, 0) (2, 3) + NAME 'foo' (2, 4) (2, 7) + LPAR '(' (2, 7) (2, 8) + NAME 'x' (2, 8) (2, 9) + COMMA ',' (2, 9) (2, 10) + NAME 'y' (2, 10) (2, 11) + RPAR ')' (2, 11) (2, 12) + COLON ':' (2, 12) (2, 13) + NAME 'pass' (2, 14) (2, 18) + """) + + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_tabs(self): + + self.check_tokenize('@staticmethod\ndef foo(x,y): pass', """\ + AT '@' (1, 0) (1, 1) + NAME 'staticmethod' (1, 1) (1, 13) + NEWLINE '' (1, 13) (1, 13) + NAME 'def' (2, 0) (2, 3) + NAME 'foo' (2, 4) (2, 7) + LPAR '(' (2, 7) (2, 8) + NAME 'x' (2, 8) (2, 9) + COMMA ',' (2, 9) (2, 10) + NAME 'y' (2, 10) (2, 11) + RPAR ')' (2, 11) (2, 12) + COLON ':' (2, 12) (2, 13) + NAME 'pass' (2, 14) (2, 18) + """) + + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_async(self): + + self.check_tokenize('async = 1', """\ + NAME 'async' (1, 0) (1, 5) + EQUAL '=' (1, 6) (1, 7) + NUMBER '1' (1, 8) (1, 9) + """) + + self.check_tokenize('a = (async = 1)', """\ + NAME 'a' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + LPAR '(' (1, 4) (1, 5) + NAME 'async' (1, 5) (1, 10) + EQUAL '=' (1, 11) (1, 12) + NUMBER '1' (1, 13) (1, 14) + RPAR ')' (1, 14) (1, 15) + """) + + self.check_tokenize('async()', """\ + NAME 'async' (1, 0) (1, 5) + LPAR '(' (1, 5) (1, 6) + RPAR ')' (1, 6) (1, 7) + """) + + self.check_tokenize('class async(Bar):pass', """\ + NAME 'class' (1, 0) (1, 5) + NAME 'async' (1, 6) (1, 11) + LPAR '(' (1, 11) (1, 12) + NAME 'Bar' (1, 12) (1, 15) + RPAR ')' (1, 15) (1, 16) + COLON ':' (1, 16) (1, 17) + NAME 'pass' (1, 17) (1, 21) + """) + + self.check_tokenize('class async:pass', """\ + NAME 'class' (1, 0) (1, 5) + NAME 'async' (1, 6) (1, 11) + COLON ':' (1, 11) (1, 12) + NAME 'pass' (1, 12) (1, 16) + """) + + self.check_tokenize('await = 1', """\ + NAME 'await' (1, 0) (1, 5) + EQUAL '=' (1, 6) (1, 7) + NUMBER '1' (1, 8) (1, 9) + """) + + self.check_tokenize('foo.async', """\ + NAME 'foo' (1, 0) (1, 3) + DOT '.' (1, 3) (1, 4) + NAME 'async' (1, 4) (1, 9) + """) + + self.check_tokenize('async for a in b: pass', """\ + NAME 'async' (1, 0) (1, 5) + NAME 'for' (1, 6) (1, 9) + NAME 'a' (1, 10) (1, 11) + NAME 'in' (1, 12) (1, 14) + NAME 'b' (1, 15) (1, 16) + COLON ':' (1, 16) (1, 17) + NAME 'pass' (1, 18) (1, 22) + """) + + self.check_tokenize('async with a as b: pass', """\ + NAME 'async' (1, 0) (1, 5) + NAME 'with' (1, 6) (1, 10) + NAME 'a' (1, 11) (1, 12) + NAME 'as' (1, 13) (1, 15) + NAME 'b' (1, 16) (1, 17) + COLON ':' (1, 17) (1, 18) + NAME 'pass' (1, 19) (1, 23) + """) + + self.check_tokenize('async.foo', """\ + NAME 'async' (1, 0) (1, 5) + DOT '.' (1, 5) (1, 6) + NAME 'foo' (1, 6) (1, 9) + """) + + self.check_tokenize('async', """\ + NAME 'async' (1, 0) (1, 5) + """) + + self.check_tokenize('async\n#comment\nawait', """\ + NAME 'async' (1, 0) (1, 5) + NEWLINE '' (1, 5) (1, 5) + NAME 'await' (3, 0) (3, 5) + """) + + self.check_tokenize('async\n...\nawait', """\ + NAME 'async' (1, 0) (1, 5) + NEWLINE '' (1, 5) (1, 5) + ELLIPSIS '...' 
(2, 0) (2, 3) + NEWLINE '' (2, 3) (2, 3) + NAME 'await' (3, 0) (3, 5) + """) + + self.check_tokenize('async\nawait', """\ + NAME 'async' (1, 0) (1, 5) + NEWLINE '' (1, 5) (1, 5) + NAME 'await' (2, 0) (2, 5) + """) + + self.check_tokenize('foo.async + 1', """\ + NAME 'foo' (1, 0) (1, 3) + DOT '.' (1, 3) (1, 4) + NAME 'async' (1, 4) (1, 9) + PLUS '+' (1, 10) (1, 11) + NUMBER '1' (1, 12) (1, 13) + """) + + self.check_tokenize('async def foo(): pass', """\ + NAME 'async' (1, 0) (1, 5) + NAME 'def' (1, 6) (1, 9) + NAME 'foo' (1, 10) (1, 13) + LPAR '(' (1, 13) (1, 14) + RPAR ')' (1, 14) (1, 15) + COLON ':' (1, 15) (1, 16) + NAME 'pass' (1, 17) (1, 21) + """) + + self.check_tokenize('''\ +async def foo(): + def foo(await): + await = 1 + if 1: + await +async += 1 +''', """\ + NAME 'async' (1, 0) (1, 5) + NAME 'def' (1, 6) (1, 9) + NAME 'foo' (1, 10) (1, 13) + LPAR '(' (1, 13) (1, 14) + RPAR ')' (1, 14) (1, 15) + COLON ':' (1, 15) (1, 16) + NEWLINE '' (1, 16) (1, 16) + INDENT '' (2, -1) (2, -1) + NAME 'def' (2, 2) (2, 5) + NAME 'foo' (2, 6) (2, 9) + LPAR '(' (2, 9) (2, 10) + NAME 'await' (2, 10) (2, 15) + RPAR ')' (2, 15) (2, 16) + COLON ':' (2, 16) (2, 17) + NEWLINE '' (2, 17) (2, 17) + INDENT '' (3, -1) (3, -1) + NAME 'await' (3, 4) (3, 9) + EQUAL '=' (3, 10) (3, 11) + NUMBER '1' (3, 12) (3, 13) + NEWLINE '' (3, 13) (3, 13) + DEDENT '' (4, -1) (4, -1) + NAME 'if' (4, 2) (4, 4) + NUMBER '1' (4, 5) (4, 6) + COLON ':' (4, 6) (4, 7) + NEWLINE '' (4, 7) (4, 7) + INDENT '' (5, -1) (5, -1) + NAME 'await' (5, 4) (5, 9) + NEWLINE '' (5, 9) (5, 9) + DEDENT '' (6, -1) (6, -1) + DEDENT '' (6, -1) (6, -1) + NAME 'async' (6, 0) (6, 5) + PLUSEQUAL '+=' (6, 6) (6, 8) + NUMBER '1' (6, 9) (6, 10) + NEWLINE '' (6, 10) (6, 10) + """) + + self.check_tokenize('async def foo():\n async for i in 1: pass', """\ + NAME 'async' (1, 0) (1, 5) + NAME 'def' (1, 6) (1, 9) + NAME 'foo' (1, 10) (1, 13) + LPAR '(' (1, 13) (1, 14) + RPAR ')' (1, 14) (1, 15) + COLON ':' (1, 15) (1, 16) + NEWLINE '' (1, 16) (1, 16) + INDENT '' (2, -1) (2, -1) + NAME 'async' (2, 2) (2, 7) + NAME 'for' (2, 8) (2, 11) + NAME 'i' (2, 12) (2, 13) + NAME 'in' (2, 14) (2, 16) + NUMBER '1' (2, 17) (2, 18) + COLON ':' (2, 18) (2, 19) + NAME 'pass' (2, 20) (2, 24) + DEDENT '' (2, -1) (2, -1) + """) + + self.check_tokenize('async def foo(async): await', """\ + NAME 'async' (1, 0) (1, 5) + NAME 'def' (1, 6) (1, 9) + NAME 'foo' (1, 10) (1, 13) + LPAR '(' (1, 13) (1, 14) + NAME 'async' (1, 14) (1, 19) + RPAR ')' (1, 19) (1, 20) + COLON ':' (1, 20) (1, 21) + NAME 'await' (1, 22) (1, 27) + """) + + self.check_tokenize('''\ +def f(): + + def baz(): pass + async def bar(): pass + + await = 2''', """\ + NAME 'def' (1, 0) (1, 3) + NAME 'f' (1, 4) (1, 5) + LPAR '(' (1, 5) (1, 6) + RPAR ')' (1, 6) (1, 7) + COLON ':' (1, 7) (1, 8) + NEWLINE '' (1, 8) (1, 8) + INDENT '' (3, -1) (3, -1) + NAME 'def' (3, 2) (3, 5) + NAME 'baz' (3, 6) (3, 9) + LPAR '(' (3, 9) (3, 10) + RPAR ')' (3, 10) (3, 11) + COLON ':' (3, 11) (3, 12) + NAME 'pass' (3, 13) (3, 17) + NEWLINE '' (3, 17) (3, 17) + NAME 'async' (4, 2) (4, 7) + NAME 'def' (4, 8) (4, 11) + NAME 'bar' (4, 12) (4, 15) + LPAR '(' (4, 15) (4, 16) + RPAR ')' (4, 16) (4, 17) + COLON ':' (4, 17) (4, 18) + NAME 'pass' (4, 19) (4, 23) + NEWLINE '' (4, 23) (4, 23) + NAME 'await' (6, 2) (6, 7) + EQUAL '=' (6, 8) (6, 9) + NUMBER '2' (6, 10) (6, 11) + DEDENT '' (6, -1) (6, -1) + """) + + self.check_tokenize('''\ +async def f(): + + def baz(): pass + async def bar(): pass + + await = 2''', """\ + NAME 'async' (1, 0) (1, 5) + NAME 'def' (1, 6) 
(1, 9) + NAME 'f' (1, 10) (1, 11) + LPAR '(' (1, 11) (1, 12) + RPAR ')' (1, 12) (1, 13) + COLON ':' (1, 13) (1, 14) + NEWLINE '' (1, 14) (1, 14) + INDENT '' (3, -1) (3, -1) + NAME 'def' (3, 2) (3, 5) + NAME 'baz' (3, 6) (3, 9) + LPAR '(' (3, 9) (3, 10) + RPAR ')' (3, 10) (3, 11) + COLON ':' (3, 11) (3, 12) + NAME 'pass' (3, 13) (3, 17) + NEWLINE '' (3, 17) (3, 17) + NAME 'async' (4, 2) (4, 7) + NAME 'def' (4, 8) (4, 11) + NAME 'bar' (4, 12) (4, 15) + LPAR '(' (4, 15) (4, 16) + RPAR ')' (4, 16) (4, 17) + COLON ':' (4, 17) (4, 18) + NAME 'pass' (4, 19) (4, 23) + NEWLINE '' (4, 23) (4, 23) + NAME 'await' (6, 2) (6, 7) + EQUAL '=' (6, 8) (6, 9) + NUMBER '2' (6, 10) (6, 11) + DEDENT '' (6, -1) (6, -1) + """) + + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_unicode(self): + + self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\ + NAME 'Örter' (1, 0) (1, 5) + EQUAL '=' (1, 6) (1, 7) + STRING "u'places'" (1, 8) (1, 17) + NEWLINE '' (1, 17) (1, 17) + NAME 'grün' (2, 0) (2, 4) + EQUAL '=' (2, 5) (2, 6) + STRING "U'green'" (2, 7) (2, 15) + """) + + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_invalid_syntax(self): + def get_tokens(string): + the_string = StringIO(string) + return list(tokenize._generate_tokens_from_c_tokenizer(the_string.readline)) + + for case in [ + "(1+2]", + "(1+2}", + "{1+2]", + "1_", + "1.2_", + "1e2_", + "1e+", + + "\xa0", + "€", + "0b12", + "0b1_2", + "0b2", + "0b1_", + "0b", + "0o18", + "0o1_8", + "0o8", + "0o1_", + "0o", + "0x1_", + "0x", + "1_", + "012", + "1.2_", + "1e2_", + "1e+", + "'sdfsdf", + "'''sdfsdf''", + "("*1000+"a"+")"*1000, + "]", + """\ + f'__{ + x:d + }__'""", + ]: + with self.subTest(case=case): + self.assertRaises(tokenize.TokenError, get_tokens, case) + + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_max_indent(self): + MAXINDENT = 100 + + def generate_source(indents): + source = ''.join((' ' * x) + 'if True:\n' for x in range(indents)) + source += ' ' * indents + 'pass\n' + return source + + valid = generate_source(MAXINDENT - 1) + the_input = StringIO(valid) + tokens = list(tokenize._generate_tokens_from_c_tokenizer(the_input.readline)) + self.assertEqual(tokens[-2].type, tokenize.DEDENT) + self.assertEqual(tokens[-1].type, tokenize.ENDMARKER) + compile(valid, "", "exec") + + invalid = generate_source(MAXINDENT) + the_input = StringIO(invalid) + self.assertRaises(IndentationError, lambda: list(tokenize._generate_tokens_from_c_tokenizer(the_input.readline))) + self.assertRaises( + IndentationError, compile, invalid, "", "exec" + ) + + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_continuation_lines_indentation(self): + def get_tokens(string): + the_string = StringIO(string) + return [(kind, string) for (kind, string, *_) + in tokenize._generate_tokens_from_c_tokenizer(the_string.readline)] + + code = dedent(""" + def fib(n): + \\ + '''Print a Fibonacci series up to n.''' + \\ + a, b = 0, 1 + """) + + self.check_tokenize(code, """\ + NAME 'def' (2, 0) (2, 3) + NAME 'fib' (2, 4) (2, 7) + LPAR '(' (2, 7) (2, 8) + NAME 'n' (2, 8) (2, 9) + RPAR ')' (2, 9) (2, 10) + COLON ':' (2, 10) (2, 11) + NEWLINE '' (2, 11) (2, 11) + INDENT '' (4, -1) (4, -1) + STRING "'''Print a Fibonacci series up to n.'''" (4, 0) (4, 39) + NEWLINE '' (4, 39) (4, 39) + NAME 'a' (6, 0) (6, 1) + COMMA ',' (6, 1) (6, 2) + NAME 'b' (6, 3) (6, 4) + EQUAL '=' (6, 5) (6, 6) + NUMBER '0' (6, 7) (6, 8) + COMMA ',' (6, 8) (6, 9) + NUMBER '1' (6, 10) (6, 11) + NEWLINE '' (6, 11) (6, 11) + DEDENT '' (6, -1) (6, -1) + """) + + code_no_cont = 
dedent(""" + def fib(n): + '''Print a Fibonacci series up to n.''' + a, b = 0, 1 + """) + + self.assertEqual(get_tokens(code), get_tokens(code_no_cont)) + + code = dedent(""" + pass + \\ + + pass + """) + + self.check_tokenize(code, """\ + NAME 'pass' (2, 0) (2, 4) + NEWLINE '' (2, 4) (2, 4) + NAME 'pass' (5, 0) (5, 4) + NEWLINE '' (5, 4) (5, 4) + """) + + code_no_cont = dedent(""" + pass + pass + """) + + self.assertEqual(get_tokens(code), get_tokens(code_no_cont)) + + code = dedent(""" + if x: + y = 1 + \\ + \\ + \\ + \\ + foo = 1 + """) + + self.check_tokenize(code, """\ + NAME 'if' (2, 0) (2, 2) + NAME 'x' (2, 3) (2, 4) + COLON ':' (2, 4) (2, 5) + NEWLINE '' (2, 5) (2, 5) + INDENT '' (3, -1) (3, -1) + NAME 'y' (3, 4) (3, 5) + EQUAL '=' (3, 6) (3, 7) + NUMBER '1' (3, 8) (3, 9) + NEWLINE '' (3, 9) (3, 9) + NAME 'foo' (8, 4) (8, 7) + EQUAL '=' (8, 8) (8, 9) + NUMBER '1' (8, 10) (8, 11) + NEWLINE '' (8, 11) (8, 11) + DEDENT '' (8, -1) (8, -1) + """) + + code_no_cont = dedent(""" + if x: + y = 1 + foo = 1 + """) + + self.assertEqual(get_tokens(code), get_tokens(code_no_cont)) + + +class CTokenizerBufferTests(unittest.TestCase): + def test_newline_at_the_end_of_buffer(self): + # See issue 99581: Make sure that if we need to add a new line at the + # end of the buffer, we have enough space in the buffer, specially when + # the current line is as long as the buffer space available. + test_script = f"""\ + #coding: latin-1 + #{"a"*10000} + #{"a"*10002}""" + with os_helper.temp_dir() as temp_dir: + file_name = make_script(temp_dir, 'foo', test_script) + run_test_script(file_name) + + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_unittest/testmock/testhelpers.py b/Lib/test/test_unittest/testmock/testhelpers.py index 040e28517f..5facac685f 100644 --- a/Lib/test/test_unittest/testmock/testhelpers.py +++ b/Lib/test/test_unittest/testmock/testhelpers.py @@ -930,8 +930,6 @@ def check_data_descriptor(mock_attr): check_data_descriptor(foo.desc) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_autospec_on_bound_builtin_function(self): meth = types.MethodType(time.ctime, time.time()) self.assertIsInstance(meth(), str) diff --git a/Lib/test/tokenizedata/__init__.py b/Lib/test/tokenizedata/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/Lib/test/tokenizedata/bad_coding.py b/Lib/test/tokenizedata/bad_coding.py new file mode 100644 index 0000000000..971b0a8f3d --- /dev/null +++ b/Lib/test/tokenizedata/bad_coding.py @@ -0,0 +1 @@ +# -*- coding: uft-8 -*- diff --git a/Lib/test/tokenizedata/bad_coding2.py b/Lib/test/tokenizedata/bad_coding2.py new file mode 100644 index 0000000000..bb2bb7e1e7 --- /dev/null +++ b/Lib/test/tokenizedata/bad_coding2.py @@ -0,0 +1,2 @@ +#coding: utf8 +print('我') diff --git a/Lib/test/tokenizedata/badsyntax_3131.py b/Lib/test/tokenizedata/badsyntax_3131.py new file mode 100644 index 0000000000..901d3744ca --- /dev/null +++ b/Lib/test/tokenizedata/badsyntax_3131.py @@ -0,0 +1,2 @@ +# -*- coding: utf-8 -*- +€ = 2 diff --git a/Lib/test/tokenizedata/badsyntax_pep3120.py b/Lib/test/tokenizedata/badsyntax_pep3120.py new file mode 100644 index 0000000000..d14b4c96ed --- /dev/null +++ b/Lib/test/tokenizedata/badsyntax_pep3120.py @@ -0,0 +1 @@ +print("bse") diff --git a/Lib/test/tokenizedata/coding20731.py b/Lib/test/tokenizedata/coding20731.py new file mode 100644 index 0000000000..b0e227ad11 --- /dev/null +++ b/Lib/test/tokenizedata/coding20731.py @@ -0,0 +1,4 @@ +#coding:latin1 + + + diff --git 
a/Lib/test/tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt b/Lib/test/tokenizedata/tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt similarity index 100% rename from Lib/test/tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt rename to Lib/test/tokenizedata/tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt diff --git a/Lib/test/tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt b/Lib/test/tokenizedata/tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt similarity index 100% rename from Lib/test/tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt rename to Lib/test/tokenizedata/tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt diff --git a/Lib/test/tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt b/Lib/test/tokenizedata/tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt similarity index 100% rename from Lib/test/tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt rename to Lib/test/tokenizedata/tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt diff --git a/Lib/test/tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt b/Lib/test/tokenizedata/tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt similarity index 100% rename from Lib/test/tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt rename to Lib/test/tokenizedata/tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt diff --git a/Lib/test/tokenize_tests.txt b/Lib/test/tokenizedata/tokenize_tests.txt similarity index 100% rename from Lib/test/tokenize_tests.txt rename to Lib/test/tokenizedata/tokenize_tests.txt diff --git a/Lib/token.py b/Lib/token.py index 493bf04265..54d7cdccad 100644 --- a/Lib/token.py +++ b/Lib/token.py @@ -1,7 +1,8 @@ """Token constants.""" -# Auto-generated by Tools/scripts/generate_token.py +# Auto-generated by Tools/build/generate_token.py -__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF'] +__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF', + 'EXACT_TOKEN_TYPES'] ENDMARKER = 0 NAME = 1 @@ -57,17 +58,20 @@ RARROW = 51 ELLIPSIS = 52 COLONEQUAL = 53 -OP = 54 -AWAIT = 55 -ASYNC = 56 -TYPE_IGNORE = 57 -TYPE_COMMENT = 58 +EXCLAMATION = 54 +OP = 55 +TYPE_IGNORE = 56 +TYPE_COMMENT = 57 +SOFT_KEYWORD = 58 +FSTRING_START = 59 +FSTRING_MIDDLE = 60 +FSTRING_END = 61 +COMMENT = 62 +NL = 63 # These aren't used by the C tokenizer but are needed for tokenize.py -ERRORTOKEN = 59 -COMMENT = 60 -NL = 61 -ENCODING = 62 -N_TOKENS = 63 +ERRORTOKEN = 64 +ENCODING = 65 +N_TOKENS = 66 # Special definitions for cooperation with parser NT_OFFSET = 256 @@ -77,6 +81,7 @@ __all__.extend(tok_name.values()) EXACT_TOKEN_TYPES = { + '!': EXCLAMATION, '!=': NOTEQUAL, '%': PERCENT, '%=': PERCENTEQUAL, diff --git a/Lib/tokenize.py b/Lib/tokenize.py index d72968e425..7ca552c4fc 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -24,10 +24,7 @@ __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, ' 'Skip Montanaro, Raymond Hettinger, Trent Nelson, ' 'Michael Foord') -try: - from builtins import open as _builtin_open -except ImportError: - pass +from builtins import open as _builtin_open from codecs import lookup, BOM_UTF8 import collections import functools @@ -37,13 +34,14 @@ import sys from token import * from token import EXACT_TOKEN_TYPES +import _tokenize cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) import token __all__ = token.__all__ + ["tokenize", "generate_tokens", "detect_encoding", - "untokenize", "TokenInfo"] + 
"untokenize", "TokenInfo", "open", "TokenError"] del token class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')): @@ -146,6 +144,7 @@ def _compile(expr): endpats[_prefix + '"'] = Double endpats[_prefix + "'''"] = Single3 endpats[_prefix + '"""'] = Double3 +del _prefix # A set of all of the single and triple quoted string prefixes, # including the opening quotes. @@ -156,13 +155,12 @@ def _compile(expr): single_quoted.add(u) for u in (t + '"""', t + "'''"): triple_quoted.add(u) +del t, u tabsize = 8 class TokenError(Exception): pass -class StopTokenizing(Exception): pass - class Untokenizer: @@ -170,6 +168,8 @@ def __init__(self): self.tokens = [] self.prev_row = 1 self.prev_col = 0 + self.prev_type = None + self.prev_line = "" self.encoding = None def add_whitespace(self, start): @@ -177,14 +177,51 @@ def add_whitespace(self, start): if row < self.prev_row or row == self.prev_row and col < self.prev_col: raise ValueError("start ({},{}) precedes previous end ({},{})" .format(row, col, self.prev_row, self.prev_col)) - row_offset = row - self.prev_row - if row_offset: - self.tokens.append("\\\n" * row_offset) - self.prev_col = 0 + self.add_backslash_continuation(start) col_offset = col - self.prev_col if col_offset: self.tokens.append(" " * col_offset) + def add_backslash_continuation(self, start): + """Add backslash continuation characters if the row has increased + without encountering a newline token. + + This also inserts the correct amount of whitespace before the backslash. + """ + row = start[0] + row_offset = row - self.prev_row + if row_offset == 0: + return + + newline = '\r\n' if self.prev_line.endswith('\r\n') else '\n' + line = self.prev_line.rstrip('\\\r\n') + ws = ''.join(_itertools.takewhile(str.isspace, reversed(line))) + self.tokens.append(ws + f"\\{newline}" * row_offset) + self.prev_col = 0 + + def escape_brackets(self, token): + characters = [] + consume_until_next_bracket = False + for character in token: + if character == "}": + if consume_until_next_bracket: + consume_until_next_bracket = False + else: + characters.append(character) + if character == "{": + n_backslashes = sum( + 1 for char in _itertools.takewhile( + "\\".__eq__, + characters[-2::-1] + ) + ) + if n_backslashes % 2 == 0 or characters[-1] != "N": + characters.append(character) + else: + consume_until_next_bracket = True + characters.append(character) + return "".join(characters) + def untokenize(self, iterable): it = iter(iterable) indents = [] @@ -214,12 +251,22 @@ def untokenize(self, iterable): self.tokens.append(indent) self.prev_col = len(indent) startline = False + elif tok_type == FSTRING_MIDDLE: + if '{' in token or '}' in token: + token = self.escape_brackets(token) + last_line = token.splitlines()[-1] + end_line, end_col = end + extra_chars = last_line.count("{{") + last_line.count("}}") + end = (end_line, end_col + extra_chars) + self.add_whitespace(start) self.tokens.append(token) self.prev_row, self.prev_col = end if tok_type in (NEWLINE, NL): self.prev_row += 1 self.prev_col = 0 + self.prev_type = tok_type + self.prev_line = line return "".join(self.tokens) def compat(self, token, iterable): @@ -227,6 +274,7 @@ def compat(self, token, iterable): toks_append = self.tokens.append startline = token[0] in (NEWLINE, NL) prevstring = False + in_fstring = 0 for tok in _itertools.chain([token], iterable): toknum, tokval = tok[:2] @@ -245,6 +293,10 @@ def compat(self, token, iterable): else: prevstring = False + if toknum == FSTRING_START: + in_fstring += 1 + elif toknum 
== FSTRING_END: + in_fstring -= 1 if toknum == INDENT: indents.append(tokval) continue @@ -256,7 +308,19 @@ def compat(self, token, iterable): elif startline and indents: toks_append(indents[-1]) startline = False + elif toknum == FSTRING_MIDDLE: + tokval = self.escape_brackets(tokval) + + # Insert a space between two consecutive brackets if we are in an f-string + if tokval in {"{", "}"} and self.tokens and self.tokens[-1] == tokval and in_fstring: + tokval = ' ' + tokval + + # Insert a space between two consecutive f-strings + if toknum in (STRING, FSTRING_START) and self.prev_type in (STRING, FSTRING_END): + self.tokens.append(" ") + toks_append(tokval) + self.prev_type = toknum def untokenize(iterable): @@ -268,16 +332,10 @@ def untokenize(iterable): with at least two elements, a token number and token value. If only two tokens are passed, the resulting output is poor. - Round-trip invariant for full input: - Untokenized source will match input source exactly - - Round-trip invariant for limited input: - # Output bytes will tokenize back to the input - t1 = [tok[:2] for tok in tokenize(f.readline)] - newcode = untokenize(t1) - readline = BytesIO(newcode).readline - t2 = [tok[:2] for tok in tokenize(readline)] - assert t1 == t2 + The result is guaranteed to tokenize back to match the input so + that the conversion is lossless and round-trips are assured. + The guarantee applies only to the token type and token string as + the spacing between tokens (column positions) may change. """ ut = Untokenizer() out = ut.untokenize(iterable) @@ -287,7 +345,7 @@ def untokenize(iterable): def _get_normal_name(orig_enc): - """Imitates get_normal_name in tokenizer.c.""" + """Imitates get_normal_name in Parser/tokenizer/helpers.c.""" # Only care about the first 12 characters. enc = orig_enc[:12].lower().replace("_", "-") if enc == "utf-8" or enc.startswith("utf-8-"): @@ -405,7 +463,6 @@ def open(filename): buffer.close() raise - def tokenize(readline): """ The tokenize() generator requires one argument, readline, which @@ -426,193 +483,13 @@ def tokenize(readline): which tells you which encoding was used to decode the bytes stream. """ encoding, consumed = detect_encoding(readline) - empty = _itertools.repeat(b"") - rl_gen = _itertools.chain(consumed, iter(readline, b""), empty) - return _tokenize(rl_gen.__next__, encoding) - - -def _tokenize(readline, encoding): - lnum = parenlev = continued = 0 - numchars = '0123456789' - contstr, needcont = '', 0 - contline = None - indents = [0] - + rl_gen = _itertools.chain(consumed, iter(readline, b"")) if encoding is not None: if encoding == "utf-8-sig": # BOM will already have been stripped. encoding = "utf-8" yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '') - last_line = b'' - line = b'' - while True: # loop over lines in stream - try: - # We capture the value of the line variable here because - # readline uses the empty string '' to signal end of input, - # hence `line` itself will always be overwritten at the end - # of this loop. 
- last_line = line - line = readline() - except StopIteration: - line = b'' - - if encoding is not None: - line = line.decode(encoding) - lnum += 1 - pos, max = 0, len(line) - - if contstr: # continued string - if not line: - raise TokenError("EOF in multi-line string", strstart) - endmatch = endprog.match(line) - if endmatch: - pos = end = endmatch.end(0) - yield TokenInfo(STRING, contstr + line[:end], - strstart, (lnum, end), contline + line) - contstr, needcont = '', 0 - contline = None - elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n': - yield TokenInfo(ERRORTOKEN, contstr + line, - strstart, (lnum, len(line)), contline) - contstr = '' - contline = None - continue - else: - contstr = contstr + line - contline = contline + line - continue - - elif parenlev == 0 and not continued: # new statement - if not line: break - column = 0 - while pos < max: # measure leading whitespace - if line[pos] == ' ': - column += 1 - elif line[pos] == '\t': - column = (column//tabsize + 1)*tabsize - elif line[pos] == '\f': - column = 0 - else: - break - pos += 1 - if pos == max: - break - - if line[pos] in '#\r\n': # skip comments or blank lines - if line[pos] == '#': - comment_token = line[pos:].rstrip('\r\n') - yield TokenInfo(COMMENT, comment_token, - (lnum, pos), (lnum, pos + len(comment_token)), line) - pos += len(comment_token) - - yield TokenInfo(NL, line[pos:], - (lnum, pos), (lnum, len(line)), line) - continue - - if column > indents[-1]: # count indents or dedents - indents.append(column) - yield TokenInfo(INDENT, line[:pos], (lnum, 0), (lnum, pos), line) - while column < indents[-1]: - if column not in indents: - raise IndentationError( - "unindent does not match any outer indentation level", - ("", lnum, pos, line)) - indents = indents[:-1] - - yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line) - - else: # continued statement - if not line: - raise TokenError("EOF in multi-line statement", (lnum, 0)) - continued = 0 - - while pos < max: - pseudomatch = _compile(PseudoToken).match(line, pos) - if pseudomatch: # scan for tokens - start, end = pseudomatch.span(1) - spos, epos, pos = (lnum, start), (lnum, end), end - if start == end: - continue - token, initial = line[start:end], line[start] - - if (initial in numchars or # ordinary number - (initial == '.' and token != '.' and token != '...')): - yield TokenInfo(NUMBER, token, spos, epos, line) - elif initial in '\r\n': - if parenlev > 0: - yield TokenInfo(NL, token, spos, epos, line) - else: - yield TokenInfo(NEWLINE, token, spos, epos, line) - - elif initial == '#': - assert not token.endswith("\n") - yield TokenInfo(COMMENT, token, spos, epos, line) - - elif token in triple_quoted: - endprog = _compile(endpats[token]) - endmatch = endprog.match(line, pos) - if endmatch: # all on one line - pos = endmatch.end(0) - token = line[start:pos] - yield TokenInfo(STRING, token, spos, (lnum, pos), line) - else: - strstart = (lnum, start) # multiple lines - contstr = line[start:] - contline = line - break - - # Check up to the first 3 chars of the token to see if - # they're in the single_quoted set. If so, they start - # a string. - # We're using the first 3, because we're looking for - # "rb'" (for example) at the start of the token. If - # we switch to longer prefixes, this needs to be - # adjusted. - # Note that initial == token[:1]. - # Also note that single quote checking must come after - # triple quote checking (above). 
- elif (initial in single_quoted or - token[:2] in single_quoted or - token[:3] in single_quoted): - if token[-1] == '\n': # continued string - strstart = (lnum, start) - # Again, using the first 3 chars of the - # token. This is looking for the matching end - # regex for the correct type of quote - # character. So it's really looking for - # endpats["'"] or endpats['"'], by trying to - # skip string prefix characters, if any. - endprog = _compile(endpats.get(initial) or - endpats.get(token[1]) or - endpats.get(token[2])) - contstr, needcont = line[start:], 1 - contline = line - break - else: # ordinary string - yield TokenInfo(STRING, token, spos, epos, line) - - elif initial.isidentifier(): # ordinary name - yield TokenInfo(NAME, token, spos, epos, line) - elif initial == '\\': # continued stmt - continued = 1 - else: - if initial in '([{': - parenlev += 1 - elif initial in ')]}': - parenlev -= 1 - yield TokenInfo(OP, token, spos, epos, line) - else: - yield TokenInfo(ERRORTOKEN, line[pos], - (lnum, pos), (lnum, pos+1), line) - pos += 1 - - # Add an implicit NEWLINE if the input doesn't end in one - if last_line and last_line[-1] not in '\r\n' and not last_line.strip().startswith("#"): - yield TokenInfo(NEWLINE, '', (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), '') - for indent in indents[1:]: # pop remaining indent levels - yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '') - yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '') - + yield from _generate_tokens_from_c_tokenizer(rl_gen.__next__, encoding, extra_tokens=True) def generate_tokens(readline): """Tokenize a source reading Python code as unicode strings. @@ -620,7 +497,7 @@ def generate_tokens(readline): This has the same API as tokenize(), except that it expects the *readline* callable to return str objects instead of bytes. """ - return _tokenize(readline, None) + return _generate_tokens_from_c_tokenizer(readline, extra_tokens=True) def main(): import argparse @@ -657,7 +534,9 @@ def error(message, filename=None, location=None): tokens = list(tokenize(f.readline)) else: filename = "" - tokens = _tokenize(sys.stdin.readline, None) + tokens = _generate_tokens_from_c_tokenizer( + sys.stdin.readline, extra_tokens=True) + # Output the tokenization for token in tokens: @@ -683,5 +562,31 @@ def error(message, filename=None, location=None): perror("unexpected error: %s" % err) raise +def _transform_msg(msg): + """Transform error messages from the C tokenizer into the Python tokenize + + The C tokenizer is more picky than the Python one, so we need to massage + the error messages a bit for backwards compatibility. 
+ """ + if "unterminated triple-quoted string literal" in msg: + return "EOF in multi-line string" + return msg + +def _generate_tokens_from_c_tokenizer(source, encoding=None, extra_tokens=False): + """Tokenize a source reading Python code as unicode strings using the internal C tokenizer""" + if encoding is None: + it = _tokenize.TokenizerIter(source, extra_tokens=extra_tokens) + else: + it = _tokenize.TokenizerIter(source, encoding=encoding, extra_tokens=extra_tokens) + try: + for info in it: + yield TokenInfo._make(info) + except SyntaxError as e: + if type(e) != SyntaxError: + raise e from None + msg = _transform_msg(e.msg) + raise TokenError(msg, (e.lineno, e.offset)) from None + + if __name__ == "__main__": main() diff --git a/stdlib/Cargo.toml b/stdlib/Cargo.toml index 7f64802d35..4a065bc407 100644 --- a/stdlib/Cargo.toml +++ b/stdlib/Cargo.toml @@ -49,6 +49,12 @@ dyn-clone = "1.0.10" pymath = { workspace = true } xml = "1.0" +# tokenize +ruff_python_ast = { workspace = true } +ruff_python_parser = { workspace = true } +ruff_source_file = { workspace = true } +ruff_text_size = { workspace = true } + # random rand_core = { workspace = true } mt19937 = "3.1" diff --git a/stdlib/src/lib.rs b/stdlib/src/lib.rs index 706ce0ef21..7c0118b177 100644 --- a/stdlib/src/lib.rs +++ b/stdlib/src/lib.rs @@ -50,6 +50,7 @@ mod suggestions; pub mod socket; #[cfg(all(unix, not(target_os = "redox")))] mod syslog; +mod tokenize; mod unicodedata; mod faulthandler; @@ -140,6 +141,7 @@ pub fn get_module_inits() -> impl Iterator, StdlibInit "_random" => random::make_module, "_statistics" => statistics::make_module, "_struct" => pystruct::make_module, + "_tokenize" => tokenize::make_module, "unicodedata" => unicodedata::make_module, "zlib" => zlib::make_module, "_statistics" => statistics::make_module, diff --git a/stdlib/src/tokenize.rs b/stdlib/src/tokenize.rs new file mode 100644 index 0000000000..78102fbece --- /dev/null +++ b/stdlib/src/tokenize.rs @@ -0,0 +1,392 @@ +pub(crate) use _tokenize::make_module; + +#[pymodule] +mod _tokenize { + use crate::{ + common::lock::PyRwLock, + vm::{ + AsObject, Py, PyPayload, PyResult, VirtualMachine, + builtins::{PyBytes, PyStr, PyStrRef, PyTypeRef}, + convert::ToPyObject, + function::ArgCallable, + protocol::PyIterReturn, + types::{Constructor, IterNext, Iterable, SelfIter}, + }, + }; + use ruff_python_ast::PySourceType; + use ruff_python_parser::{ParseError, Token, TokenKind, Tokens, parse_unchecked_source}; + use ruff_source_file::{LineIndex, LineRanges}; + use ruff_text_size::{Ranged, TextRange}; + use std::{cmp::Ordering, fmt}; + + /// Cpython `__import__("token").OP` + const TOKEN_OP: u8 = 55; + + #[pyattr] + #[pyclass(name = "TokenizerIter")] + #[derive(PyPayload)] + pub struct PyTokenizerIter { + readline: ArgCallable, // TODO: This should be PyObject + extra_tokens: bool, + encoding: Option, + state: PyRwLock, + } + + impl PyTokenizerIter { + fn readline(&self, vm: &VirtualMachine) -> PyResult { + // TODO: When `readline` is PyObject, + // we need to check if it's callable and raise a type error if it's not. 
+ let raw_line = match self.readline.invoke((), vm) { + Ok(v) => v, + Err(err) => { + if err.fast_isinstance(vm.ctx.exceptions.stop_iteration) { + return Ok(String::new()); + } + return Err(err); + } + }; + Ok(match &self.encoding { + Some(encoding) => { + let bytes = raw_line + .downcast::() + .map_err(|_| vm.new_type_error("readline() returned a non-bytes object"))?; + vm.state + .codec_registry + .decode_text(bytes.into(), encoding, None, vm) + .map(|s| s.as_str().to_owned())? + } + None => raw_line + .downcast::() + .map(|s| s.as_str().to_owned()) + .map_err(|_| vm.new_type_error("readline() returned a non-string object"))?, + }) + } + } + + impl fmt::Debug for PyTokenizerIter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("PyTokenizerIter") + .field("readline", &self.readline) + .field("encoding", &self.encoding) + .field("extra_tokens", &self.extra_tokens) + .finish() + } + } + + #[pyclass(with(Constructor, Iterable, IterNext))] + impl PyTokenizerIter {} + + impl Constructor for PyTokenizerIter { + type Args = PyTokenizerIterArgs; + + fn py_new(cls: PyTypeRef, args: Self::Args, vm: &VirtualMachine) -> PyResult { + let Self::Args { + readline, + extra_tokens, + encoding, + } = args; + + Self { + readline, + extra_tokens, + encoding: encoding.map(|s| s.as_str().to_owned()), + state: PyRwLock::new(PyTokenizerIterState::default()), + } + .into_ref_with_type(vm, cls) + .map(Into::into) + } + } + + impl SelfIter for PyTokenizerIter {} + + impl IterNext for PyTokenizerIter { + fn next(zelf: &Py, vm: &VirtualMachine) -> PyResult { + let mut state = { + let guard = zelf.state.read(); + guard.clone() + }; + + if state.eof { + return Ok(PyIterReturn::StopIteration(None)); + } + + let token = loop { + // TODO: Check here for errors. Raise SyntaxError if needed + + if let Some(tok) = state.next_token() { + break tok; + } + + let nline = zelf.readline(vm)?; + if nline.is_empty() { + state.eof = true; + *zelf.state.write() = state.clone(); + + let line_num = &state.start().0; + let out = vm + .ctx + .new_tuple(vec![ + token_kind_value(TokenKind::EndOfFile).to_pyobject(vm), + vm.ctx.new_str("").into(), + vm.ctx + .new_tuple(vec![line_num.to_pyobject(vm), (-1).to_pyobject(vm)]) + .into(), + vm.ctx + .new_tuple(vec![line_num.to_pyobject(vm), (-1).to_pyobject(vm)]) + .into(), + vm.ctx.new_str(state.current_line()).into(), + ]) + .into(); + return Ok(PyIterReturn::Return(out)); + } + state.push_line(&nline); + }; + + *zelf.state.write() = state.clone(); + + let token_kind = token.kind(); + let token_value = if zelf.extra_tokens && token_kind.is_operator() { + TOKEN_OP + } else { + token_kind_value(token_kind) + }; + let (start_x, start_y) = &state.start(); + let (end_x, end_y) = &state.end(); + + let mut token_repr = &state.source[state.range()]; + if !zelf.extra_tokens { + token_repr = token_repr.trim(); + } + + let out = vm + .ctx + .new_tuple(vec![ + token_value.to_pyobject(vm), + vm.ctx.new_str(token_repr).into(), + vm.ctx + .new_tuple(vec![start_x.to_pyobject(vm), start_y.to_pyobject(vm)]) + .into(), + vm.ctx + .new_tuple(vec![end_x.to_pyobject(vm), end_y.to_pyobject(vm)]) + .into(), + vm.ctx.new_str(state.current_line()).into(), + ]) + .into(); + Ok(PyIterReturn::Return(out)) + } + } + + #[derive(FromArgs)] + pub struct PyTokenizerIterArgs { + #[pyarg(positional)] + readline: ArgCallable, + #[pyarg(named)] + extra_tokens: bool, + #[pyarg(named, optional)] + encoding: Option, + } + + #[derive(Clone, Debug)] + struct PyTokenizerIterState { + /// Source code. 
+ source: String, + prev_token: Option, + /// Tokens of `source`. + tokens: Tokens, + /// Errors of `source` + errors: Vec, + /// LineIndex of `source`. + line_index: LineIndex, + /// Marker that says we already emitted EOF, and needs to stop iterating. + eof: bool, + } + + impl PyTokenizerIterState { + fn push_line(&mut self, line: &str) { + self.source.push_str(line); + + let parsed = parse_unchecked_source(&self.source, PySourceType::Python); + self.tokens = parsed.tokens().clone(); + self.errors = parsed.errors().to_vec(); + self.line_index = LineIndex::from_source_text(&self.source); + } + + #[must_use] + fn current_line(&self) -> &str { + let (kind, range) = match self.prev_token { + Some(token) => token.as_tuple(), + None => (TokenKind::Unknown, TextRange::default()), + }; + + match kind { + TokenKind::Newline => self.source.full_line_str(range.start()), + _ => self.source.full_lines_str(range), + } + } + + #[must_use] + fn next_token(&mut self) -> Option { + for token in self.tokens.iter() { + let (kind, range) = token.as_tuple(); + + if matches!(kind, TokenKind::NonLogicalNewline) { + continue; + } + + if matches!(range.ordering(self.range()), Ordering::Greater) { + self.prev_token = Some(*token); + return self.prev_token; + } + } + + None + } + + #[must_use] + fn range(&self) -> TextRange { + match self.prev_token { + Some(token) => token.range(), + None => TextRange::default(), + } + } + + #[must_use] + fn start(&self) -> (usize, usize) { + let lc = self + .line_index + .line_column(self.range().start(), &self.source); + (lc.line.get(), lc.column.to_zero_indexed()) + } + + #[must_use] + fn end(&self) -> (usize, usize) { + let lc = self + .line_index + .line_column(self.range().end(), &self.source); + (lc.line.get(), lc.column.to_zero_indexed()) + } + } + + impl Default for PyTokenizerIterState { + fn default() -> Self { + const SOURCE: &str = ""; + let parsed = parse_unchecked_source(SOURCE, PySourceType::Python); + + Self { + source: SOURCE.to_owned(), + prev_token: None, + tokens: parsed.tokens().clone(), + errors: parsed.errors().to_vec(), + line_index: LineIndex::from_source_text(SOURCE), + eof: false, + } + } + } + + const fn token_kind_value(kind: TokenKind) -> u8 { + match kind { + TokenKind::EndOfFile => 0, + TokenKind::Name + | TokenKind::For + | TokenKind::In + | TokenKind::Pass + | TokenKind::Class + | TokenKind::And + | TokenKind::Is + | TokenKind::Raise + | TokenKind::True + | TokenKind::False + | TokenKind::Assert + | TokenKind::Try + | TokenKind::While + | TokenKind::Yield + | TokenKind::Lambda + | TokenKind::None + | TokenKind::Not + | TokenKind::Or + | TokenKind::Break + | TokenKind::Continue + | TokenKind::Global + | TokenKind::Nonlocal + | TokenKind::Return + | TokenKind::Except + | TokenKind::Import + | TokenKind::Case + | TokenKind::Match + | TokenKind::Type + | TokenKind::Await + | TokenKind::With + | TokenKind::Del + | TokenKind::Finally + | TokenKind::From + | TokenKind::Def + | TokenKind::If + | TokenKind::Else + | TokenKind::Elif + | TokenKind::As + | TokenKind::Async => 1, + TokenKind::Int | TokenKind::Complex | TokenKind::Float => 2, + TokenKind::String => 3, + TokenKind::Newline | TokenKind::NonLogicalNewline => 4, + TokenKind::Indent => 5, + TokenKind::Dedent => 6, + TokenKind::Lpar => 7, + TokenKind::Rpar => 8, + TokenKind::Lsqb => 9, + TokenKind::Rsqb => 10, + TokenKind::Colon => 11, + TokenKind::Comma => 12, + TokenKind::Semi => 13, + TokenKind::Plus => 14, + TokenKind::Minus => 15, + TokenKind::Star => 16, + TokenKind::Slash => 17, + 
TokenKind::Vbar => 18, + TokenKind::Amper => 19, + TokenKind::Less => 20, + TokenKind::Greater => 21, + TokenKind::Equal => 22, + TokenKind::Dot => 23, + TokenKind::Percent => 24, + TokenKind::Lbrace => 25, + TokenKind::Rbrace => 26, + TokenKind::EqEqual => 27, + TokenKind::NotEqual => 28, + TokenKind::LessEqual => 29, + TokenKind::GreaterEqual => 30, + TokenKind::Tilde => 31, + TokenKind::CircumFlex => 32, + TokenKind::LeftShift => 33, + TokenKind::RightShift => 34, + TokenKind::DoubleStar => 35, + TokenKind::PlusEqual => 36, + TokenKind::MinusEqual => 37, + TokenKind::StarEqual => 38, + TokenKind::SlashEqual => 39, + TokenKind::PercentEqual => 40, + TokenKind::AmperEqual => 41, + TokenKind::VbarEqual => 42, + TokenKind::CircumflexEqual => 43, + TokenKind::LeftShiftEqual => 44, + TokenKind::RightShiftEqual => 45, + TokenKind::DoubleStarEqual => 46, + TokenKind::DoubleSlash => 47, + TokenKind::DoubleSlashEqual => 48, + TokenKind::At => 49, + TokenKind::AtEqual => 50, + TokenKind::Rarrow => 51, + TokenKind::Ellipsis => 52, + TokenKind::ColonEqual => 53, + TokenKind::Exclamation => 54, + TokenKind::FStringStart => 59, + TokenKind::FStringMiddle => 60, + TokenKind::FStringEnd => 61, + TokenKind::Comment => 62, + TokenKind::TStringStart => 62, // 3.14 compatible + TokenKind::TStringMiddle => 63, // 3.14 compatible + TokenKind::TStringEnd => 64, // 3.14 compatible + TokenKind::IpyEscapeCommand | TokenKind::Question => 0, // Ruff's specific + TokenKind::Unknown => 0, + } + } +}
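
As context for the rewritten untokenize() docstring in this patch (round-trips are guaranteed only for token type and token string, not for column positions), the following is a minimal sketch of that invariant using just the public tokenize API. The sample source string is arbitrary and not taken from the patch; behaviour is as documented for CPython's tokenize module, which this diff tracks.

# Sketch of the round-trip invariant stated in the new untokenize() docstring:
# token types and strings survive the round trip, spacing/columns may not.
import io
import tokenize

source = b"x = 1 +    2\n"          # arbitrary sample input (extra spaces on purpose)

t1 = [tok[:2] for tok in tokenize.tokenize(io.BytesIO(source).readline)]
rebuilt = tokenize.untokenize(t1)   # 2-tuples take the compat() path; returns bytes
t2 = [tok[:2] for tok in tokenize.tokenize(io.BytesIO(rebuilt).readline)]

assert t1 == t2                     # types and strings match even though spacing changed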
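
And a small, hypothetical usage sketch of the private entry point that the new Rust `_tokenize.TokenizerIter` backs, mirroring how the CTokenizeTest cases above drive it. `_generate_tokens_from_c_tokenizer` is internal API; the call shape (readline callable plus `extra_tokens`) is assumed to follow the tokenize.py changes in this diff rather than any stable public contract.

# Hypothetical driver for the internal C-tokenizer path added in this patch.
# Mirrors CTokenizeTest.check_tokenize: feed a readline callable, get TokenInfo tuples.
from io import StringIO
import tokenize

src = StringIO("0xff <= 255\n")
for tok in tokenize._generate_tokens_from_c_tokenizer(src.readline, extra_tokens=True):
    # tok is a TokenInfo: (type, string, start, end, line)
    print(tokenize.tok_name[tok.type], repr(tok.string), tok.start, tok.end)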