diff --git a/doc/release/upcoming_changes/28767.change.rst b/doc/release/upcoming_changes/28767.change.rst new file mode 100644 index 000000000000..ec173c3672b0 --- /dev/null +++ b/doc/release/upcoming_changes/28767.change.rst @@ -0,0 +1,10 @@ +``unique_values`` for string dtypes may return unsorted data +------------------------------------------------------------ +np.unique now supports hash-based duplicate removal for string dtypes. +This enhancement extends the hash-table algorithm to byte strings ('S'), +Unicode strings ('U'), and the experimental string dtype ('T', StringDType). +As a result, calling np.unique() on an array of strings will use +the faster hash-based method to obtain unique values. +Note that this hash-based method does not guarantee that the returned unique values will be sorted. +This also works for StringDType arrays containing None (missing values) +when using equal_nan=True (treating missing values as equal). diff --git a/doc/release/upcoming_changes/28767.performance.rst b/doc/release/upcoming_changes/28767.performance.rst new file mode 100644 index 000000000000..ef8ac1c3a45d --- /dev/null +++ b/doc/release/upcoming_changes/28767.performance.rst @@ -0,0 +1,10 @@ +Performance improvements to ``np.unique`` for string dtypes +----------------------------------------------------------- +The hash-based algorithm for unique extraction provides +an order-of-magnitude speedup on large string arrays. +In an internal benchmark with about 1 billion string elements, +the hash-based np.unique completed in roughly 33.5 seconds, +compared to 498 seconds with the sort-based method +– about 15× faster for unsorted unique operations on strings. +This improvement greatly reduces the time to find unique values +in very large string datasets. 
diff --git a/numpy/_core/meson.build b/numpy/_core/meson.build index a4d2050122c6..4f004dc3ea1f 100644 --- a/numpy/_core/meson.build +++ b/numpy/_core/meson.build @@ -1206,6 +1206,7 @@ src_multiarray = multiarray_gen_headers + [ # Remove this `arm64_exports.c` file once scipy macos arm64 build correctly # links to the arm64 npymath library, see gh-22673 'src/npymath/arm64_exports.c', + 'src/multiarray/fnv.c', ] src_umath = umath_gen_headers + [ diff --git a/numpy/_core/src/multiarray/fnv.c b/numpy/_core/src/multiarray/fnv.c new file mode 100644 index 000000000000..2b7848519e61 --- /dev/null +++ b/numpy/_core/src/multiarray/fnv.c @@ -0,0 +1,85 @@ +/* + FNV-1a hash algorithm implementation + Based on the implementation from: + https://github.com/lcn2/fnv +*/ + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE + +#include +#include "numpy/npy_common.h" +#include "fnv.h" + + +#define FNV1A_32_INIT ((npy_uint32)0x811c9dc5) +#define FNV1A_64_INIT ((npy_uint64)0xcbf29ce484222325ULL) + +/* + Compute a 32-bit FNV-1a hash of buffer + original implementation from: + https://github.com/lcn2/fnv/blob/b7fcbee95538ee6a15744e756e7e7f1c02862cb0/hash_32a.c +*/ +npy_uint32 +npy_fnv1a_32(const void *buf, size_t len, npy_uint32 hval) +{ + const unsigned char *bp = (const unsigned char *)buf; /* start of buffer */ + const unsigned char *be = bp + len; /* beyond end of buffer */ + + /* + FNV-1a hash each octet in the buffer + */ + while (bp < be) { + + /* xor the bottom with the current octet */ + hval ^= (npy_uint32)*bp++; + + /* multiply by the 32 bit FNV magic prime */ + /* hval *= 0x01000193; */ + hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24); + } + + return hval; +} + +/* + Compute a 64-bit FNV-1a hash of the given data + original implementation from: + https://github.com/lcn2/fnv/blob/b7fcbee95538ee6a15744e756e7e7f1c02862cb0/hash_64a.c +*/ +npy_uint64 +npy_fnv1a_64(const void *buf, size_t len, npy_uint64 hval) +{ + const unsigned char 
*bp = (const unsigned char *)buf; /* start of buffer */ + const unsigned char *be = bp + len; /* beyond end of buffer */ + + /* + FNV-1a hash each octet in the buffer + */ + while (bp < be) { + + /* xor the bottom with the current octet */ + hval ^= (npy_uint64)*bp++; + + /* multiply by the 64 bit FNV magic prime */ + /* hval *= 0x100000001b3ULL; */ + hval += (hval << 1) + (hval << 4) + (hval << 5) + + (hval << 7) + (hval << 8) + (hval << 40); + } + + return hval; +} + +/* + * Compute a size_t FNV-1a hash of the given data + * This will use 32-bit or 64-bit hash depending on the size of size_t + */ +size_t +npy_fnv1a(const void *buf, size_t len) +{ +#if NPY_SIZEOF_SIZE_T == 8 + return (size_t)npy_fnv1a_64(buf, len, FNV1A_64_INIT); +#else /* NPY_SIZEOF_SIZE_T == 4 */ + return (size_t)npy_fnv1a_32(buf, len, FNV1A_32_INIT); +#endif +} diff --git a/numpy/_core/src/multiarray/fnv.h b/numpy/_core/src/multiarray/fnv.h new file mode 100644 index 000000000000..c76f54a645b9 --- /dev/null +++ b/numpy/_core/src/multiarray/fnv.h @@ -0,0 +1,26 @@ +/* + FNV-1a hash algorithm implementation + Based on the implementation from: + https://github.com/lcn2/fnv +*/ + +#ifndef NUMPY_CORE_INCLUDE_NUMPY_MULTIARRAY_FNV_H_ +#define NUMPY_CORE_INCLUDE_NUMPY_MULTIARRAY_FNV_H_ + + +/* + Compute a size_t FNV-1a hash of the given data + This will use 32-bit or 64-bit hash depending on the size of size_t + + Parameters: + ----------- + buf - pointer to the data to be hashed + len - length of the data in bytes + + Returns: + ----------- + size_t hash value +*/ +size_t npy_fnv1a(const void *buf, size_t len); + +#endif // NUMPY_CORE_INCLUDE_NUMPY_MULTIARRAY_FNV_H_ diff --git a/numpy/_core/src/multiarray/multiarraymodule.c b/numpy/_core/src/multiarray/multiarraymodule.c index 7724756ba351..955dca01e75d 100644 --- a/numpy/_core/src/multiarray/multiarraymodule.c +++ b/numpy/_core/src/multiarray/multiarraymodule.c @@ -4571,7 +4571,7 @@ static struct PyMethodDef array_module_methods[] = { {"from_dlpack", 
(PyCFunction)from_dlpack, METH_FASTCALL | METH_KEYWORDS, NULL}, {"_unique_hash", (PyCFunction)array__unique_hash, - METH_O, "Collect unique values via a hash map."}, + METH_FASTCALL | METH_KEYWORDS, "Collect unique values via a hash map."}, {NULL, NULL, 0, NULL} /* sentinel */ }; diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp index f36acfdef49a..636f1ef0137c 100644 --- a/numpy/_core/src/multiarray/unique.cpp +++ b/numpy/_core/src/multiarray/unique.cpp @@ -1,13 +1,21 @@ #define NPY_NO_DEPRECATED_API NPY_API_VERSION #define _MULTIARRAYMODULE +#define HASH_TABLE_INITIAL_BUCKETS 1024 #include -#include +#include +#include #include +#include #include #include "numpy/arrayobject.h" +#include "gil_utils.h" +extern "C" { + #include "fnv.h" + #include "npy_argparse.h" +} // This is to use RAII pattern to handle cpp exceptions while avoiding memory leaks. // Adapted from https://stackoverflow.com/a/25510879/2536294 @@ -18,77 +26,128 @@ struct FinalAction { private: F clean_; }; - template FinalAction finally(F f) { return FinalAction(f); } -template +template static PyObject* -unique(PyArrayObject *self) +unique_integer(PyArrayObject *self, npy_bool equal_nan) { - /* This function takes a numpy array and returns a numpy array containing - the unique values. - - It assumes the numpy array includes data that can be viewed as unsigned integers - of a certain size (sizeof(T)). - - It doesn't need to know the actual type, since it needs to find unique values - among binary representations of the input data. This means it won't apply to - custom or complicated dtypes or string values. + /* + * Returns a new NumPy array containing the unique values of the input array of integer. + * This function uses hashing to identify uniqueness efficiently. 
*/ NPY_ALLOW_C_API_DEF; - std::unordered_set hashset; - - NpyIter *iter = NpyIter_New(self, NPY_ITER_READONLY | - NPY_ITER_EXTERNAL_LOOP | - NPY_ITER_REFS_OK | - NPY_ITER_ZEROSIZE_OK | - NPY_ITER_GROWINNER, - NPY_KEEPORDER, NPY_NO_CASTING, - NULL); - // Making sure the iterator is deallocated when the function returns, with - // or w/o an exception - auto iter_dealloc = finally([&]() { NpyIter_Deallocate(iter); }); - if (iter == NULL) { - return NULL; + NPY_ALLOW_C_API; + PyArray_Descr *descr = PyArray_DESCR(self); + Py_INCREF(descr); + NPY_DISABLE_C_API; + + PyThreadState *_save1 = PyEval_SaveThread(); + + // number of elements in the input array + npy_intp isize = PyArray_SIZE(self); + + // Reserve hashset capacity in advance to minimize reallocations and collisions. + // We use min(isize, HASH_TABLE_INITIAL_BUCKETS) as the initial bucket count: + // - Reserving for all elements (isize) may over-allocate when there are few unique values. + // - Using a moderate upper bound HASH_TABLE_INITIAL_BUCKETS(1024) keeps memory usage reasonable (4 KiB for pointers). + // See discussion: https://github.com/numpy/numpy/pull/28767#discussion_r2064267631 + std::unordered_set hashset(std::min(isize, (npy_intp)HASH_TABLE_INITIAL_BUCKETS)); + + // Input array is one-dimensional, enabling efficient iteration using strides. + char *idata = PyArray_BYTES(self); + npy_intp istride = PyArray_STRIDES(self)[0]; + for (npy_intp i = 0; i < isize; i++, idata += istride) { + hashset.insert(*(T *)idata); } - NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL); - if (iternext == NULL) { + npy_intp length = hashset.size(); + + PyEval_RestoreThread(_save1); + NPY_ALLOW_C_API; + PyObject *res_obj = PyArray_NewFromDescr( + &PyArray_Type, + descr, + 1, // ndim + &length, // shape + NULL, // strides + NULL, // data + // This flag is needed to be able to call .sort on it. 
+ NPY_ARRAY_WRITEABLE, // flags + NULL // obj + ); + + if (res_obj == NULL) { return NULL; } - char **dataptr = NpyIter_GetDataPtrArray(iter); - npy_intp *strideptr = NpyIter_GetInnerStrideArray(iter); - npy_intp *innersizeptr = NpyIter_GetInnerLoopSizePtr(iter); - - // release the GIL - PyThreadState *_save; - _save = PyEval_SaveThread(); - // Making sure the GIL is re-acquired when the function returns, with - // or w/o an exception - auto grab_gil = finally([&]() { PyEval_RestoreThread(_save); }); - // first we put the data in a hash map - - if (NpyIter_GetIterSize(iter) > 0) { - do { - char* data = *dataptr; - npy_intp stride = *strideptr; - npy_intp count = *innersizeptr; - - while (count--) { - hashset.insert(*((T *) data)); - data += stride; - } - } while (iternext(iter)); + NPY_DISABLE_C_API; + PyThreadState *_save2 = PyEval_SaveThread(); + auto save2_dealloc = finally([&]() { + PyEval_RestoreThread(_save2); + }); + + char *odata = PyArray_BYTES((PyArrayObject *)res_obj); + npy_intp ostride = PyArray_STRIDES((PyArrayObject *)res_obj)[0]; + // Output array is one-dimensional, enabling efficient iteration using strides. + for (auto it = hashset.begin(); it != hashset.end(); it++, odata += ostride) { + *(T *)odata = *it; } - npy_intp length = hashset.size(); + return res_obj; +} +template +static PyObject* +unique_string(PyArrayObject *self, npy_bool equal_nan) +{ + /* + * Returns a new NumPy array containing the unique values of the input array of fixed size strings. + * This function uses hashing to identify uniqueness efficiently. 
+ */ + NPY_ALLOW_C_API_DEF; NPY_ALLOW_C_API; PyArray_Descr *descr = PyArray_DESCR(self); Py_INCREF(descr); + NPY_DISABLE_C_API; + + PyThreadState *_save1 = PyEval_SaveThread(); + + // number of elements in the input array + npy_intp isize = PyArray_SIZE(self); + + // variables for the string + npy_intp itemsize = descr->elsize; + npy_intp num_chars = itemsize / sizeof(T); + auto hash = [num_chars](const T *value) -> size_t { + return npy_fnv1a(value, num_chars * sizeof(T)); + }; + auto equal = [itemsize](const T *lhs, const T *rhs) -> bool { + return std::memcmp(lhs, rhs, itemsize) == 0; + }; + + // Reserve hashset capacity in advance to minimize reallocations and collisions. + // We use min(isize, HASH_TABLE_INITIAL_BUCKETS) as the initial bucket count: + // - Reserving for all elements (isize) may over-allocate when there are few unique values. + // - Using a moderate upper bound HASH_TABLE_INITIAL_BUCKETS(1024) keeps memory usage reasonable (4 KiB for pointers). + // See discussion: https://github.com/numpy/numpy/pull/28767#discussion_r2064267631 + std::unordered_set hashset( + std::min(isize, (npy_intp)HASH_TABLE_INITIAL_BUCKETS), hash, equal + ); + + // Input array is one-dimensional, enabling efficient iteration using strides. 
+ char *idata = PyArray_BYTES(self); + npy_intp istride = PyArray_STRIDES(self)[0]; + for (npy_intp i = 0; i < isize; i++, idata += istride) { + hashset.insert((T *)idata); + } + + npy_intp length = hashset.size(); + + PyEval_RestoreThread(_save1); + NPY_ALLOW_C_API; PyObject *res_obj = PyArray_NewFromDescr( &PyArray_Type, descr, @@ -100,18 +159,147 @@ unique(PyArrayObject *self) NPY_ARRAY_WRITEABLE, // flags NULL // obj ); + + if (res_obj == NULL) { + return NULL; + } NPY_DISABLE_C_API; + PyThreadState *_save2 = PyEval_SaveThread(); + auto save2_dealloc = finally([&]() { + PyEval_RestoreThread(_save2); + }); + + char *odata = PyArray_BYTES((PyArrayObject *)res_obj); + npy_intp ostride = PyArray_STRIDES((PyArrayObject *)res_obj)[0]; + // Output array is one-dimensional, enabling efficient iteration using strides. + for (auto it = hashset.begin(); it != hashset.end(); it++, odata += ostride) { + std::memcpy(odata, *it, itemsize); + } + + return res_obj; +} + +static PyObject* +unique_vstring(PyArrayObject *self, npy_bool equal_nan) +{ + /* + * Returns a new NumPy array containing the unique values of the input array. + * This function uses hashing to identify uniqueness efficiently. 
+ */ + NPY_ALLOW_C_API_DEF; + NPY_ALLOW_C_API; + PyArray_Descr *descr = PyArray_DESCR(self); + Py_INCREF(descr); + NPY_DISABLE_C_API; + + PyThreadState *_save1 = PyEval_SaveThread(); + + // number of elements in the input array + npy_intp isize = PyArray_SIZE(self); + + // variables for the vstring + npy_string_allocator *in_allocator = NpyString_acquire_allocator((PyArray_StringDTypeObject *)descr); + auto hash = [equal_nan](const npy_static_string *value) -> size_t { + if (value->buf == NULL) { + if (equal_nan) { + return 0; + } else { + return std::hash{}(value); + } + } + return npy_fnv1a(value->buf, value->size * sizeof(char)); + }; + auto equal = [equal_nan](const npy_static_string *lhs, const npy_static_string *rhs) -> bool { + if (lhs->buf == NULL && rhs->buf == NULL) { + if (equal_nan) { + return true; + } else { + return lhs == rhs; + } + } + if (lhs->buf == NULL || rhs->buf == NULL) { + return false; + } + if (lhs->size != rhs->size) { + return false; + } + return std::memcmp(lhs->buf, rhs->buf, lhs->size) == 0; + }; + // Reserve hashset capacity in advance to minimize reallocations and collisions. + // We use min(isize, HASH_TABLE_INITIAL_BUCKETS) as the initial bucket count: + // - Reserving for all elements (isize) may over-allocate when there are few unique values. + // - Using a moderate upper bound HASH_TABLE_INITIAL_BUCKETS(1024) keeps memory usage reasonable (4 KiB for pointers). + // See discussion: https://github.com/numpy/numpy/pull/28767#discussion_r2064267631 + std::unordered_set hashset( + std::min(isize, (npy_intp)HASH_TABLE_INITIAL_BUCKETS), hash, equal + ); + + // Input array is one-dimensional, enabling efficient iteration using strides. + char *idata = PyArray_BYTES(self); + npy_intp istride = PyArray_STRIDES(self)[0]; + // unpacked_strings need to be allocated outside of the loop because of the lifetime problem. 
+ std::vector unpacked_strings(isize, {0, NULL}); + for (npy_intp i = 0; i < isize; i++, idata += istride) { + npy_packed_static_string *packed_string = (npy_packed_static_string *)idata; + int is_null = NpyString_load(in_allocator, packed_string, &unpacked_strings[i]); + if (is_null == -1) { + npy_gil_error(PyExc_RuntimeError, + "Failed to load string from packed static string. "); + return NULL; + } + hashset.insert(&unpacked_strings[i]); + } + + NpyString_release_allocator(in_allocator); + + npy_intp length = hashset.size(); + + PyEval_RestoreThread(_save1); + NPY_ALLOW_C_API; + PyObject *res_obj = PyArray_NewFromDescr( + &PyArray_Type, + descr, + 1, // ndim + &length, // shape + NULL, // strides + NULL, // data + // This flag is needed to be able to call .sort on it. + NPY_ARRAY_WRITEABLE, // flags + NULL // obj + ); if (res_obj == NULL) { return NULL; } + PyArray_Descr *res_descr = PyArray_DESCR((PyArrayObject *)res_obj); + Py_INCREF(res_descr); + NPY_DISABLE_C_API; + + PyThreadState *_save2 = PyEval_SaveThread(); + auto save2_dealloc = finally([&]() { + PyEval_RestoreThread(_save2); + }); + + npy_string_allocator *out_allocator = NpyString_acquire_allocator((PyArray_StringDTypeObject *)res_descr); + auto out_allocator_dealloc = finally([&]() { + NpyString_release_allocator(out_allocator); + }); - // then we iterate through the map's keys to get the unique values - T* data = (T *)PyArray_DATA((PyArrayObject *)res_obj); - auto it = hashset.begin(); - size_t i = 0; - for (; it != hashset.end(); it++, i++) { - data[i] = *it; + char *odata = PyArray_BYTES((PyArrayObject *)res_obj); + npy_intp ostride = PyArray_STRIDES((PyArrayObject *)res_obj)[0]; + // Output array is one-dimensional, enabling efficient iteration using strides. 
+ for (auto it = hashset.begin(); it != hashset.end(); it++, odata += ostride) { + npy_packed_static_string *packed_string = (npy_packed_static_string *)odata; + int pack_status = 0; + if ((*it)->buf == NULL) { + pack_status = NpyString_pack_null(out_allocator, packed_string); + } else { + pack_status = NpyString_pack(out_allocator, packed_string, (*it)->buf, (*it)->size); + } + if (pack_status == -1) { + // string packing failed + return NULL; + } } return res_obj; @@ -119,27 +307,30 @@ unique(PyArrayObject *self) // this map contains the functions used for each item size. -typedef std::function function_type; +typedef std::function function_type; std::unordered_map unique_funcs = { - {NPY_BYTE, unique}, - {NPY_UBYTE, unique}, - {NPY_SHORT, unique}, - {NPY_USHORT, unique}, - {NPY_INT, unique}, - {NPY_UINT, unique}, - {NPY_LONG, unique}, - {NPY_ULONG, unique}, - {NPY_LONGLONG, unique}, - {NPY_ULONGLONG, unique}, - {NPY_INT8, unique}, - {NPY_INT16, unique}, - {NPY_INT32, unique}, - {NPY_INT64, unique}, - {NPY_UINT8, unique}, - {NPY_UINT16, unique}, - {NPY_UINT32, unique}, - {NPY_UINT64, unique}, - {NPY_DATETIME, unique}, + {NPY_BYTE, unique_integer}, + {NPY_UBYTE, unique_integer}, + {NPY_SHORT, unique_integer}, + {NPY_USHORT, unique_integer}, + {NPY_INT, unique_integer}, + {NPY_UINT, unique_integer}, + {NPY_LONG, unique_integer}, + {NPY_ULONG, unique_integer}, + {NPY_LONGLONG, unique_integer}, + {NPY_ULONGLONG, unique_integer}, + {NPY_INT8, unique_integer}, + {NPY_INT16, unique_integer}, + {NPY_INT32, unique_integer}, + {NPY_INT64, unique_integer}, + {NPY_UINT8, unique_integer}, + {NPY_UINT16, unique_integer}, + {NPY_UINT32, unique_integer}, + {NPY_UINT64, unique_integer}, + {NPY_DATETIME, unique_integer}, + {NPY_STRING, unique_string}, + {NPY_UNICODE, unique_string}, + {NPY_VSTRING, unique_vstring}, }; @@ -154,14 +345,21 @@ std::unordered_map unique_funcs = { * type is unsupported or `NULL` with an error set. 
*/ extern "C" NPY_NO_EXPORT PyObject * -array__unique_hash(PyObject *NPY_UNUSED(module), PyObject *arr_obj) +array__unique_hash(PyObject *NPY_UNUSED(module), + PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames) { - if (!PyArray_Check(arr_obj)) { - PyErr_SetString(PyExc_TypeError, - "_unique_hash() requires a NumPy array input."); + PyArrayObject *arr = NULL; + npy_bool equal_nan = NPY_TRUE; // default to True + + NPY_PREPARE_ARGPARSER; + if (npy_parse_arguments("_unique_hash", args, len_args, kwnames, + "arr", &PyArray_Converter, &arr, + "|equal_nan", &PyArray_BoolConverter, &equal_nan, + NULL, NULL, NULL + ) < 0 + ) { return NULL; } - PyArrayObject *arr = (PyArrayObject *)arr_obj; try { auto type = PyArray_TYPE(arr); @@ -170,7 +368,7 @@ array__unique_hash(PyObject *NPY_UNUSED(module), PyObject *arr_obj) Py_RETURN_NOTIMPLEMENTED; } - return unique_funcs[type](arr); + return unique_funcs[type](arr, equal_nan); } catch (const std::bad_alloc &e) { PyErr_NoMemory(); diff --git a/numpy/_core/src/multiarray/unique.h b/numpy/_core/src/multiarray/unique.h index 3e258405e8f4..7b3fb143ada4 100644 --- a/numpy/_core/src/multiarray/unique.h +++ b/numpy/_core/src/multiarray/unique.h @@ -5,7 +5,8 @@ extern "C" { #endif -PyObject* array__unique_hash(PyObject *NPY_UNUSED(dummy), PyObject *args); +PyObject* array__unique_hash(PyObject *NPY_UNUSED(dummy), + PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames); #ifdef __cplusplus } diff --git a/numpy/lib/_arraysetops_impl.py b/numpy/lib/_arraysetops_impl.py index ef0739ba486f..c4788385b924 100644 --- a/numpy/lib/_arraysetops_impl.py +++ b/numpy/lib/_arraysetops_impl.py @@ -368,7 +368,8 @@ def _unique1d(ar, return_index=False, return_inverse=False, conv = _array_converter(ar) ar_, = conv - if (hash_unique := _unique_hash(ar_)) is not NotImplemented: + if (hash_unique := _unique_hash(ar_, equal_nan=equal_nan)) \ + is not NotImplemented: if sorted: hash_unique.sort() # We wrap the result back in case it was a 
subclass of numpy.ndarray. diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py index 7865e1b16ee9..b3e2bfa279b0 100644 --- a/numpy/lib/tests/test_arraysetops.py +++ b/numpy/lib/tests/test_arraysetops.py @@ -5,6 +5,7 @@ import numpy as np from numpy import ediff1d, intersect1d, isin, setdiff1d, setxor1d, union1d, unique +from numpy.dtypes import StringDType from numpy.exceptions import AxisError from numpy.testing import ( assert_array_equal, @@ -813,7 +814,9 @@ def test_unique_1d(self): def test_unique_zero_sized(self): # test for zero-sized arrays - for dt in self.get_types(): + types = self.get_types() + types.extend('SU') + for dt in types: a = np.array([], dt) b = np.array([], dt) i1 = np.array([], np.int64) @@ -838,6 +841,187 @@ class Subclass(np.ndarray): bb = Subclass(b.shape, dtype=dt, buffer=b) self.check_all(aa, bb, i1, i2, c, dt) + def test_unique_byte_string_hash_based(self): + # test for byte string arrays + arr = ['apple', 'banana', 'apple', 'cherry', 'date', 'banana', 'fig', 'grape'] + unq_sorted = ['apple', 'banana', 'cherry', 'date', 'fig', 'grape'] + + a1 = unique(arr, sorted=False) + # the result varies depending on the impl of std::unordered_set, + # so we check them by sorting + assert_array_equal(sorted(a1.tolist()), unq_sorted) + + def test_unique_unicode_string_hash_based(self): + # test for unicode string arrays + arr = [ + 'café', 'cafe', 'café', 'naïve', 'naive', + 'résumé', 'naïve', 'resume', 'résumé', + ] + unq_sorted = ['cafe', 'café', 'naive', 'naïve', 'resume', 'résumé'] + + a1 = unique(arr, sorted=False) + # the result varies depending on the impl of std::unordered_set, + # so we check them by sorting + assert_array_equal(sorted(a1.tolist()), unq_sorted) + + def test_unique_vstring_hash_based_equal_nan(self): + # test for unicode and nullable string arrays (equal_nan=True) + a = np.array([ + # short strings + 'straße', + None, + 'strasse', + 'straße', + None, + 'niño', + 'nino', + 'élève', + 
'eleve', + 'niño', + 'élève', + # medium strings + 'b' * 20, + 'ß' * 30, + None, + 'é' * 30, + 'e' * 20, + 'ß' * 30, + 'n' * 30, + 'ñ' * 20, + None, + 'e' * 20, + 'ñ' * 20, + # long strings + 'b' * 300, + 'ß' * 400, + None, + 'é' * 400, + 'e' * 300, + 'ß' * 400, + 'n' * 400, + 'ñ' * 300, + None, + 'e' * 300, + 'ñ' * 300, + ], + dtype=StringDType(na_object=None) + ) + unq_sorted_wo_none = [ + 'b' * 20, + 'b' * 300, + 'e' * 20, + 'e' * 300, + 'eleve', + 'nino', + 'niño', + 'n' * 30, + 'n' * 400, + 'strasse', + 'straße', + 'ß' * 30, + 'ß' * 400, + 'élève', + 'é' * 30, + 'é' * 400, + 'ñ' * 20, + 'ñ' * 300, + ] + + a1 = unique(a, sorted=False, equal_nan=True) + # the result varies depending on the impl of std::unordered_set, + # so we check them by sorting + + # a1 should have exactly one None + count_none = sum(x is None for x in a1) + assert_equal(count_none, 1) + + a1_wo_none = sorted(x for x in a1 if x is not None) + assert_array_equal(a1_wo_none, unq_sorted_wo_none) + + def test_unique_vstring_hash_based_not_equal_nan(self): + # test for unicode and nullable string arrays (equal_nan=False) + a = np.array([ + # short strings + 'straße', + None, + 'strasse', + 'straße', + None, + 'niño', + 'nino', + 'élève', + 'eleve', + 'niño', + 'élève', + # medium strings + 'b' * 20, + 'ß' * 30, + None, + 'é' * 30, + 'e' * 20, + 'ß' * 30, + 'n' * 30, + 'ñ' * 20, + None, + 'e' * 20, + 'ñ' * 20, + # long strings + 'b' * 300, + 'ß' * 400, + None, + 'é' * 400, + 'e' * 300, + 'ß' * 400, + 'n' * 400, + 'ñ' * 300, + None, + 'e' * 300, + 'ñ' * 300, + ], + dtype=StringDType(na_object=None) + ) + unq_sorted_wo_none = [ + 'b' * 20, + 'b' * 300, + 'e' * 20, + 'e' * 300, + 'eleve', + 'nino', + 'niño', + 'n' * 30, + 'n' * 400, + 'strasse', + 'straße', + 'ß' * 30, + 'ß' * 400, + 'élève', + 'é' * 30, + 'é' * 400, + 'ñ' * 20, + 'ñ' * 300, + ] + + a1 = unique(a, sorted=False, equal_nan=False) + # the result varies depending on the impl of std::unordered_set, + # so we check them by sorting + + # a1 
should have exactly one None + count_none = sum(x is None for x in a1) + assert_equal(count_none, 6) + + a1_wo_none = sorted(x for x in a1 if x is not None) + assert_array_equal(a1_wo_none, unq_sorted_wo_none) + + def test_unique_vstring_errors(self): + a = np.array( + [ + 'apple', 'banana', 'apple', None, 'cherry', + 'date', 'banana', 'fig', None, 'grape', + ] * 2, + dtype=StringDType(na_object=None) + ) + assert_raises(ValueError, unique, a, equal_nan=False) + @pytest.mark.parametrize("arg", ["return_index", "return_inverse", "return_counts"]) def test_unsupported_hash_based(self, arg): """These currently never use the hash-based solution. However,